From e85fe5e885bca603a151af05bd17eb55e9a25871 Mon Sep 17 00:00:00 2001
From: Peng Qu <82029664+pengqu123@users.noreply.github.com>
Date: Sun, 6 Apr 2025 11:45:15 +0800
Subject: [PATCH 1/6] fix undefined name tf when executing tf.stack (#1294)

---
 modelscope/pipelines/cv/ocr_detection_pipeline.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modelscope/pipelines/cv/ocr_detection_pipeline.py b/modelscope/pipelines/cv/ocr_detection_pipeline.py
index 5b0fbda5..bf5ae3d6 100644
--- a/modelscope/pipelines/cv/ocr_detection_pipeline.py
+++ b/modelscope/pipelines/cv/ocr_detection_pipeline.py
@@ -206,6 +206,7 @@ class OCRDetectionPipeline(Pipeline):
         img_pad_resize = img_pad_resize - np.array(
             [123.68, 116.78, 103.94], dtype=np.float32)
 
+        import tensorflow as tf
         with self._graph.as_default():
             resize_size = tf.stack([resize_size, resize_size])
             orig_size = tf.stack([max(h, w), max(h, w)])

From 6adc8614c945a0f88711058959eee6f4789e1f27 Mon Sep 17 00:00:00 2001
From: "Xingjun.Wang"
Date: Mon, 7 Apr 2025 10:24:26 +0800
Subject: [PATCH 2/6] Add create_repo and upload UTs (#1282)

* add delete_repo, delete_dataset, exist_ok in create_repo

* add UT for create_repo

---
 modelscope/hub/api.py                   | 141 +++++++++++++++++-------
 modelscope/utils/repo_utils.py          |   3 +-
 tests/hub/test_create_repo.py           |  58 ++++++++++
 tests/hub/test_upload_file_folder.py    | 138 +++++++++++++++++++++++
 tests/msdatasets/test_dataset_upload.py | 137 -----------------------
 tests/run_config.yaml                   |   2 +-
 6 files changed, 302 insertions(+), 177 deletions(-)
 create mode 100644 tests/hub/test_create_repo.py
 create mode 100644 tests/hub/test_upload_file_folder.py
 delete mode 100644 tests/msdatasets/test_dataset_upload.py

diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index e3ec5ad0..a93ca2ff 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -226,7 +226,7 @@ class HubApi:
             headers=self.builder_headers(self.headers))
         handle_http_post_error(r, path, body)
         raise_on_error(r.json())
-        model_repo_url = f'{endpoint}/{model_id}'
+        model_repo_url = f'{endpoint}/models/{model_id}'
         return model_repo_url
 
     def delete_model(self, model_id: str, endpoint: Optional[str] = None):
@@ -401,6 +401,33 @@ class HubApi:
                 'Failed to check existence of repo: %s, make sure you have access authorization.'
                 % repo_type)
 
+    def delete_repo(self, repo_id: str, repo_type: str, endpoint: Optional[str] = None):
+        """
+        Delete a repository from ModelScope.
+
+        Args:
+            repo_id (`str`):
+                A namespace (user or an organization) and a repo name separated
+                by a `/`.
+            repo_type (`str`):
+                The type of the repository. Supported types are `model` and `dataset`.
+            endpoint (`str`):
+                The endpoint to use. If not provided, the default endpoint is `https://www.modelscope.cn`.
+                Could be set to `https://ai.modelscope.ai` for the international version.
+ """ + + if not endpoint: + endpoint = self.endpoint + + if repo_type == REPO_TYPE_DATASET: + self.delete_dataset(repo_id, endpoint) + elif repo_type == REPO_TYPE_MODEL: + self.delete_model(repo_id, endpoint) + else: + raise Exception(f'Arg repo_type {repo_type} not supported.') + + logger.info(f'Repo {repo_id} deleted successfully.') + @staticmethod def _create_default_config(model_dir): cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION) @@ -924,6 +951,21 @@ class HubApi: dataset_list = r.json()[API_RESPONSE_FIELD_DATA] return [x['Name'] for x in dataset_list] + def delete_dataset(self, dataset_id: str, endpoint: Optional[str] = None): + + cookies = ModelScopeConfig.get_cookies() + if not endpoint: + endpoint = self.endpoint + if cookies is None: + raise ValueError('Token does not exist, please login first.') + + path = f'{endpoint}/api/v1/datasets/{dataset_id}' + r = self.session.delete(path, + cookies=cookies, + headers=self.builder_headers(self.headers)) + raise_for_http_status(r) + raise_on_error(r.json()) + def get_dataset_id_and_type(self, dataset_name: str, namespace: str, endpoint: Optional[str] = None): """ Get the dataset id and type. """ if not endpoint: @@ -1361,15 +1403,42 @@ class HubApi: chinese_name: Optional[str] = '', license: Optional[str] = Licenses.APACHE_V2, endpoint: Optional[str] = None, + exist_ok: Optional[bool] = False, **kwargs, ) -> str: + """ + Create a repository on the ModelScope Hub. + + Args: + repo_id (str): The repo id in the format of `owner_name/repo_name`. + token (Union[str, bool, None]): The access token. + visibility (Optional[str]): The visibility of the repo, + could be `public`, `private`, `internal`, default to `public`. + repo_type (Optional[str]): The repo type, default to `model`. + chinese_name (Optional[str]): The Chinese name of the repo. + license (Optional[str]): The license of the repo, default to `apache-2.0`. + endpoint (Optional[str]): The endpoint to use. + In the format of `https://www.modelscope.cn` or 'https://www.modelscope.ai' + exist_ok (Optional[bool]): If the repo exists, whether to return the repo url directly. + **kwargs: The additional arguments. + + Returns: + str: The repo url. 
+ """ - # TODO: exist_ok if not repo_id: raise ValueError('Repo id cannot be empty!') if not endpoint: endpoint = self.endpoint - self.login(access_token=token) + + repo_exists: bool = self.repo_exists(repo_id, repo_type=repo_type, endpoint=endpoint) + if repo_exists: + if exist_ok: + return f'{endpoint}/{repo_type}s/{repo_id}' + else: + raise ValueError(f'Repo {repo_id} already exists!') + + self.login(access_token=token, endpoint=endpoint) repo_id_list = repo_id.split('/') if len(repo_id_list) != 2: @@ -1382,31 +1451,28 @@ class HubApi: if visibility is None: raise ValueError(f'Invalid visibility: {visibility}, ' f'supported visibilities: `public`, `private`, `internal`') - if not self.repo_exists(repo_id, repo_type=repo_type): - repo_url: str = self.create_model( - model_id=repo_id, - visibility=visibility, - license=license, - chinese_name=chinese_name, - ) - with tempfile.TemporaryDirectory() as temp_cache_dir: - from modelscope.hub.repository import Repository - repo = Repository(temp_cache_dir, repo_id) - default_config = { - 'framework': 'pytorch', - 'task': 'text-generation', - 'allow_remote': True - } - config_json = kwargs.get('config_json') - if not config_json: - config_json = {} - config = {**default_config, **config_json} - add_content_to_file( - repo, - 'configuration.json', [json.dumps(config)], - ignore_push_error=True) - else: - repo_url = f'{endpoint}/{repo_id}' + repo_url: str = self.create_model( + model_id=repo_id, + visibility=visibility, + license=license, + chinese_name=chinese_name, + ) + with tempfile.TemporaryDirectory() as temp_cache_dir: + from modelscope.hub.repository import Repository + repo = Repository(temp_cache_dir, repo_id) + default_config = { + 'framework': 'pytorch', + 'task': 'text-generation', + 'allow_remote': True + } + config_json = kwargs.get('config_json') + if not config_json: + config_json = {} + config = {**default_config, **config_json} + add_content_to_file( + repo, + 'configuration.json', [json.dumps(config)], + ignore_push_error=True) elif repo_type == REPO_TYPE_DATASET: visibilities = {k: v for k, v in DatasetVisibility.__dict__.items() if not k.startswith('__')} @@ -1414,20 +1480,19 @@ class HubApi: if visibility is None: raise ValueError(f'Invalid visibility: {visibility}, ' f'supported visibilities: `public`, `private`, `internal`') - if not self.repo_exists(repo_id, repo_type=repo_type): - repo_url: str = self.create_dataset( - dataset_name=repo_name, - namespace=namespace, - chinese_name=chinese_name, - license=license, - visibility=visibility, - ) - else: - repo_url = f'{endpoint}/datasets/{namespace}/{repo_name}' + repo_url: str = self.create_dataset( + dataset_name=repo_name, + namespace=namespace, + chinese_name=chinese_name, + license=license, + visibility=visibility, + ) else: raise ValueError(f'Invalid repo type: {repo_type}, supported repos: {REPO_TYPE_SUPPORT}') + logger.info(f'Repo created: {repo_url}') + return repo_url def create_commit( diff --git a/modelscope/utils/repo_utils.py b/modelscope/utils/repo_utils.py index d14adbf8..038ba908 100644 --- a/modelscope/utils/repo_utils.py +++ b/modelscope/utils/repo_utils.py @@ -323,7 +323,8 @@ class UploadInfo: file_hash_info = file_hash_info or get_file_hash(path) size = file_hash_info['file_size'] sha = file_hash_info['file_hash'] - sample = open(path, 'rb').read(512) + with open(path, 'rb') as f: + sample = f.read(512) return cls(sha256=sha, size=size, sample=sample) diff --git a/tests/hub/test_create_repo.py b/tests/hub/test_create_repo.py new file mode 100644 index 
00000000..b5658075 --- /dev/null +++ b/tests/hub/test_create_repo.py @@ -0,0 +1,58 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest +import uuid + +from modelscope import HubApi +from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import TEST_ACCESS_TOKEN1 +from modelscope.utils.test_utils import TEST_MODEL_ORG as TEST_ORG +from modelscope.utils.test_utils import delete_credential, test_level + +logger = get_logger() + + +class TestCreateRepo(unittest.TestCase): + + def setUp(self): + self.api = HubApi() + self.api.login(TEST_ACCESS_TOKEN1) + + self.repo_id_model: str = f'{TEST_ORG}/test_create_repo_model_{uuid.uuid4().hex[-6:]}' + self.repo_id_dataset: str = f'{TEST_ORG}/test_create_repo_dataset_{uuid.uuid4().hex[-6:]}' + + def tearDown(self): + self.api.delete_repo( + repo_id=self.repo_id_model, repo_type=REPO_TYPE_MODEL) + self.api.delete_repo( + repo_id=self.repo_id_dataset, repo_type=REPO_TYPE_DATASET) + delete_credential() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_create_repo(self): + + logger.info( + f'TEST: Creating repo {self.repo_id_model} and {self.repo_id_dataset} ...' + ) + + try: + self.api.create_repo( + repo_id=self.repo_id_model, + repo_type=REPO_TYPE_MODEL, + exist_ok=True) + except Exception as e: + logger.error(f'Failed to create repo {self.repo_id_model} !') + raise e + + try: + self.api.create_repo( + repo_id=self.repo_id_dataset, + repo_type=REPO_TYPE_DATASET, + exist_ok=True) + except Exception as e: + logger.error(f'Failed to create repo {self.repo_id_dataset} !') + raise e + + logger.info( + f'TEST: Created repo {self.repo_id_model} and {self.repo_id_dataset} successfully !' + ) diff --git a/tests/hub/test_upload_file_folder.py b/tests/hub/test_upload_file_folder.py new file mode 100644 index 00000000..aab0a1fd --- /dev/null +++ b/tests/hub/test_upload_file_folder.py @@ -0,0 +1,138 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import shutil +import struct +import tempfile +import unittest +import uuid + +import json + +from modelscope import HubApi +from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import TEST_ACCESS_TOKEN1 +from modelscope.utils.test_utils import TEST_MODEL_ORG as TEST_ORG +from modelscope.utils.test_utils import delete_credential, test_level + +logger = get_logger() + + +class TestUploadFileFolder(unittest.TestCase): + + def setUp(self): + self.api = HubApi() + self.api.login(TEST_ACCESS_TOKEN1) + + self.repo_id_model: str = f'{TEST_ORG}/test_upload_file_folder_model_{uuid.uuid4().hex[-6:]}' + self.repo_id_dataset: str = f'{TEST_ORG}/test_upload_file_folder_dataset_{uuid.uuid4().hex[-6:]}' + + self.work_dir = tempfile.mkdtemp() + self.model_file_path = f'{self.work_dir}/test_model.bin' + self.dataset_file_path = f'{self.work_dir}/test_data.jsonl' + + logger.info(f'Work directory: {self.work_dir}') + + self.api.create_repo( + repo_id=self.repo_id_model, + repo_type=REPO_TYPE_MODEL, + exist_ok=True) + self.api.create_repo( + repo_id=self.repo_id_dataset, + repo_type=REPO_TYPE_DATASET, + exist_ok=True) + + self._construct_file() + + def tearDown(self): + + # Remove repositories + self.api.delete_repo( + repo_id=self.repo_id_model, repo_type=REPO_TYPE_MODEL) + self.api.delete_repo( + repo_id=self.repo_id_dataset, repo_type=REPO_TYPE_DATASET) + + # Clean up the temporary credentials + delete_credential() + + # Clean up the temporary directory + shutil.rmtree(self.work_dir) + + def _construct_file(self): + + # Construct data + data_list = [ + { + 'id': 1, + 'value': 3.14 + }, + { + 'id': 2, + 'value': 2.71 + }, + { + 'id': 3, + 'value': 3.69 + }, + { + 'id': 4, + 'value': 9.31 + }, + { + 'id': 5, + 'value': 1.21 + }, + ] + + with open(self.model_file_path, 'wb') as f: + for entry in data_list: + packed_data = struct.pack('if', entry['id'], entry['value']) + f.write(packed_data) + logger.info(f'Constructed model file: {self.model_file_path}') + + with open(self.dataset_file_path, 'w') as f: + for entry in data_list: + f.write(json.dumps(entry) + '\n') + logger.info(f'Constructed dataset file: {self.dataset_file_path}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_upload_file_folder(self): + """ + Test uploading file/folder to the model/dataset repository. 
+ """ + + commit_info_upload_file_model = self.api.upload_file( + path_or_fileobj=self.model_file_path, + path_in_repo=os.path.basename(self.model_file_path), + repo_id=self.repo_id_model, + repo_type=REPO_TYPE_MODEL, + commit_message='Add model file for CI_TEST', + ) + self.assertTrue(commit_info_upload_file_model is not None) + + commit_info_upload_file_dataset = self.api.upload_file( + path_or_fileobj=self.dataset_file_path, + path_in_repo=os.path.basename(self.dataset_file_path), + repo_id=self.repo_id_dataset, + repo_type=REPO_TYPE_DATASET, + commit_message='Add dataset file for CI_TEST', + ) + self.assertTrue(commit_info_upload_file_dataset is not None) + + commit_info_upload_folder_model = self.api.upload_folder( + repo_id=self.repo_id_model, + folder_path=self.work_dir, + path_in_repo='test_data', + repo_type=REPO_TYPE_MODEL, + commit_message='Add model folder for CI_TEST', + ) + self.assertTrue(commit_info_upload_folder_model is not None) + + commit_info_upload_folder_dataset = self.api.upload_folder( + repo_id=self.repo_id_dataset, + folder_path=self.work_dir, + path_in_repo='test_data', + repo_type=REPO_TYPE_DATASET, + commit_message='Add dataset folder for CI_TEST', + ) + self.assertTrue(commit_info_upload_folder_dataset is not None) diff --git a/tests/msdatasets/test_dataset_upload.py b/tests/msdatasets/test_dataset_upload.py deleted file mode 100644 index 2cd910c2..00000000 --- a/tests/msdatasets/test_dataset_upload.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import os -import shutil -import tempfile -import unittest -import zipfile - -from modelscope.msdatasets import MsDataset -from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects -from modelscope.utils import logger as logging -from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode, - ModelFile) -from modelscope.utils.test_utils import test_level - -logger = logging.get_logger() - -KEY_EXTRACTED = 'extracted' - - -class DatasetUploadTest(unittest.TestCase): - - def setUp(self): - self.old_dir = os.getcwd() - self.dataset_name = 'small_coco_for_test' - self.dataset_file_name = self.dataset_name - self.prepared_dataset_name = 'pets_small' - self.token = os.getenv('TEST_UPLOAD_MS_TOKEN') - error_msg = 'The modelscope token can not be empty, please set env variable: TEST_UPLOAD_MS_TOKEN' - self.assertIsNotNone(self.token, msg=error_msg) - from modelscope.hub.api import HubApi - from modelscope.hub.api import ModelScopeConfig - self.api = HubApi() - self.api.login(self.token) - - # get user info - self.namespace, _ = ModelScopeConfig.get_user_info() - - self.temp_dir = tempfile.mkdtemp() - self.test_work_dir = os.path.join(self.temp_dir, self.dataset_name) - self.test_meta_dir = os.path.join(self.test_work_dir, 'meta') - if not os.path.exists(self.test_work_dir): - os.makedirs(self.test_work_dir) - - def tearDown(self): - os.chdir(self.old_dir) - shutil.rmtree(self.temp_dir, ignore_errors=True) - logger.info( - f'Temporary directory {self.temp_dir} successfully removed!') - - @staticmethod - def get_raw_downloaded_file_path(extracted_path): - raw_downloaded_file_path = '' - raw_data_dir = os.path.abspath( - os.path.join(extracted_path, '../../..')) - for root, dirs, files in os.walk(raw_data_dir): - if KEY_EXTRACTED in dirs: - for file in files: - curr_file_path = os.path.join(root, file) - if zipfile.is_zipfile(curr_file_path): - raw_downloaded_file_path = curr_file_path - return raw_downloaded_file_path - - @unittest.skipUnless(test_level() 
>= 1, 'skip test in current test level') - def test_ds_upload(self): - # Get the prepared data from hub, using default modelscope namespace - ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train') - config_res = ms_ds_train._hf_ds.config_kwargs - extracted_path = config_res.get('split_config').get('train') - raw_zipfile_path = self.get_raw_downloaded_file_path(extracted_path) - - MsDataset.upload( - object_name=self.dataset_file_name + '.zip', - local_file_path=raw_zipfile_path, - dataset_name=self.dataset_name, - namespace=self.namespace) - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_ds_upload_dir(self): - ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train') - config_train = ms_ds_train._hf_ds.config_kwargs - extracted_path_train = config_train.get('split_config').get('train') - - MsDataset.upload( - object_name='train', - local_file_path=os.path.join(extracted_path_train, - 'Pets/images/train'), - dataset_name=self.dataset_name, - namespace=self.namespace) - MsDataset.upload( - object_name='val', - local_file_path=os.path.join(extracted_path_train, - 'Pets/images/val'), - dataset_name=self.dataset_name, - namespace=self.namespace) - - objects = list_dataset_objects( - hub_api=self.api, - max_limit=-1, - is_recursive=True, - dataset_name=self.dataset_name, - namespace=self.namespace, - version=DEFAULT_DATASET_REVISION) - - logger.info(f'{len(objects)} objects have been uploaded: {objects}') - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_ds_download_dir(self): - test_ds = MsDataset.load( - self.dataset_name, - namespace=self.namespace, - download_mode=DownloadMode.FORCE_REDOWNLOAD) - assert test_ds.config_kwargs['split_config'].values() - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_ds_clone_meta(self): - MsDataset.clone_meta( - dataset_work_dir=self.test_meta_dir, - dataset_id=os.path.join(self.namespace, self.dataset_name)) - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_ds_upload_meta(self): - # Clone dataset meta repo first. - MsDataset.clone_meta( - dataset_work_dir=self.test_meta_dir, - dataset_id=os.path.join(self.namespace, self.dataset_name)) - - with open(os.path.join(self.test_meta_dir, ModelFile.README), - 'a') as f: - f.write('\nThis is a line for unit test.') - - MsDataset.upload_meta( - dataset_work_dir=self.test_meta_dir, - commit_message='Update for unit test.') - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/run_config.yaml b/tests/run_config.yaml index d9768a57..dd1611c1 100644 --- a/tests/run_config.yaml +++ b/tests/run_config.yaml @@ -1,5 +1,5 @@ # isolate cases in env, we can install different dependencies in each env. -isolated: # test cases that may require excessive anmount of GPU memory or run long time, which will be executed in dedicagted process. +isolated: # test cases that may require excessive amount of GPU memory or run long time, which will be executed in dedicated process. 
   - test_text_to_speech.py
   - test_multi_modal_embedding.py
   - test_ofa_tasks.py

From 6b9bfdf25a341b0ad5b887c605df72f6b3df2e2d Mon Sep 17 00:00:00 2001
From: "Xingjun.Wang"
Date: Mon, 7 Apr 2025 10:24:45 +0800
Subject: [PATCH 3/6] Set kwsbp==0.0.6 (#1293)

* set kwsbp==0.0.6 to avoid installing the 0.1.0 version, which is an invalid
  wheel package on PyPI

* update ddpm_guided_diffusion, shotdetect_scenedetect_lgss, MinDAEC

---
 requirements/audio/audio_kws.txt    | 2 +-
 requirements/audio/audio_signal.txt | 2 +-
 requirements/cv.txt                 | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/requirements/audio/audio_kws.txt b/requirements/audio/audio_kws.txt
index 276a0a2f..622ce981 100644
--- a/requirements/audio/audio_kws.txt
+++ b/requirements/audio/audio_kws.txt
@@ -1,5 +1,5 @@
 kaldiio
-kwsbp>=0.0.6
+kwsbp==0.0.6
 matplotlib
 py_sound_connect>=0.1
 scipy
diff --git a/requirements/audio/audio_signal.txt b/requirements/audio/audio_signal.txt
index 65f1ec61..328bff4d 100644
--- a/requirements/audio/audio_signal.txt
+++ b/requirements/audio/audio_signal.txt
@@ -1,7 +1,7 @@
 hdbscan
 hyperpyyaml
 librosa==0.10.1
-MinDAEC
+MinDAEC==0.0.2
 mir_eval>=0.7
 rotary_embedding_torch>=0.1.5
 scipy
diff --git a/requirements/cv.txt b/requirements/cv.txt
index efc0d5aa..e63d9d00 100644
--- a/requirements/cv.txt
+++ b/requirements/cv.txt
@@ -5,7 +5,7 @@ bmt_clipit>=1.0
 chumpy
 clip>=1.0
 control_ldm
-ddpm_guided_diffusion
+ddpm_guided_diffusion==0.0.0
 diffusers
 easydict
 edit_distance
@@ -55,7 +55,7 @@ regex
 scikit-image
 scikit-learn
 shapely
-shotdetect_scenedetect_lgss>=0.0.4
+shotdetect_scenedetect_lgss==0.0.4
 smplx
 tensorflow-estimator>=1.15.1
 tf_slim

From 3bfff5cb53624b6951c058d84b4724c094345bea Mon Sep 17 00:00:00 2001
From: Jintao
Date: Mon, 7 Apr 2025 13:24:18 +0800
Subject: [PATCH 4/6] Add swift docker (#1299)

---
 .github/workflows/docker-image.yml |  2 +-
 docker/Dockerfile.ubuntu           |  2 +-
 docker/build_image.py              | 72 ++++++++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index e028d36a..ca2d325b 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -11,7 +11,7 @@ on:
         description: 'ModelScope branch to build from(release/x.xx)'
         required: true
       image_type:
-        description: 'The image type to build(cpu/gpu/llm)'
+        description: 'The image type to build(cpu/gpu/llm/swift)'
         required: true
       modelscope_version:
         description: 'ModelScope version to use(x.xx.x)'
         required: true
diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu
index 4f6186f0..103d7141 100644
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -51,7 +51,7 @@ RUN echo $CUR_TIME
 RUN sh /tmp/install.sh {version_args} && \
     curl -fsSL https://ollama.com/install.sh | sh && \
     pip install --no-cache-dir -U funasr scikit-learn && \
-    pip install --no-cache-dir -U qwen_vl_utils pyav librosa timm transformers accelerate peft trl safetensors && \
+    pip install --no-cache-dir -U qwen_vl_utils qwen_omni_utils pyav librosa timm transformers accelerate peft trl safetensors && \
     cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b {modelscope_branch} --single-branch https://github.com/modelscope/modelscope.git && \
     cd modelscope && pip install . -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
    cd / && rm -fr /tmp/modelscope && pip cache purge; \

diff --git a/docker/build_image.py b/docker/build_image.py
index 5f253eae..4bfe9db2 100644
--- a/docker/build_image.py
+++ b/docker/build_image.py
@@ -335,6 +335,76 @@ class LLMImageBuilder(Builder):
         return os.system(f'docker push {image_tag2}')
 
 
+class SwiftImageBuilder(LLMImageBuilder):
+
+    def init_args(self, args) -> Any:
+        if not args.torch_version:
+            args.torch_version = '2.5.1'
+            args.torchaudio_version = '2.5.1'
+            args.torchvision_version = '0.20.1'
+        if not args.cuda_version:
+            args.cuda_version = '12.4.0'
+        if not args.vllm_version:
+            args.vllm_version = '0.7.3'
+        return super().init_args(args)
+
+    def generate_dockerfile(self) -> str:
+        meta_file = './docker/install.sh'
+        with open('docker/Dockerfile.extra_install', 'r') as f:
+            extra_content = f.read()
+        extra_content = extra_content.replace('{python_version}',
+                                              self.args.python_version)
+        extra_content += """
+RUN pip install --no-cache-dir deepspeed==0.14.5 --no-deps && \
+    pip install --no-cache-dir -U icecream soundfile pybind11 && \
+    SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && \
+    CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
+    pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable
+"""
+        version_args = (
+            f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} '
+            f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version} '
+            f'{self.args.flashattn_version}')
+        with open('docker/Dockerfile.ubuntu', 'r') as f:
+            content = f.read()
+        content = content.replace('{base_image}', self.args.base_image)
+        content = content.replace('{extra_content}', extra_content)
+        content = content.replace('{meta_file}', meta_file)
+        content = content.replace('{version_args}', version_args)
+        content = content.replace('{cur_time}', formatted_time)
+        content = content.replace('{install_ms_deps}', 'False')
+        content = content.replace('{torch_version}',
+                                  self.args.torch_version)
+        content = content.replace('{torchvision_version}',
+                                  self.args.torchvision_version)
+        content = content.replace('{torchaudio_version}',
+                                  self.args.torchaudio_version)
+        content = content.replace('{index_url}', '')
+        content = content.replace('{modelscope_branch}',
+                                  self.args.modelscope_branch)
+        content = content.replace('{swift_branch}', self.args.swift_branch)
+        return content
+
+    def image(self) -> str:
+        return (
+            f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-'
+            f'{self.args.python_tag}-torch{self.args.torch_version}-{self.args.modelscope_version}-swift-test'
+        )
+
+    def push(self):
+        ret = os.system(f'docker push {self.image()}')
+        if ret != 0:
+            return ret
+        image_tag2 = (
+            f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-'
+            f'{self.args.python_tag}-torch{self.args.torch_version}-'
+            f'{self.args.modelscope_version}-swift-{formatted_time}-test')
+        ret = os.system(f'docker tag {self.image()} {image_tag2}')
+        if ret != 0:
+            return ret
+        return os.system(f'docker push {image_tag2}')
+
+
 parser = argparse.ArgumentParser()
 parser.add_argument('--base_image', type=str, default=None)
 parser.add_argument('--image_type', type=str)
@@ -366,6 +436,8 @@ elif args.image_type.lower() == 'gpu':
     builder_cls = GPUImageBuilder
 elif args.image_type.lower() == 'llm':
     builder_cls = LLMImageBuilder
+elif args.image_type.lower() == 'swift':
+    builder_cls = SwiftImageBuilder
 else:
     raise ValueError(f'Unsupported image_type: {args.image_type}')
 

From 32aefd7428d2a1fa177f9f1ef6f43735821c83dd Mon Sep 17 00:00:00 2001
From: Jintao
Date: Mon, 7 Apr 2025 13:32:09 +0800
Subject: [PATCH 5/6] Merge release1.24 to master (#1298)

---
 .dev_scripts/dockerci.sh                      |  2 --
 README.md                                     |  2 +-
 docker/Dockerfile.ubuntu                      | 11 +++++++++--
 docker/Dockerfile.ubuntu_base                 | 19 ++++++++++---------
 docker/build_image.py                         |  2 +-
 docker/scripts/install_unifold.sh             |  1 -
 .../models/cv/nerf_recon_4k/network/utils.py  |  3 ++-
 .../models/cv/nerf_recon_acc/network/nerf.py  |  3 ++-
 .../models/cv/nerf_recon_acc/network/utils.py |  3 ++-
 .../pipelines/nlp/text_generation_pipeline.py |  2 +-
 requirements/audio/audio_codec.txt            |  2 +-
 requirements/audio/audio_tts.txt              |  4 ++--
 requirements/multi-modal.txt                  |  6 ++++--
 13 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/.dev_scripts/dockerci.sh b/.dev_scripts/dockerci.sh
index 4f66073c..d892f61c 100644
--- a/.dev_scripts/dockerci.sh
+++ b/.dev_scripts/dockerci.sh
@@ -43,7 +43,6 @@ do
        -e HUB_DATASET_ENDPOINT=$HUB_DATASET_ENDPOINT \
        -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \
        -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \
-       -e TEST_LEVEL=$TEST_LEVEL \
        -e MODELSCOPE_ENVIRONMENT='ci' \
        -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
        -e MODEL_TAG_URL=$MODEL_TAG_URL \
@@ -66,7 +65,6 @@ do
        -e HUB_DATASET_ENDPOINT=$HUB_DATASET_ENDPOINT \
        -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \
        -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \
-       -e TEST_LEVEL=$TEST_LEVEL \
        -e MODELSCOPE_ENVIRONMENT='ci' \
        -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
        -e MODEL_TAG_URL=$MODEL_TAG_URL \
diff --git a/README.md b/README.md
index 0656b117..eb65c053 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
[hunk body lost in extraction: the one-line change falls inside the README's
HTML badge header, whose markup was stripped; only the fragment "-itest`"
survives, so the original content is not recoverable here]
diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index 103d7141..fd51af0e 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -20,11 +20,10 @@ ARG INSTALL_MS_DEPS={install_ms_deps} COPY requirements /var/modelscope RUN pip uninstall ms-swift modelscope -y && pip --no-cache-dir install pip==23.* -U && \ - pip install --no-cache-dir apex -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ if [ "$INSTALL_MS_DEPS" = "True" ]; then \ pip --no-cache-dir install omegaconf==2.0.6 && \ + pip install 'editdistance==0.8.1' && \ pip install --no-cache-dir 'cython<=0.29.36' versioneer 'numpy<2.0' -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ - pip install --no-cache-dir kwsbp==0.0.6 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir -r /var/modelscope/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir -r /var/modelscope/audio.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir -r /var/modelscope/cv.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ @@ -39,6 +38,13 @@ if [ "$INSTALL_MS_DEPS" = "True" ]; then \ pip install --no-cache-dir funtextprocessing typeguard==2.13.3 scikit-learn -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir text2sql_lgesql==1.3.0 git+https://github.com/jin-s13/xtcocoapi.git@v1.14 git+https://github.com/gatagat/lap.git@v0.4.0 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps && \ pip install --no-cache-dir mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 mpi4py paint_ldm ipykernel fasttext -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ + pip uninstall ddpm_guided_diffusion -y && \ + pip install --no-cache-dir 'blobfile>=1.0.5' && \ + pip install 'ddpm_guided_diffusion' -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --no-index && \ + pip uninstall shotdetect_scenedetect_lgss -y && \ + pip install 'shotdetect_scenedetect_lgss' -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --no-index && \ + pip uninstall MinDAEC -y && \ + pip install https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/MinDAEC-0.0.2-py3-none-any.whl && \ pip cache purge; \ else \ pip install --no-cache-dir -r /var/modelscope/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ @@ -49,6 +55,7 @@ ARG CUR_TIME={cur_time} RUN echo $CUR_TIME RUN sh /tmp/install.sh {version_args} && \ + pip install --no-cache-dir xformers==0.0.27 && \ curl -fsSL https://ollama.com/install.sh | sh && \ pip install --no-cache-dir -U funasr scikit-learn && \ pip install --no-cache-dir -U qwen_vl_utils qwen_omni_utils pyav librosa timm transformers accelerate peft trl safetensors && \ diff --git a/docker/Dockerfile.ubuntu_base b/docker/Dockerfile.ubuntu_base index 903c9930..a80f008c 100644 --- a/docker/Dockerfile.ubuntu_base +++ b/docker/Dockerfile.ubuntu_base @@ -186,7 +186,15 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ RUN if [ "$USE_GPU" = "True" ] ; then \ export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.9;9.0;8.6+PTX" && pip install --no-cache-dir git+https://github.com/gxd1994/Pointnet2.PyTorch.git@master#subdirectory=pointnet2; \ else \ - echo 'cpu unsupport Pointnet2'; \ + echo 'CPU env does not support Pointnet2'; \ + fi + +# NVIDIA apex building +RUN if [ 
"$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir packaging &&\ + bash /tmp/install_apex.sh; \ + else \ + echo 'CPU env does not support NVIDIA Apex.'; \ fi @@ -203,13 +211,6 @@ ARG TENSORFLOW_VERSION={tf_version} cd /tmp && git clone -b ms_build --single-branch https://github.com/tastelikefeet/mmcv.git && cd mmcv && MMCV_WITH_OPS=1 MAX_JOBS=32 pip install . && cd / && rm -fr /tmp/mmcv && pip cache purge; \ fi - # This limits the cuda121 version - RUN if [ "$USE_GPU" = "True" ] ; then \ - pip install --no-cache-dir --force tinycudann==1.7 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ - else \ - echo 'cpu not install tinycudann'; \ - fi - - RUN pip install --no-cache-dir fairseq + RUN pip install --no-cache-dir https://github.com/liyaodev/fairseq/releases/download/v0.12.3.1/fairseq-0.12.3.1-cp311-cp311-linux_x86_64.whl # for py311 ENTRYPOINT [] diff --git a/docker/build_image.py b/docker/build_image.py index 4bfe9db2..268ba658 100644 --- a/docker/build_image.py +++ b/docker/build_image.py @@ -31,7 +31,7 @@ class Builder: if not args.cuda_version: args.cuda_version = '12.1.0' if not args.vllm_version: - args.vllm_version = '0.5.3' + args.vllm_version = '0.7.2' if not args.lmdeploy_version: args.lmdeploy_version = '0.6.2' if not args.autogptq_version: diff --git a/docker/scripts/install_unifold.sh b/docker/scripts/install_unifold.sh index 0e9f3682..ba7a1870 100644 --- a/docker/scripts/install_unifold.sh +++ b/docker/scripts/install_unifold.sh @@ -8,5 +8,4 @@ apt-get update && apt-get install -y hmmer kalign curl cmake \ && ln -s /opt/hhsuite/bin/* /usr/bin \ && popd \ && rm -rf /tmp/hh-suite \ - && pip install --no-cache-dir unicore -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html \ && pip install --no-cache-dir biopython ipdb diff --git a/modelscope/models/cv/nerf_recon_4k/network/utils.py b/modelscope/models/cv/nerf_recon_4k/network/utils.py index aa8c3d66..6983d36a 100644 --- a/modelscope/models/cv/nerf_recon_4k/network/utils.py +++ b/modelscope/models/cv/nerf_recon_4k/network/utils.py @@ -5,7 +5,6 @@ from collections import defaultdict import mcubes import numpy as np -import tinycudann as tcnn import torch import torch.nn as nn import torch.nn.functional as F @@ -166,6 +165,8 @@ def normalize(dat, inp_scale, tgt_scale): def cleanup(): + import tinycudann as tcnn + gc.collect() torch.cuda.empty_cache() tcnn.free_temporary_memory() diff --git a/modelscope/models/cv/nerf_recon_acc/network/nerf.py b/modelscope/models/cv/nerf_recon_acc/network/nerf.py index 972d2d85..64ffae14 100644 --- a/modelscope/models/cv/nerf_recon_acc/network/nerf.py +++ b/modelscope/models/cv/nerf_recon_acc/network/nerf.py @@ -1,7 +1,6 @@ # The implementation is modified from nerfacc, made publicly available under the MIT License # at https://github.com/KAIR-BAIR/nerfacc/blob/master/examples/radiance_fields/ngp.py import numpy as np -import tinycudann as tcnn import torch import torch.nn as nn from nerfacc import ContractionType, OccupancyGrid, ray_marching, rendering @@ -113,6 +112,8 @@ trunc_exp = _TruncExp.apply class VolumeDensity(nn.Module): def __init__(self, config): + import tinycudann as tcnn + super().__init__() self.config = config self.radius = self.config.radius diff --git a/modelscope/models/cv/nerf_recon_acc/network/utils.py b/modelscope/models/cv/nerf_recon_acc/network/utils.py index aa8c3d66..6983d36a 100644 --- a/modelscope/models/cv/nerf_recon_acc/network/utils.py +++ b/modelscope/models/cv/nerf_recon_acc/network/utils.py @@ -5,7 +5,6 @@ from 
collections import defaultdict import mcubes import numpy as np -import tinycudann as tcnn import torch import torch.nn as nn import torch.nn.functional as F @@ -166,6 +165,8 @@ def normalize(dat, inp_scale, tgt_scale): def cleanup(): + import tinycudann as tcnn + gc.collect() torch.cuda.empty_cache() tcnn.free_temporary_memory() diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py index 8d3a5a07..2f0c4b7c 100644 --- a/modelscope/pipelines/nlp/text_generation_pipeline.py +++ b/modelscope/pipelines/nlp/text_generation_pipeline.py @@ -440,7 +440,7 @@ class QWenTextGenerationPipeline(Pipeline): class SeqGPTPipeline(Pipeline): def __init__(self, model: Union[Model, str], **kwargs): - from modelscope.utils.hf_util import AutoTokenizer + from modelscope import AutoTokenizer if isinstance(model, str): model_dir = snapshot_download( diff --git a/requirements/audio/audio_codec.txt b/requirements/audio/audio_codec.txt index c7ac8b2b..bb6097b9 100644 --- a/requirements/audio/audio_codec.txt +++ b/requirements/audio/audio_codec.txt @@ -1 +1 @@ -funcodec>=0.2.0 +ms-funcodec>=0.2.0 diff --git a/requirements/audio/audio_tts.txt b/requirements/audio/audio_tts.txt index 5cff1b28..972bb992 100644 --- a/requirements/audio/audio_tts.txt +++ b/requirements/audio/audio_tts.txt @@ -15,7 +15,7 @@ protobuf ptflops ptyprocess>=0.7.0 pygments>=2.12.0 -pysptk>=0.1.15,<0.1.19 +#pysptk>=0.1.15,<0.1.19 pytorch_wavelets PyWavelets>=1.0.0 scikit-learn @@ -23,6 +23,6 @@ sox tensorboardx tqdm traitlets>=5.3.0 -ttsfrd>=0.1.2 +#ttsfrd>=0.1.2 # not supported for py311 or above unidecode wcwidth>=0.2.5 diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt index 6c974b56..b2f767c5 100644 --- a/requirements/multi-modal.txt +++ b/requirements/multi-modal.txt @@ -2,9 +2,11 @@ accelerate cloudpickle decord>=0.6.0 diffusers>=0.25.0 -# 0.12.1 has issue of No such file or directory: 'fairseq/version.txt' -fairseq==0.12.2 ftfy>=6.0.3 +# 0.12.1 has issue of No such file or directory: 'fairseq/version.txt' +# 0.12.2 not support py311 +#fairseq==0.12.2 +https://github.com/liyaodev/fairseq/releases/download/v0.12.3.1/fairseq-0.12.3.1-cp311-cp311-linux_x86_64.whl librosa==0.10.1 opencv-python pycocoevalcap>=1.2 From 5a0d8b6523f336d3b055b3d16f3f9293021d9e69 Mon Sep 17 00:00:00 2001 From: Yunfeng Wang Date: Mon, 7 Apr 2025 15:45:40 +0800 Subject: [PATCH 6/6] fix: Update speaker_verification_rdino_pipeline.py (#1300) --- .../pipelines/audio/speaker_verification_rdino_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py b/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py index dd08ccf4..c5ae9f6c 100644 --- a/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py +++ b/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py @@ -93,7 +93,7 @@ class RDINO_Pipeline(Pipeline): if fs != self.model_config['sample_rate']: raise ValueError( 'modelscope error: Only support %d sample rate files' - % self.model_cfg['sample_rate']) + % self.model_config['sample_rate']) output['data%d' % (i + 1)] = torch.from_numpy(data).unsqueeze(0) else:
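
Appendix (not part of the patch series): the repo-management surface added in
PATCH 2/6, namely create_repo with exist_ok, upload_file/upload_folder, and
delete_repo, composes into a simple create/upload/delete flow. The sketch below
is a minimal usage example, not a definitive recipe: it assumes a modelscope
build that already contains these patches, and the organization name, file
name, and token environment variable are placeholders rather than values taken
from the patches.

# Hypothetical end-to-end flow over the API added in PATCH 2/6.
import os

from modelscope import HubApi
from modelscope.utils.constant import REPO_TYPE_MODEL

api = HubApi()
# Placeholder: supply a real ModelScope access token.
api.login(os.environ['MODELSCOPE_API_TOKEN'])

repo_id = 'my-org/demo-model'  # placeholder owner/name

# With exist_ok=True an existing repo returns its URL instead of raising
# ValueError, so the call is safe to repeat.
repo_url = api.create_repo(
    repo_id=repo_id, repo_type=REPO_TYPE_MODEL, exist_ok=True)
print(repo_url)  # e.g. https://www.modelscope.cn/models/my-org/demo-model

# Upload a single local file into the repo root.
api.upload_file(
    path_or_fileobj='weights.bin',  # placeholder local file
    path_in_repo='weights.bin',
    repo_id=repo_id,
    repo_type=REPO_TYPE_MODEL,
    commit_message='Add weights')

# delete_repo dispatches to delete_model/delete_dataset based on repo_type.
api.delete_repo(repo_id=repo_id, repo_type=REPO_TYPE_MODEL)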