diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index abb6ae63..a8c32154 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -11,7 +11,7 @@ on: description: 'ModelScope branch to build from(release/x.xx)' required: true image_type: - description: 'The image type to build(cpu/gpu/llm/paddle_cpu)' + description: 'The image type to build(cpu/gpu/llm/swift/paddle_cpu)' required: true modelscope_version: description: 'ModelScope version to use(x.xx.x)' diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index 883cfde5..fd51af0e 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -24,7 +24,6 @@ if [ "$INSTALL_MS_DEPS" = "True" ]; then \ pip --no-cache-dir install omegaconf==2.0.6 && \ pip install 'editdistance==0.8.1' && \ pip install --no-cache-dir 'cython<=0.29.36' versioneer 'numpy<2.0' -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ - # pip install --no-cache-dir kwsbp==0.0.6 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir -r /var/modelscope/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir -r /var/modelscope/audio.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip install --no-cache-dir -r /var/modelscope/cv.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ @@ -59,7 +58,7 @@ RUN sh /tmp/install.sh {version_args} && \ pip install --no-cache-dir xformers==0.0.27 && \ curl -fsSL https://ollama.com/install.sh | sh && \ pip install --no-cache-dir -U funasr scikit-learn && \ - pip install --no-cache-dir -U qwen_vl_utils pyav librosa timm transformers accelerate peft trl safetensors && \ + pip install --no-cache-dir -U qwen_vl_utils qwen_omni_utils pyav librosa timm transformers accelerate peft trl safetensors && \ cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b 
{modelscope_branch} --single-branch https://github.com/modelscope/modelscope.git && \ cd modelscope && pip install . -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ cd / && rm -fr /tmp/modelscope && pip cache purge; \ diff --git a/docker/Dockerfile.ubuntu_base b/docker/Dockerfile.ubuntu_base index 0816e868..a80f008c 100644 --- a/docker/Dockerfile.ubuntu_base +++ b/docker/Dockerfile.ubuntu_base @@ -211,14 +211,6 @@ ARG TENSORFLOW_VERSION={tf_version} cd /tmp && git clone -b ms_build --single-branch https://github.com/tastelikefeet/mmcv.git && cd mmcv && MMCV_WITH_OPS=1 MAX_JOBS=32 pip install . && cd / && rm -fr /tmp/mmcv && pip cache purge; \ fi - # This limits the cuda121 version -# RUN if [ "$USE_GPU" = "True" ] ; then \ -# pip install --no-cache-dir --force tinycudann==1.7 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ -# else \ -# echo 'cpu not install tinycudann'; \ -# fi - - # RUN pip install --no-cache-dir fairseq # for py310 RUN pip install --no-cache-dir https://github.com/liyaodev/fairseq/releases/download/v0.12.3.1/fairseq-0.12.3.1-cp311-cp311-linux_x86_64.whl # for py311 ENTRYPOINT [] diff --git a/docker/build_image.py b/docker/build_image.py index 4a636b7a..e2d4d546 100644 --- a/docker/build_image.py +++ b/docker/build_image.py @@ -335,6 +335,76 @@ class LLMImageBuilder(Builder): return os.system(f'docker push {image_tag2}') +class SwiftImageBuilder(LLMImageBuilder): + + def init_args(self, args) -> Any: + if not args.torch_version: + args.torch_version = '2.5.1' + args.torchaudio_version = '2.5.1' + args.torchvision_version = '0.20.1' + if not args.cuda_version: + args.cuda_version = '12.4.0' + if not args.vllm_version: + args.vllm_version = '0.7.3' + return super().init_args(args) + + def generate_dockerfile(self) -> str: + meta_file = './docker/install.sh' + with open('docker/Dockerfile.extra_install', 'r') as f: + extra_content = f.read() + extra_content = extra_content.replace('{python_version}', 
+ self.args.python_version) + extra_content += """ +RUN pip install --no-cache-dir deepspeed==0.14.5 --no-deps && \ + pip install --no-cache-dir -U icecream soundfile pybind11 && \ + SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && \ + CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \ + pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable +""" + version_args = ( + f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} ' + f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version} ' + f'{self.args.flashattn_version}') + with open('docker/Dockerfile.ubuntu', 'r') as f: + content = f.read() + content = content.replace('{base_image}', self.args.base_image) + content = content.replace('{extra_content}', extra_content) + content = content.replace('{meta_file}', meta_file) + content = content.replace('{version_args}', version_args) + content = content.replace('{cur_time}', formatted_time) + content = content.replace('{install_ms_deps}', 'False') + content = content.replace('{torch_version}', + self.args.torch_version) + content = content.replace('{torchvision_version}', + self.args.torchvision_version) + content = content.replace('{torchaudio_version}', + self.args.torchaudio_version) + content = content.replace('{index_url}', '') + content = content.replace('{modelscope_branch}', + self.args.modelscope_branch) + content = content.replace('{swift_branch}', self.args.swift_branch) + return content + + def image(self) -> str: + return ( + f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' + f'{self.args.python_tag}-torch{self.args.torch_version}-{self.args.modelscope_version}-swift-test' + ) + + def push(self): + ret = os.system(f'docker push {self.image()}') + if ret != 0: + return ret + image_tag2 = ( + f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-' + 
f'{self.args.python_tag}-torch{self.args.torch_version}-' + f'{self.args.modelscope_version}-swift-{formatted_time}-test') + ret = os.system(f'docker tag {self.image()} {image_tag2}') + if ret != 0: + return ret + return os.system(f'docker push {image_tag2}') + + class PaddleCPUImageBuilder(Builder): def __init__(self, args: Any, dry_run: bool): @@ -412,6 +482,8 @@ elif args.image_type.lower() == 'gpu': builder_cls = GPUImageBuilder elif args.image_type.lower() == 'llm': builder_cls = LLMImageBuilder +elif args.image_type.lower() == 'swift': + builder_cls = SwiftImageBuilder elif args.image_type.lower() == 'paddle_cpu': builder_cls = PaddleCPUImageBuilder else: diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index e3ec5ad0..a93ca2ff 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -226,7 +226,7 @@ class HubApi: headers=self.builder_headers(self.headers)) handle_http_post_error(r, path, body) raise_on_error(r.json()) - model_repo_url = f'{endpoint}/{model_id}' + model_repo_url = f'{endpoint}/models/{model_id}' return model_repo_url def delete_model(self, model_id: str, endpoint: Optional[str] = None): @@ -401,6 +401,33 @@ class HubApi: 'Failed to check existence of repo: %s, make sure you have access authorization.' % repo_type) + def delete_repo(self, repo_id: str, repo_type: str, endpoint: Optional[str] = None): + """ + Delete a repository from ModelScope. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + repo_type (`str`): + The type of the repository. Supported types are `model` and `dataset`. + endpoint(`str`): + The endpoint to use. If not provided, the default endpoint is `https://www.modelscope.cn` + Could be set to `https://ai.modelscope.ai` for international version. 
+ """ + + if not endpoint: + endpoint = self.endpoint + + if repo_type == REPO_TYPE_DATASET: + self.delete_dataset(repo_id, endpoint) + elif repo_type == REPO_TYPE_MODEL: + self.delete_model(repo_id, endpoint) + else: + raise Exception(f'Arg repo_type {repo_type} not supported.') + + logger.info(f'Repo {repo_id} deleted successfully.') + @staticmethod def _create_default_config(model_dir): cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION) @@ -924,6 +951,21 @@ class HubApi: dataset_list = r.json()[API_RESPONSE_FIELD_DATA] return [x['Name'] for x in dataset_list] + def delete_dataset(self, dataset_id: str, endpoint: Optional[str] = None): + + cookies = ModelScopeConfig.get_cookies() + if not endpoint: + endpoint = self.endpoint + if cookies is None: + raise ValueError('Token does not exist, please login first.') + + path = f'{endpoint}/api/v1/datasets/{dataset_id}' + r = self.session.delete(path, + cookies=cookies, + headers=self.builder_headers(self.headers)) + raise_for_http_status(r) + raise_on_error(r.json()) + def get_dataset_id_and_type(self, dataset_name: str, namespace: str, endpoint: Optional[str] = None): """ Get the dataset id and type. """ if not endpoint: @@ -1361,15 +1403,42 @@ class HubApi: chinese_name: Optional[str] = '', license: Optional[str] = Licenses.APACHE_V2, endpoint: Optional[str] = None, + exist_ok: Optional[bool] = False, **kwargs, ) -> str: + """ + Create a repository on the ModelScope Hub. + + Args: + repo_id (str): The repo id in the format of `owner_name/repo_name`. + token (Union[str, bool, None]): The access token. + visibility (Optional[str]): The visibility of the repo, + could be `public`, `private`, `internal`, default to `public`. + repo_type (Optional[str]): The repo type, default to `model`. + chinese_name (Optional[str]): The Chinese name of the repo. + license (Optional[str]): The license of the repo, default to `apache-2.0`. + endpoint (Optional[str]): The endpoint to use. 
+ In the format of `https://www.modelscope.cn` or 'https://www.modelscope.ai' + exist_ok (Optional[bool]): If the repo exists, whether to return the repo url directly. + **kwargs: The additional arguments. + + Returns: + str: The repo url. + """ - # TODO: exist_ok if not repo_id: raise ValueError('Repo id cannot be empty!') if not endpoint: endpoint = self.endpoint - self.login(access_token=token) + + repo_exists: bool = self.repo_exists(repo_id, repo_type=repo_type, endpoint=endpoint) + if repo_exists: + if exist_ok: + return f'{endpoint}/{repo_type}s/{repo_id}' + else: + raise ValueError(f'Repo {repo_id} already exists!') + + self.login(access_token=token, endpoint=endpoint) repo_id_list = repo_id.split('/') if len(repo_id_list) != 2: @@ -1382,31 +1451,28 @@ class HubApi: if visibility is None: raise ValueError(f'Invalid visibility: {visibility}, ' f'supported visibilities: `public`, `private`, `internal`') - if not self.repo_exists(repo_id, repo_type=repo_type): - repo_url: str = self.create_model( - model_id=repo_id, - visibility=visibility, - license=license, - chinese_name=chinese_name, - ) - with tempfile.TemporaryDirectory() as temp_cache_dir: - from modelscope.hub.repository import Repository - repo = Repository(temp_cache_dir, repo_id) - default_config = { - 'framework': 'pytorch', - 'task': 'text-generation', - 'allow_remote': True - } - config_json = kwargs.get('config_json') - if not config_json: - config_json = {} - config = {**default_config, **config_json} - add_content_to_file( - repo, - 'configuration.json', [json.dumps(config)], - ignore_push_error=True) - else: - repo_url = f'{endpoint}/{repo_id}' + repo_url: str = self.create_model( + model_id=repo_id, + visibility=visibility, + license=license, + chinese_name=chinese_name, + ) + with tempfile.TemporaryDirectory() as temp_cache_dir: + from modelscope.hub.repository import Repository + repo = Repository(temp_cache_dir, repo_id) + default_config = { + 'framework': 'pytorch', + 'task': 
'text-generation', + 'allow_remote': True + } + config_json = kwargs.get('config_json') + if not config_json: + config_json = {} + config = {**default_config, **config_json} + add_content_to_file( + repo, + 'configuration.json', [json.dumps(config)], + ignore_push_error=True) elif repo_type == REPO_TYPE_DATASET: visibilities = {k: v for k, v in DatasetVisibility.__dict__.items() if not k.startswith('__')} @@ -1414,20 +1480,19 @@ class HubApi: if visibility is None: raise ValueError(f'Invalid visibility: {visibility}, ' f'supported visibilities: `public`, `private`, `internal`') - if not self.repo_exists(repo_id, repo_type=repo_type): - repo_url: str = self.create_dataset( - dataset_name=repo_name, - namespace=namespace, - chinese_name=chinese_name, - license=license, - visibility=visibility, - ) - else: - repo_url = f'{endpoint}/datasets/{namespace}/{repo_name}' + repo_url: str = self.create_dataset( + dataset_name=repo_name, + namespace=namespace, + chinese_name=chinese_name, + license=license, + visibility=visibility, + ) else: raise ValueError(f'Invalid repo type: {repo_type}, supported repos: {REPO_TYPE_SUPPORT}') + logger.info(f'Repo created: {repo_url}') + return repo_url def create_commit( diff --git a/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py b/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py index dd08ccf4..c5ae9f6c 100644 --- a/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py +++ b/modelscope/pipelines/audio/speaker_verification_rdino_pipeline.py @@ -93,7 +93,7 @@ class RDINO_Pipeline(Pipeline): if fs != self.model_config['sample_rate']: raise ValueError( 'modelscope error: Only support %d sample rate files' - % self.model_cfg['sample_rate']) + % self.model_config['sample_rate']) output['data%d' % (i + 1)] = torch.from_numpy(data).unsqueeze(0) else: diff --git a/modelscope/pipelines/cv/ocr_detection_pipeline.py b/modelscope/pipelines/cv/ocr_detection_pipeline.py index 5b0fbda5..bf5ae3d6 100644 --- 
a/modelscope/pipelines/cv/ocr_detection_pipeline.py +++ b/modelscope/pipelines/cv/ocr_detection_pipeline.py @@ -206,6 +206,7 @@ class OCRDetectionPipeline(Pipeline): img_pad_resize = img_pad_resize - np.array( [123.68, 116.78, 103.94], dtype=np.float32) + import tensorflow as tf with self._graph.as_default(): resize_size = tf.stack([resize_size, resize_size]) orig_size = tf.stack([max(h, w), max(h, w)]) diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py index 8d3a5a07..2f0c4b7c 100644 --- a/modelscope/pipelines/nlp/text_generation_pipeline.py +++ b/modelscope/pipelines/nlp/text_generation_pipeline.py @@ -440,7 +440,7 @@ class QWenTextGenerationPipeline(Pipeline): class SeqGPTPipeline(Pipeline): def __init__(self, model: Union[Model, str], **kwargs): - from modelscope.utils.hf_util import AutoTokenizer + from modelscope import AutoTokenizer if isinstance(model, str): model_dir = snapshot_download( diff --git a/modelscope/utils/repo_utils.py b/modelscope/utils/repo_utils.py index d14adbf8..038ba908 100644 --- a/modelscope/utils/repo_utils.py +++ b/modelscope/utils/repo_utils.py @@ -323,7 +323,8 @@ class UploadInfo: file_hash_info = file_hash_info or get_file_hash(path) size = file_hash_info['file_size'] sha = file_hash_info['file_hash'] - sample = open(path, 'rb').read(512) + with open(path, 'rb') as f: + sample = f.read(512) return cls(sha256=sha, size=size, sample=sample) diff --git a/requirements/audio/audio_codec.txt b/requirements/audio/audio_codec.txt index 80d23ce2..bb6097b9 100644 --- a/requirements/audio/audio_codec.txt +++ b/requirements/audio/audio_codec.txt @@ -1,2 +1 @@ -#funcodec>=0.2.0 ms-funcodec>=0.2.0 diff --git a/requirements/audio/audio_kws.txt b/requirements/audio/audio_kws.txt index 276a0a2f..622ce981 100644 --- a/requirements/audio/audio_kws.txt +++ b/requirements/audio/audio_kws.txt @@ -1,5 +1,5 @@ kaldiio -kwsbp>=0.0.6 +kwsbp==0.0.6 matplotlib py_sound_connect>=0.1 scipy 
diff --git a/requirements/audio/audio_signal.txt b/requirements/audio/audio_signal.txt index 65f1ec61..328bff4d 100644 --- a/requirements/audio/audio_signal.txt +++ b/requirements/audio/audio_signal.txt @@ -1,7 +1,7 @@ hdbscan hyperpyyaml librosa==0.10.1 -MinDAEC +MinDAEC==0.0.2 mir_eval>=0.7 rotary_embedding_torch>=0.1.5 scipy diff --git a/requirements/cv.txt b/requirements/cv.txt index efc0d5aa..e63d9d00 100644 --- a/requirements/cv.txt +++ b/requirements/cv.txt @@ -5,7 +5,7 @@ bmt_clipit>=1.0 chumpy clip>=1.0 control_ldm -ddpm_guided_diffusion +ddpm_guided_diffusion==0.0.0 diffusers easydict edit_distance @@ -55,7 +55,7 @@ regex scikit-image scikit-learn shapely -shotdetect_scenedetect_lgss>=0.0.4 +shotdetect_scenedetect_lgss==0.0.4 smplx tensorflow-estimator>=1.15.1 tf_slim diff --git a/tests/hub/test_create_repo.py b/tests/hub/test_create_repo.py new file mode 100644 index 00000000..b5658075 --- /dev/null +++ b/tests/hub/test_create_repo.py @@ -0,0 +1,58 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest +import uuid + +from modelscope import HubApi +from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import TEST_ACCESS_TOKEN1 +from modelscope.utils.test_utils import TEST_MODEL_ORG as TEST_ORG +from modelscope.utils.test_utils import delete_credential, test_level + +logger = get_logger() + + +class TestCreateRepo(unittest.TestCase): + + def setUp(self): + self.api = HubApi() + self.api.login(TEST_ACCESS_TOKEN1) + + self.repo_id_model: str = f'{TEST_ORG}/test_create_repo_model_{uuid.uuid4().hex[-6:]}' + self.repo_id_dataset: str = f'{TEST_ORG}/test_create_repo_dataset_{uuid.uuid4().hex[-6:]}' + + def tearDown(self): + self.api.delete_repo( + repo_id=self.repo_id_model, repo_type=REPO_TYPE_MODEL) + self.api.delete_repo( + repo_id=self.repo_id_dataset, repo_type=REPO_TYPE_DATASET) + delete_credential() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_create_repo(self): + + logger.info( + f'TEST: Creating repo {self.repo_id_model} and {self.repo_id_dataset} ...' + ) + + try: + self.api.create_repo( + repo_id=self.repo_id_model, + repo_type=REPO_TYPE_MODEL, + exist_ok=True) + except Exception as e: + logger.error(f'Failed to create repo {self.repo_id_model} !') + raise e + + try: + self.api.create_repo( + repo_id=self.repo_id_dataset, + repo_type=REPO_TYPE_DATASET, + exist_ok=True) + except Exception as e: + logger.error(f'Failed to create repo {self.repo_id_dataset} !') + raise e + + logger.info( + f'TEST: Created repo {self.repo_id_model} and {self.repo_id_dataset} successfully !' + ) diff --git a/tests/hub/test_upload_file_folder.py b/tests/hub/test_upload_file_folder.py new file mode 100644 index 00000000..aab0a1fd --- /dev/null +++ b/tests/hub/test_upload_file_folder.py @@ -0,0 +1,138 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import shutil +import struct +import tempfile +import unittest +import uuid + +import json + +from modelscope import HubApi +from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import TEST_ACCESS_TOKEN1 +from modelscope.utils.test_utils import TEST_MODEL_ORG as TEST_ORG +from modelscope.utils.test_utils import delete_credential, test_level + +logger = get_logger() + + +class TestUploadFileFolder(unittest.TestCase): + + def setUp(self): + self.api = HubApi() + self.api.login(TEST_ACCESS_TOKEN1) + + self.repo_id_model: str = f'{TEST_ORG}/test_upload_file_folder_model_{uuid.uuid4().hex[-6:]}' + self.repo_id_dataset: str = f'{TEST_ORG}/test_upload_file_folder_dataset_{uuid.uuid4().hex[-6:]}' + + self.work_dir = tempfile.mkdtemp() + self.model_file_path = f'{self.work_dir}/test_model.bin' + self.dataset_file_path = f'{self.work_dir}/test_data.jsonl' + + logger.info(f'Work directory: {self.work_dir}') + + self.api.create_repo( + repo_id=self.repo_id_model, + repo_type=REPO_TYPE_MODEL, + exist_ok=True) + self.api.create_repo( + repo_id=self.repo_id_dataset, + repo_type=REPO_TYPE_DATASET, + exist_ok=True) + + self._construct_file() + + def tearDown(self): + + # Remove repositories + self.api.delete_repo( + repo_id=self.repo_id_model, repo_type=REPO_TYPE_MODEL) + self.api.delete_repo( + repo_id=self.repo_id_dataset, repo_type=REPO_TYPE_DATASET) + + # Clean up the temporary credentials + delete_credential() + + # Clean up the temporary directory + shutil.rmtree(self.work_dir) + + def _construct_file(self): + + # Construct data + data_list = [ + { + 'id': 1, + 'value': 3.14 + }, + { + 'id': 2, + 'value': 2.71 + }, + { + 'id': 3, + 'value': 3.69 + }, + { + 'id': 4, + 'value': 9.31 + }, + { + 'id': 5, + 'value': 1.21 + }, + ] + + with open(self.model_file_path, 'wb') as f: + for entry in data_list: + packed_data = struct.pack('if', entry['id'], entry['value']) + 
f.write(packed_data) + logger.info(f'Constructed model file: {self.model_file_path}') + + with open(self.dataset_file_path, 'w') as f: + for entry in data_list: + f.write(json.dumps(entry) + '\n') + logger.info(f'Constructed dataset file: {self.dataset_file_path}') + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_upload_file_folder(self): + """ + Test uploading file/folder to the model/dataset repository. + """ + + commit_info_upload_file_model = self.api.upload_file( + path_or_fileobj=self.model_file_path, + path_in_repo=os.path.basename(self.model_file_path), + repo_id=self.repo_id_model, + repo_type=REPO_TYPE_MODEL, + commit_message='Add model file for CI_TEST', + ) + self.assertTrue(commit_info_upload_file_model is not None) + + commit_info_upload_file_dataset = self.api.upload_file( + path_or_fileobj=self.dataset_file_path, + path_in_repo=os.path.basename(self.dataset_file_path), + repo_id=self.repo_id_dataset, + repo_type=REPO_TYPE_DATASET, + commit_message='Add dataset file for CI_TEST', + ) + self.assertTrue(commit_info_upload_file_dataset is not None) + + commit_info_upload_folder_model = self.api.upload_folder( + repo_id=self.repo_id_model, + folder_path=self.work_dir, + path_in_repo='test_data', + repo_type=REPO_TYPE_MODEL, + commit_message='Add model folder for CI_TEST', + ) + self.assertTrue(commit_info_upload_folder_model is not None) + + commit_info_upload_folder_dataset = self.api.upload_folder( + repo_id=self.repo_id_dataset, + folder_path=self.work_dir, + path_in_repo='test_data', + repo_type=REPO_TYPE_DATASET, + commit_message='Add dataset folder for CI_TEST', + ) + self.assertTrue(commit_info_upload_folder_dataset is not None) diff --git a/tests/msdatasets/test_dataset_upload.py b/tests/msdatasets/test_dataset_upload.py deleted file mode 100644 index 2cd910c2..00000000 --- a/tests/msdatasets/test_dataset_upload.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import os -import shutil -import tempfile -import unittest -import zipfile - -from modelscope.msdatasets import MsDataset -from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects -from modelscope.utils import logger as logging -from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode, - ModelFile) -from modelscope.utils.test_utils import test_level - -logger = logging.get_logger() - -KEY_EXTRACTED = 'extracted' - - -class DatasetUploadTest(unittest.TestCase): - - def setUp(self): - self.old_dir = os.getcwd() - self.dataset_name = 'small_coco_for_test' - self.dataset_file_name = self.dataset_name - self.prepared_dataset_name = 'pets_small' - self.token = os.getenv('TEST_UPLOAD_MS_TOKEN') - error_msg = 'The modelscope token can not be empty, please set env variable: TEST_UPLOAD_MS_TOKEN' - self.assertIsNotNone(self.token, msg=error_msg) - from modelscope.hub.api import HubApi - from modelscope.hub.api import ModelScopeConfig - self.api = HubApi() - self.api.login(self.token) - - # get user info - self.namespace, _ = ModelScopeConfig.get_user_info() - - self.temp_dir = tempfile.mkdtemp() - self.test_work_dir = os.path.join(self.temp_dir, self.dataset_name) - self.test_meta_dir = os.path.join(self.test_work_dir, 'meta') - if not os.path.exists(self.test_work_dir): - os.makedirs(self.test_work_dir) - - def tearDown(self): - os.chdir(self.old_dir) - shutil.rmtree(self.temp_dir, ignore_errors=True) - logger.info( - f'Temporary directory {self.temp_dir} successfully removed!') - - @staticmethod - def get_raw_downloaded_file_path(extracted_path): - raw_downloaded_file_path = '' - raw_data_dir = os.path.abspath( - os.path.join(extracted_path, '../../..')) - for root, dirs, files in os.walk(raw_data_dir): - if KEY_EXTRACTED in dirs: - for file in files: - curr_file_path = os.path.join(root, file) - if zipfile.is_zipfile(curr_file_path): - raw_downloaded_file_path = curr_file_path - return raw_downloaded_file_path - - 
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_ds_upload(self): - # Get the prepared data from hub, using default modelscope namespace - ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train') - config_res = ms_ds_train._hf_ds.config_kwargs - extracted_path = config_res.get('split_config').get('train') - raw_zipfile_path = self.get_raw_downloaded_file_path(extracted_path) - - MsDataset.upload( - object_name=self.dataset_file_name + '.zip', - local_file_path=raw_zipfile_path, - dataset_name=self.dataset_name, - namespace=self.namespace) - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_ds_upload_dir(self): - ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train') - config_train = ms_ds_train._hf_ds.config_kwargs - extracted_path_train = config_train.get('split_config').get('train') - - MsDataset.upload( - object_name='train', - local_file_path=os.path.join(extracted_path_train, - 'Pets/images/train'), - dataset_name=self.dataset_name, - namespace=self.namespace) - MsDataset.upload( - object_name='val', - local_file_path=os.path.join(extracted_path_train, - 'Pets/images/val'), - dataset_name=self.dataset_name, - namespace=self.namespace) - - objects = list_dataset_objects( - hub_api=self.api, - max_limit=-1, - is_recursive=True, - dataset_name=self.dataset_name, - namespace=self.namespace, - version=DEFAULT_DATASET_REVISION) - - logger.info(f'{len(objects)} objects have been uploaded: {objects}') - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_ds_download_dir(self): - test_ds = MsDataset.load( - self.dataset_name, - namespace=self.namespace, - download_mode=DownloadMode.FORCE_REDOWNLOAD) - assert test_ds.config_kwargs['split_config'].values() - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_ds_clone_meta(self): - MsDataset.clone_meta( - dataset_work_dir=self.test_meta_dir, - 
dataset_id=os.path.join(self.namespace, self.dataset_name)) - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_ds_upload_meta(self): - # Clone dataset meta repo first. - MsDataset.clone_meta( - dataset_work_dir=self.test_meta_dir, - dataset_id=os.path.join(self.namespace, self.dataset_name)) - - with open(os.path.join(self.test_meta_dir, ModelFile.README), - 'a') as f: - f.write('\nThis is a line for unit test.') - - MsDataset.upload_meta( - dataset_work_dir=self.test_meta_dir, - commit_message='Update for unit test.') - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/run_config.yaml b/tests/run_config.yaml index d9768a57..dd1611c1 100644 --- a/tests/run_config.yaml +++ b/tests/run_config.yaml @@ -1,5 +1,5 @@ # isolate cases in env, we can install different dependencies in each env. -isolated: # test cases that may require excessive anmount of GPU memory or run long time, which will be executed in dedicagted process. +isolated: # test cases that may require excessive amount of GPU memory or run long time, which will be executed in dedicated process. - test_text_to_speech.py - test_multi_modal_embedding.py - test_ofa_tasks.py