merge master

xingjun.wxj
2025-04-08 14:54:50 +08:00
17 changed files with 383 additions and 195 deletions

View File

@@ -11,7 +11,7 @@ on:
description: 'ModelScope branch to build from(release/x.xx)'
required: true
image_type:
description: 'The image type to build(cpu/gpu/llm/paddle_cpu)'
description: 'The image type to build(cpu/gpu/llm/swift/paddle_cpu)'
required: true
modelscope_version:
description: 'ModelScope version to use(x.xx.x)'

View File

@@ -24,7 +24,6 @@ if [ "$INSTALL_MS_DEPS" = "True" ]; then \
pip --no-cache-dir install omegaconf==2.0.6 && \
pip install 'editdistance==0.8.1' && \
pip install --no-cache-dir 'cython<=0.29.36' versioneer 'numpy<2.0' -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
# pip install --no-cache-dir kwsbp==0.0.6 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
pip install --no-cache-dir -r /var/modelscope/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
pip install --no-cache-dir -r /var/modelscope/audio.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
pip install --no-cache-dir -r /var/modelscope/cv.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
@@ -59,7 +58,7 @@ RUN sh /tmp/install.sh {version_args} && \
pip install --no-cache-dir xformers==0.0.27 && \
curl -fsSL https://ollama.com/install.sh | sh && \
pip install --no-cache-dir -U funasr scikit-learn && \
pip install --no-cache-dir -U qwen_vl_utils pyav librosa timm transformers accelerate peft trl safetensors && \
pip install --no-cache-dir -U qwen_vl_utils qwen_omni_utils pyav librosa timm transformers accelerate peft trl safetensors && \
cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b {modelscope_branch} --single-branch https://github.com/modelscope/modelscope.git && \
cd modelscope && pip install . -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
cd / && rm -fr /tmp/modelscope && pip cache purge; \

View File

@@ -211,14 +211,6 @@ ARG TENSORFLOW_VERSION={tf_version}
cd /tmp && git clone -b ms_build --single-branch https://github.com/tastelikefeet/mmcv.git && cd mmcv && MMCV_WITH_OPS=1 MAX_JOBS=32 pip install . && cd / && rm -fr /tmp/mmcv && pip cache purge; \
fi
# This limits the cuda121 version
# RUN if [ "$USE_GPU" = "True" ] ; then \
# pip install --no-cache-dir --force tinycudann==1.7 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \
# else \
# echo 'cpu not install tinycudann'; \
# fi
# RUN pip install --no-cache-dir fairseq # for py310
RUN pip install --no-cache-dir https://github.com/liyaodev/fairseq/releases/download/v0.12.3.1/fairseq-0.12.3.1-cp311-cp311-linux_x86_64.whl # for py311
ENTRYPOINT []

View File

@@ -335,6 +335,76 @@ class LLMImageBuilder(Builder):
return os.system(f'docker push {image_tag2}')
class SwiftImageBuilder(LLMImageBuilder):
def init_args(self, args) -> Any:
if not args.torch_version:
args.torch_version = '2.5.1'
args.torchaudio_version = '2.5.1'
args.torchvision_version = '0.20.1'
if not args.cuda_version:
args.cuda_version = '12.4.0'
if not args.vllm_version:
args.vllm_version = '0.7.3'
return super().init_args(args)
def generate_dockerfile(self) -> str:
meta_file = './docker/install.sh'
with open('docker/Dockerfile.extra_install', 'r') as f:
extra_content = f.read()
extra_content = extra_content.replace('{python_version}',
self.args.python_version)
extra_content += """
RUN pip install --no-cache-dir deepspeed==0.14.5 --no-deps && \
pip install --no-cache-dir -U icecream soundfile pybind11 && \
SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && \
CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable
"""
version_args = (
f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} '
f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version} '
f'{self.args.flashattn_version}')
with open('docker/Dockerfile.ubuntu', 'r') as f:
content = f.read()
content = content.replace('{base_image}', self.args.base_image)
content = content.replace('{extra_content}', extra_content)
content = content.replace('{meta_file}', meta_file)
content = content.replace('{version_args}', version_args)
content = content.replace('{cur_time}', formatted_time)
content = content.replace('{install_ms_deps}', 'False')
content = content.replace('{torch_version}',
self.args.torch_version)
content = content.replace('{torchvision_version}',
self.args.torchvision_version)
content = content.replace('{torchaudio_version}',
self.args.torchaudio_version)
content = content.replace('{index_url}', '')
content = content.replace('{modelscope_branch}',
self.args.modelscope_branch)
content = content.replace('{swift_branch}', self.args.swift_branch)
return content
def image(self) -> str:
return (
f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-'
f'{self.args.python_tag}-torch{self.args.torch_version}-{self.args.modelscope_version}-swift-test'
)
def push(self):
ret = os.system(f'docker push {self.image()}')
if ret != 0:
return ret
image_tag2 = (
f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-'
f'{self.args.python_tag}-torch{self.args.torch_version}-'
f'{self.args.modelscope_version}-swift-{formatted_time}-test')
ret = os.system(f'docker tag {self.image()} {image_tag2}')
if ret != 0:
return ret
return os.system(f'docker push {image_tag2}')
class PaddleCPUImageBuilder(Builder):
def __init__(self, args: Any, dry_run: bool):
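All builders, including the new `SwiftImageBuilder` above, generate their Dockerfile by substituting `{placeholder}` tokens into the `docker/Dockerfile.ubuntu` template. A minimal sketch of that pattern (the template text and values below are illustrative, not the real template):

```python
# Sketch of the placeholder-substitution pattern used by generate_dockerfile().
# Template text and values are illustrative only.
template = 'FROM {base_image}\nRUN sh /tmp/install.sh {version_args}\n'
substitutions = {
    '{base_image}': 'nvidia/cuda:12.4.0-devel-ubuntu22.04',
    '{version_args}': '2.5.1 0.20.1 2.5.1 0.7.3',
}
for placeholder, value in substitutions.items():
    template = template.replace(placeholder, value)
print(template)
```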
@@ -412,6 +482,8 @@ elif args.image_type.lower() == 'gpu':
builder_cls = GPUImageBuilder
elif args.image_type.lower() == 'llm':
builder_cls = LLMImageBuilder
elif args.image_type.lower() == 'swift':
builder_cls = SwiftImageBuilder
elif args.image_type.lower() == 'paddle_cpu':
builder_cls = PaddleCPUImageBuilder
else:

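For reference, the if/elif chain could also be written as a table-driven lookup, which keeps the dispatch in one place as image types grow. A sketch using the builder classes from this file (`CPUImageBuilder` is assumed to exist alongside the ones shown in this diff):

```python
# Table-driven equivalent of the dispatch above (sketch, not the actual script).
builders = {
    'cpu': CPUImageBuilder,  # assumed; not shown in this diff
    'gpu': GPUImageBuilder,
    'llm': LLMImageBuilder,
    'swift': SwiftImageBuilder,
    'paddle_cpu': PaddleCPUImageBuilder,
}
builder_cls = builders.get(args.image_type.lower())
if builder_cls is None:
    raise ValueError(f'Unsupported image_type: {args.image_type}')
```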
View File

@@ -226,7 +226,7 @@ class HubApi:
headers=self.builder_headers(self.headers))
handle_http_post_error(r, path, body)
raise_on_error(r.json())
model_repo_url = f'{endpoint}/{model_id}'
model_repo_url = f'{endpoint}/models/{model_id}'
return model_repo_url
def delete_model(self, model_id: str, endpoint: Optional[str] = None):
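With this fix, URLs returned by `create_model` include the `models/` path segment, matching how model pages are addressed on the hub. For example (repo id illustrative):

```python
endpoint = 'https://www.modelscope.cn'
model_id = 'my-org/my-model'  # illustrative repo id
model_repo_url = f'{endpoint}/models/{model_id}'
# -> 'https://www.modelscope.cn/models/my-org/my-model'
```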
@@ -401,6 +401,33 @@ class HubApi:
'Failed to check existence of repo: %s, make sure you have access authorization.'
% repo_type)
def delete_repo(self, repo_id: str, repo_type: str, endpoint: Optional[str] = None):
"""
Delete a repository from ModelScope.
Args:
repo_id (`str`):
A namespace (user or an organization) and a repo name separated
by a `/`.
repo_type (`str`):
The type of the repository. Supported types are `model` and `dataset`.
endpoint (`str`):
The endpoint to use. If not provided, defaults to `https://www.modelscope.cn`.
Can be set to `https://ai.modelscope.ai` for the international site.
"""
if not endpoint:
endpoint = self.endpoint
if repo_type == REPO_TYPE_DATASET:
self.delete_dataset(repo_id, endpoint)
elif repo_type == REPO_TYPE_MODEL:
self.delete_model(repo_id, endpoint)
else:
raise Exception(f'Arg repo_type {repo_type} not supported.')
logger.info(f'Repo {repo_id} deleted successfully.')
@staticmethod
def _create_default_config(model_dir):
cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
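A minimal usage sketch for the new `delete_repo` helper (token and repo id are placeholders):

```python
from modelscope import HubApi
from modelscope.utils.constant import REPO_TYPE_MODEL

api = HubApi()
api.login('<access-token>')  # placeholder token
# Dispatches to delete_model(); pass REPO_TYPE_DATASET to hit delete_dataset()
api.delete_repo(repo_id='my-org/my-model', repo_type=REPO_TYPE_MODEL)
```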
@@ -924,6 +951,21 @@ class HubApi:
dataset_list = r.json()[API_RESPONSE_FIELD_DATA]
return [x['Name'] for x in dataset_list]
def delete_dataset(self, dataset_id: str, endpoint: Optional[str] = None):
cookies = ModelScopeConfig.get_cookies()
if not endpoint:
endpoint = self.endpoint
if cookies is None:
raise ValueError('Token does not exist, please login first.')
path = f'{endpoint}/api/v1/datasets/{dataset_id}'
r = self.session.delete(path,
cookies=cookies,
headers=self.builder_headers(self.headers))
raise_for_http_status(r)
raise_on_error(r.json())
def get_dataset_id_and_type(self, dataset_name: str, namespace: str, endpoint: Optional[str] = None):
""" Get the dataset id and type. """
if not endpoint:
@@ -1361,15 +1403,42 @@ class HubApi:
chinese_name: Optional[str] = '',
license: Optional[str] = Licenses.APACHE_V2,
endpoint: Optional[str] = None,
exist_ok: Optional[bool] = False,
**kwargs,
) -> str:
"""
Create a repository on the ModelScope Hub.
Args:
repo_id (str): The repo id in the format of `owner_name/repo_name`.
token (Union[str, bool, None]): The access token.
visibility (Optional[str]): The visibility of the repo,
could be `public`, `private`, `internal`, default to `public`.
repo_type (Optional[str]): The repo type, default to `model`.
chinese_name (Optional[str]): The Chinese name of the repo.
license (Optional[str]): The license of the repo, default to `apache-2.0`.
endpoint (Optional[str]): The endpoint to use.
In the format of `https://www.modelscope.cn` or `https://www.modelscope.ai`
exist_ok (Optional[bool]): If the repo exists, whether to return the repo url directly.
**kwargs: The additional arguments.
Returns:
str: The repo url.
"""
# TODO: exist_ok
if not repo_id:
raise ValueError('Repo id cannot be empty!')
if not endpoint:
endpoint = self.endpoint
self.login(access_token=token)
repo_exists: bool = self.repo_exists(repo_id, repo_type=repo_type, endpoint=endpoint)
if repo_exists:
if exist_ok:
return f'{endpoint}/{repo_type}s/{repo_id}'
else:
raise ValueError(f'Repo {repo_id} already exists!')
self.login(access_token=token, endpoint=endpoint)
repo_id_list = repo_id.split('/')
if len(repo_id_list) != 2:
@@ -1382,31 +1451,28 @@ class HubApi:
if visibility is None:
raise ValueError(f'Invalid visibility: {visibility}, '
f'supported visibilities: `public`, `private`, `internal`')
if not self.repo_exists(repo_id, repo_type=repo_type):
repo_url: str = self.create_model(
model_id=repo_id,
visibility=visibility,
license=license,
chinese_name=chinese_name,
)
with tempfile.TemporaryDirectory() as temp_cache_dir:
from modelscope.hub.repository import Repository
repo = Repository(temp_cache_dir, repo_id)
default_config = {
'framework': 'pytorch',
'task': 'text-generation',
'allow_remote': True
}
config_json = kwargs.get('config_json')
if not config_json:
config_json = {}
config = {**default_config, **config_json}
add_content_to_file(
repo,
'configuration.json', [json.dumps(config)],
ignore_push_error=True)
else:
repo_url = f'{endpoint}/{repo_id}'
repo_url: str = self.create_model(
model_id=repo_id,
visibility=visibility,
license=license,
chinese_name=chinese_name,
)
with tempfile.TemporaryDirectory() as temp_cache_dir:
from modelscope.hub.repository import Repository
repo = Repository(temp_cache_dir, repo_id)
default_config = {
'framework': 'pytorch',
'task': 'text-generation',
'allow_remote': True
}
config_json = kwargs.get('config_json')
if not config_json:
config_json = {}
config = {**default_config, **config_json}
add_content_to_file(
repo,
'configuration.json', [json.dumps(config)],
ignore_push_error=True)
elif repo_type == REPO_TYPE_DATASET:
visibilities = {k: v for k, v in DatasetVisibility.__dict__.items() if not k.startswith('__')}
@@ -1414,20 +1480,19 @@ class HubApi:
if visibility is None:
raise ValueError(f'Invalid visibility: {visibility}, '
f'supported visibilities: `public`, `private`, `internal`')
if not self.repo_exists(repo_id, repo_type=repo_type):
repo_url: str = self.create_dataset(
dataset_name=repo_name,
namespace=namespace,
chinese_name=chinese_name,
license=license,
visibility=visibility,
)
else:
repo_url = f'{endpoint}/datasets/{namespace}/{repo_name}'
repo_url: str = self.create_dataset(
dataset_name=repo_name,
namespace=namespace,
chinese_name=chinese_name,
license=license,
visibility=visibility,
)
else:
raise ValueError(f'Invalid repo type: {repo_type}, supported repos: {REPO_TYPE_SUPPORT}')
logger.info(f'Repo created: {repo_url}')
return repo_url
def create_commit(
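With `exist_ok=True`, repeated `create_repo` calls become idempotent: if the repo already exists, its URL is returned instead of raising `ValueError`. A usage sketch mirroring the new tests below (ids are placeholders):

```python
from modelscope import HubApi
from modelscope.utils.constant import REPO_TYPE_DATASET

api = HubApi()
api.login('<access-token>')  # placeholder token
url = api.create_repo(
    repo_id='my-org/my-dataset',  # placeholder
    repo_type=REPO_TYPE_DATASET,
    exist_ok=True)  # a second call returns the same URL instead of raising
print(url)  # e.g. https://www.modelscope.cn/datasets/my-org/my-dataset
```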

View File

@@ -93,7 +93,7 @@ class RDINO_Pipeline(Pipeline):
if fs != self.model_config['sample_rate']:
raise ValueError(
'modelscope error: Only support %d sample rate files'
% self.model_cfg['sample_rate'])
% self.model_config['sample_rate'])
output['data%d' %
(i + 1)] = torch.from_numpy(data).unsqueeze(0)
else:

View File

@@ -206,6 +206,7 @@ class OCRDetectionPipeline(Pipeline):
img_pad_resize = img_pad_resize - np.array(
[123.68, 116.78, 103.94], dtype=np.float32)
import tensorflow as tf
with self._graph.as_default():
resize_size = tf.stack([resize_size, resize_size])
orig_size = tf.stack([max(h, w), max(h, w)])
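The added `with self._graph.as_default():` guard ensures the `tf.stack` ops are registered on the pipeline's own graph rather than whatever global default graph is active. A standalone illustration of this TF1-style pattern (not the pipeline code itself):

```python
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    # Ops created inside the context attach to `graph`,
    # not to the process-wide default graph.
    stacked = tf.stack([tf.constant(1), tf.constant(2)])

with tf.compat.v1.Session(graph=graph) as sess:
    print(sess.run(stacked))  # [1 2]
```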

View File

@@ -440,7 +440,7 @@ class QWenTextGenerationPipeline(Pipeline):
class SeqGPTPipeline(Pipeline):
def __init__(self, model: Union[Model, str], **kwargs):
from modelscope.utils.hf_util import AutoTokenizer
from modelscope import AutoTokenizer
if isinstance(model, str):
model_dir = snapshot_download(

View File

@@ -323,7 +323,8 @@ class UploadInfo:
file_hash_info = file_hash_info or get_file_hash(path)
size = file_hash_info['file_size']
sha = file_hash_info['file_hash']
sample = open(path, 'rb').read(512)
with open(path, 'rb') as f:
sample = f.read(512)
return cls(sha256=sha, size=size, sample=sample)

View File

@@ -1,2 +1 @@
#funcodec>=0.2.0
ms-funcodec>=0.2.0

View File

@@ -1,5 +1,5 @@
kaldiio
kwsbp>=0.0.6
kwsbp==0.0.6
matplotlib
py_sound_connect>=0.1
scipy

View File

@@ -1,7 +1,7 @@
hdbscan
hyperpyyaml
librosa==0.10.1
MinDAEC
MinDAEC==0.0.2
mir_eval>=0.7
rotary_embedding_torch>=0.1.5
scipy

View File

@@ -5,7 +5,7 @@ bmt_clipit>=1.0
chumpy
clip>=1.0
control_ldm
ddpm_guided_diffusion
ddpm_guided_diffusion==0.0.0
diffusers
easydict
edit_distance
@@ -55,7 +55,7 @@ regex
scikit-image
scikit-learn
shapely
shotdetect_scenedetect_lgss>=0.0.4
shotdetect_scenedetect_lgss==0.0.4
smplx
tensorflow-estimator>=1.15.1
tf_slim

View File

@@ -0,0 +1,58 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
import uuid
from modelscope import HubApi
from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import TEST_ACCESS_TOKEN1
from modelscope.utils.test_utils import TEST_MODEL_ORG as TEST_ORG
from modelscope.utils.test_utils import delete_credential, test_level
logger = get_logger()
class TestCreateRepo(unittest.TestCase):
def setUp(self):
self.api = HubApi()
self.api.login(TEST_ACCESS_TOKEN1)
self.repo_id_model: str = f'{TEST_ORG}/test_create_repo_model_{uuid.uuid4().hex[-6:]}'
self.repo_id_dataset: str = f'{TEST_ORG}/test_create_repo_dataset_{uuid.uuid4().hex[-6:]}'
def tearDown(self):
self.api.delete_repo(
repo_id=self.repo_id_model, repo_type=REPO_TYPE_MODEL)
self.api.delete_repo(
repo_id=self.repo_id_dataset, repo_type=REPO_TYPE_DATASET)
delete_credential()
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_create_repo(self):
logger.info(
f'TEST: Creating repo {self.repo_id_model} and {self.repo_id_dataset} ...'
)
try:
self.api.create_repo(
repo_id=self.repo_id_model,
repo_type=REPO_TYPE_MODEL,
exist_ok=True)
except Exception as e:
logger.error(f'Failed to create repo {self.repo_id_model}!')
raise e
try:
self.api.create_repo(
repo_id=self.repo_id_dataset,
repo_type=REPO_TYPE_DATASET,
exist_ok=True)
except Exception as e:
logger.error(f'Failed to create repo {self.repo_id_dataset}!')
raise e
logger.info(
f'TEST: Created repo {self.repo_id_model} and {self.repo_id_dataset} successfully!'
)

View File

@@ -0,0 +1,138 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import struct
import tempfile
import unittest
import uuid
import json
from modelscope import HubApi
from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import TEST_ACCESS_TOKEN1
from modelscope.utils.test_utils import TEST_MODEL_ORG as TEST_ORG
from modelscope.utils.test_utils import delete_credential, test_level
logger = get_logger()
class TestUploadFileFolder(unittest.TestCase):
def setUp(self):
self.api = HubApi()
self.api.login(TEST_ACCESS_TOKEN1)
self.repo_id_model: str = f'{TEST_ORG}/test_upload_file_folder_model_{uuid.uuid4().hex[-6:]}'
self.repo_id_dataset: str = f'{TEST_ORG}/test_upload_file_folder_dataset_{uuid.uuid4().hex[-6:]}'
self.work_dir = tempfile.mkdtemp()
self.model_file_path = f'{self.work_dir}/test_model.bin'
self.dataset_file_path = f'{self.work_dir}/test_data.jsonl'
logger.info(f'Work directory: {self.work_dir}')
self.api.create_repo(
repo_id=self.repo_id_model,
repo_type=REPO_TYPE_MODEL,
exist_ok=True)
self.api.create_repo(
repo_id=self.repo_id_dataset,
repo_type=REPO_TYPE_DATASET,
exist_ok=True)
self._construct_file()
def tearDown(self):
# Remove repositories
self.api.delete_repo(
repo_id=self.repo_id_model, repo_type=REPO_TYPE_MODEL)
self.api.delete_repo(
repo_id=self.repo_id_dataset, repo_type=REPO_TYPE_DATASET)
# Clean up the temporary credentials
delete_credential()
# Clean up the temporary directory
shutil.rmtree(self.work_dir)
def _construct_file(self):
# Construct data
data_list = [
{
'id': 1,
'value': 3.14
},
{
'id': 2,
'value': 2.71
},
{
'id': 3,
'value': 3.69
},
{
'id': 4,
'value': 9.31
},
{
'id': 5,
'value': 1.21
},
]
with open(self.model_file_path, 'wb') as f:
for entry in data_list:
packed_data = struct.pack('if', entry['id'], entry['value'])
f.write(packed_data)
logger.info(f'Constructed model file: {self.model_file_path}')
with open(self.dataset_file_path, 'w') as f:
for entry in data_list:
f.write(json.dumps(entry) + '\n')
logger.info(f'Constructed dataset file: {self.dataset_file_path}')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_upload_file_folder(self):
"""
Test uploading file/folder to the model/dataset repository.
"""
commit_info_upload_file_model = self.api.upload_file(
path_or_fileobj=self.model_file_path,
path_in_repo=os.path.basename(self.model_file_path),
repo_id=self.repo_id_model,
repo_type=REPO_TYPE_MODEL,
commit_message='Add model file for CI_TEST',
)
self.assertTrue(commit_info_upload_file_model is not None)
commit_info_upload_file_dataset = self.api.upload_file(
path_or_fileobj=self.dataset_file_path,
path_in_repo=os.path.basename(self.dataset_file_path),
repo_id=self.repo_id_dataset,
repo_type=REPO_TYPE_DATASET,
commit_message='Add dataset file for CI_TEST',
)
self.assertTrue(commit_info_upload_file_dataset is not None)
commit_info_upload_folder_model = self.api.upload_folder(
repo_id=self.repo_id_model,
folder_path=self.work_dir,
path_in_repo='test_data',
repo_type=REPO_TYPE_MODEL,
commit_message='Add model folder for CI_TEST',
)
self.assertTrue(commit_info_upload_folder_model is not None)
commit_info_upload_folder_dataset = self.api.upload_folder(
repo_id=self.repo_id_dataset,
folder_path=self.work_dir,
path_in_repo='test_data',
repo_type=REPO_TYPE_DATASET,
commit_message='Add dataset folder for CI_TEST',
)
self.assertTrue(commit_info_upload_folder_dataset is not None)
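For reference, records written with `struct.pack('if', ...)` in `_construct_file` can be read back as sketched below (path shortened for illustration):

```python
import struct

record_size = struct.calcsize('if')  # one packed record: int + float
with open('test_model.bin', 'rb') as f:  # file as written in _construct_file
    while chunk := f.read(record_size):
        rec_id, value = struct.unpack('if', chunk)
        print(rec_id, round(value, 2))
```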

View File

@@ -1,137 +0,0 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest
import zipfile
from modelscope.msdatasets import MsDataset
from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects
from modelscope.utils import logger as logging
from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode,
ModelFile)
from modelscope.utils.test_utils import test_level
logger = logging.get_logger()
KEY_EXTRACTED = 'extracted'
class DatasetUploadTest(unittest.TestCase):
def setUp(self):
self.old_dir = os.getcwd()
self.dataset_name = 'small_coco_for_test'
self.dataset_file_name = self.dataset_name
self.prepared_dataset_name = 'pets_small'
self.token = os.getenv('TEST_UPLOAD_MS_TOKEN')
error_msg = 'The modelscope token can not be empty, please set env variable: TEST_UPLOAD_MS_TOKEN'
self.assertIsNotNone(self.token, msg=error_msg)
from modelscope.hub.api import HubApi
from modelscope.hub.api import ModelScopeConfig
self.api = HubApi()
self.api.login(self.token)
# get user info
self.namespace, _ = ModelScopeConfig.get_user_info()
self.temp_dir = tempfile.mkdtemp()
self.test_work_dir = os.path.join(self.temp_dir, self.dataset_name)
self.test_meta_dir = os.path.join(self.test_work_dir, 'meta')
if not os.path.exists(self.test_work_dir):
os.makedirs(self.test_work_dir)
def tearDown(self):
os.chdir(self.old_dir)
shutil.rmtree(self.temp_dir, ignore_errors=True)
logger.info(
f'Temporary directory {self.temp_dir} successfully removed!')
@staticmethod
def get_raw_downloaded_file_path(extracted_path):
raw_downloaded_file_path = ''
raw_data_dir = os.path.abspath(
os.path.join(extracted_path, '../../..'))
for root, dirs, files in os.walk(raw_data_dir):
if KEY_EXTRACTED in dirs:
for file in files:
curr_file_path = os.path.join(root, file)
if zipfile.is_zipfile(curr_file_path):
raw_downloaded_file_path = curr_file_path
return raw_downloaded_file_path
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ds_upload(self):
# Get the prepared data from hub, using default modelscope namespace
ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train')
config_res = ms_ds_train._hf_ds.config_kwargs
extracted_path = config_res.get('split_config').get('train')
raw_zipfile_path = self.get_raw_downloaded_file_path(extracted_path)
MsDataset.upload(
object_name=self.dataset_file_name + '.zip',
local_file_path=raw_zipfile_path,
dataset_name=self.dataset_name,
namespace=self.namespace)
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ds_upload_dir(self):
ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train')
config_train = ms_ds_train._hf_ds.config_kwargs
extracted_path_train = config_train.get('split_config').get('train')
MsDataset.upload(
object_name='train',
local_file_path=os.path.join(extracted_path_train,
'Pets/images/train'),
dataset_name=self.dataset_name,
namespace=self.namespace)
MsDataset.upload(
object_name='val',
local_file_path=os.path.join(extracted_path_train,
'Pets/images/val'),
dataset_name=self.dataset_name,
namespace=self.namespace)
objects = list_dataset_objects(
hub_api=self.api,
max_limit=-1,
is_recursive=True,
dataset_name=self.dataset_name,
namespace=self.namespace,
version=DEFAULT_DATASET_REVISION)
logger.info(f'{len(objects)} objects have been uploaded: {objects}')
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ds_download_dir(self):
test_ds = MsDataset.load(
self.dataset_name,
namespace=self.namespace,
download_mode=DownloadMode.FORCE_REDOWNLOAD)
assert test_ds.config_kwargs['split_config'].values()
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ds_clone_meta(self):
MsDataset.clone_meta(
dataset_work_dir=self.test_meta_dir,
dataset_id=os.path.join(self.namespace, self.dataset_name))
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ds_upload_meta(self):
# Clone dataset meta repo first.
MsDataset.clone_meta(
dataset_work_dir=self.test_meta_dir,
dataset_id=os.path.join(self.namespace, self.dataset_name))
with open(os.path.join(self.test_meta_dir, ModelFile.README),
'a') as f:
f.write('\nThis is a line for unit test.')
MsDataset.upload_meta(
dataset_work_dir=self.test_meta_dir,
commit_message='Update for unit test.')
if __name__ == '__main__':
unittest.main()

View File

@@ -1,5 +1,5 @@
# isolate cases in env, we can install different dependencies in each env.
isolated: # test cases that may require excessive anmount of GPU memory or run long time, which will be executed in dedicagted process.
isolated: # test cases that may require excessive amount of GPU memory or run long time, which will be executed in dedicated process.
- test_text_to_speech.py
- test_multi_modal_embedding.py
- test_ofa_tasks.py