merge master
.github/workflows/docker-image.yml (vendored, 2 lines changed)
@@ -11,7 +11,7 @@ on:
         description: 'ModelScope branch to build from(release/x.xx)'
         required: true
       image_type:
-        description: 'The image type to build(cpu/gpu/llm/paddle_cpu)'
+        description: 'The image type to build(cpu/gpu/llm/swift/paddle_cpu)'
         required: true
       modelscope_version:
         description: 'ModelScope version to use(x.xx.x)'
@@ -24,7 +24,6 @@ if [ "$INSTALL_MS_DEPS" = "True" ]; then \
    pip --no-cache-dir install omegaconf==2.0.6 && \
    pip install 'editdistance==0.8.1' && \
    pip install --no-cache-dir 'cython<=0.29.36' versioneer 'numpy<2.0' -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
-    # pip install --no-cache-dir kwsbp==0.0.6 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
    pip install --no-cache-dir -r /var/modelscope/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
    pip install --no-cache-dir -r /var/modelscope/audio.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
    pip install --no-cache-dir -r /var/modelscope/cv.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
@@ -59,7 +58,7 @@ RUN sh /tmp/install.sh {version_args} && \
    pip install --no-cache-dir xformers==0.0.27 && \
    curl -fsSL https://ollama.com/install.sh | sh && \
    pip install --no-cache-dir -U funasr scikit-learn && \
-    pip install --no-cache-dir -U qwen_vl_utils pyav librosa timm transformers accelerate peft trl safetensors && \
+    pip install --no-cache-dir -U qwen_vl_utils qwen_omni_utils pyav librosa timm transformers accelerate peft trl safetensors && \
    cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b {modelscope_branch} --single-branch https://github.com/modelscope/modelscope.git && \
    cd modelscope && pip install . -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
    cd / && rm -fr /tmp/modelscope && pip cache purge; \
@@ -211,14 +211,6 @@ ARG TENSORFLOW_VERSION={tf_version}
     cd /tmp && git clone -b ms_build --single-branch https://github.com/tastelikefeet/mmcv.git && cd mmcv && MMCV_WITH_OPS=1 MAX_JOBS=32 pip install . && cd / && rm -fr /tmp/mmcv && pip cache purge; \
     fi

-# This limits the cuda121 version
-# RUN if [ "$USE_GPU" = "True" ] ; then \
-#     pip install --no-cache-dir --force tinycudann==1.7 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \
-#     else \
-#     echo 'cpu not install tinycudann'; \
-#     fi
-
-# RUN pip install --no-cache-dir fairseq # for py310
 RUN pip install --no-cache-dir https://github.com/liyaodev/fairseq/releases/download/v0.12.3.1/fairseq-0.12.3.1-cp311-cp311-linux_x86_64.whl # for py311

 ENTRYPOINT []
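Note on the fairseq change: the pinned wheel is tagged cp311-cp311-linux_x86_64, so it resolves only on CPython 3.11 Linux x86_64 images, which is consistent with dropping the commented py310 fallback and the stale tinycudann block in the same hunk.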
@@ -335,6 +335,76 @@ class LLMImageBuilder(Builder):
         return os.system(f'docker push {image_tag2}')


+class SwiftImageBuilder(LLMImageBuilder):
+
+    def init_args(self, args) -> Any:
+        if not args.torch_version:
+            args.torch_version = '2.5.1'
+            args.torchaudio_version = '2.5.1'
+            args.torchvision_version = '0.20.1'
+        if not args.cuda_version:
+            args.cuda_version = '12.4.0'
+        if not args.vllm_version:
+            args.vllm_version = '0.7.3'
+        return super().init_args(args)
+
+    def generate_dockerfile(self) -> str:
+        meta_file = './docker/install.sh'
+        with open('docker/Dockerfile.extra_install', 'r') as f:
+            extra_content = f.read()
+        extra_content = extra_content.replace('{python_version}',
+                                              self.args.python_version)
+        extra_content += """
+RUN pip install --no-cache-dir deepspeed==0.14.5 --no-deps \
+    pip install --no-cache-dir -U icecream soundfile pybind11 && \
+    SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && \
+    CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
+    pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable
+"""
+        version_args = (
+            f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} '
+            f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version} '
+            f'{self.args.flashattn_version}')
+        with open('docker/Dockerfile.ubuntu', 'r') as f:
+            content = f.read()
+        content = content.replace('{base_image}', self.args.base_image)
+        content = content.replace('{extra_content}', extra_content)
+        content = content.replace('{meta_file}', meta_file)
+        content = content.replace('{version_args}', version_args)
+        content = content.replace('{cur_time}', formatted_time)
+        content = content.replace('{install_ms_deps}', 'False')
+        content = content.replace('{torch_version}',
+                                  self.args.torch_version)
+        content = content.replace('{torchvision_version}',
+                                  self.args.torchvision_version)
+        content = content.replace('{torchaudio_version}',
+                                  self.args.torchaudio_version)
+        content = content.replace('{index_url}', '')
+        content = content.replace('{modelscope_branch}',
+                                  self.args.modelscope_branch)
+        content = content.replace('{swift_branch}', self.args.swift_branch)
+        return content
+
+    def image(self) -> str:
+        return (
+            f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-'
+            f'{self.args.python_tag}-torch{self.args.torch_version}-{self.args.modelscope_version}-swift-test'
+        )
+
+    def push(self):
+        ret = os.system(f'docker push {self.image()}')
+        if ret != 0:
+            return ret
+        image_tag2 = (
+            f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-'
+            f'{self.args.python_tag}-torch{self.args.torch_version}-'
+            f'{self.args.modelscope_version}-swift-{formatted_time}-test')
+        ret = os.system(f'docker tag {self.image()} {image_tag2}')
+        if ret != 0:
+            return ret
+        return os.system(f'docker push {image_tag2}')
+
+
 class PaddleCPUImageBuilder(Builder):

     def __init__(self, args: Any, dry_run: bool):
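generate_dockerfile() above assembles the final Dockerfile by plain string substitution of {placeholder} tokens in a template. A minimal sketch of that pattern; the template text and values here are illustrative stand-ins, not the real docker/Dockerfile.ubuntu inputs:

    # Illustrative sketch of the placeholder-substitution pattern used by
    # generate_dockerfile(); the template and values are made up for clarity.
    template = ('FROM {base_image}\n'
                'RUN sh /tmp/install.sh {version_args}\n')
    values = {
        'base_image': 'nvidia/cuda:12.4.0-devel-ubuntu22.04',
        'version_args': '2.5.1 0.20.1 2.5.1 0.7.3',
    }
    dockerfile = template
    for key, value in values.items():
        dockerfile = dockerfile.replace('{' + key + '}', value)
    print(dockerfile)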
@@ -412,6 +482,8 @@ elif args.image_type.lower() == 'gpu':
     builder_cls = GPUImageBuilder
 elif args.image_type.lower() == 'llm':
     builder_cls = LLMImageBuilder
+elif args.image_type.lower() == 'swift':
+    builder_cls = SwiftImageBuilder
 elif args.image_type.lower() == 'paddle_cpu':
     builder_cls = PaddleCPUImageBuilder
 else:
@@ -226,7 +226,7 @@ class HubApi:
             headers=self.builder_headers(self.headers))
         handle_http_post_error(r, path, body)
         raise_on_error(r.json())
-        model_repo_url = f'{endpoint}/{model_id}'
+        model_repo_url = f'{endpoint}/models/{model_id}'
         return model_repo_url

     def delete_model(self, model_id: str, endpoint: Optional[str] = None):
@@ -401,6 +401,33 @@ class HubApi:
                 'Failed to check existence of repo: %s, make sure you have access authorization.'
                 % repo_type)

+    def delete_repo(self, repo_id: str, repo_type: str, endpoint: Optional[str] = None):
+        """
+        Delete a repository from ModelScope.
+
+        Args:
+            repo_id (`str`):
+                A namespace (user or an organization) and a repo name separated
+                by a `/`.
+            repo_type (`str`):
+                The type of the repository. Supported types are `model` and `dataset`.
+            endpoint(`str`):
+                The endpoint to use. If not provided, the default endpoint is `https://www.modelscope.cn`
+                Could be set to `https://ai.modelscope.ai` for international version.
+        """
+
+        if not endpoint:
+            endpoint = self.endpoint
+
+        if repo_type == REPO_TYPE_DATASET:
+            self.delete_dataset(repo_id, endpoint)
+        elif repo_type == REPO_TYPE_MODEL:
+            self.delete_model(repo_id, endpoint)
+        else:
+            raise Exception(f'Arg repo_type {repo_type} not supported.')
+
+        logger.info(f'Repo {repo_id} deleted successfully.')
+
     @staticmethod
     def _create_default_config(model_dir):
         cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
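A minimal usage sketch for the new delete_repo API; the token and repo id below are placeholders:

    from modelscope.hub.api import HubApi
    from modelscope.utils.constant import REPO_TYPE_MODEL

    api = HubApi()
    api.login('<your-sdk-token>')  # placeholder token
    # delete_repo dispatches to delete_model() for models and
    # delete_dataset() for datasets, as shown above.
    api.delete_repo(repo_id='my-org/my-model', repo_type=REPO_TYPE_MODEL)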
@@ -924,6 +951,21 @@ class HubApi:
         dataset_list = r.json()[API_RESPONSE_FIELD_DATA]
         return [x['Name'] for x in dataset_list]

+    def delete_dataset(self, dataset_id: str, endpoint: Optional[str] = None):
+
+        cookies = ModelScopeConfig.get_cookies()
+        if not endpoint:
+            endpoint = self.endpoint
+        if cookies is None:
+            raise ValueError('Token does not exist, please login first.')
+
+        path = f'{endpoint}/api/v1/datasets/{dataset_id}'
+        r = self.session.delete(path,
+                                cookies=cookies,
+                                headers=self.builder_headers(self.headers))
+        raise_for_http_status(r)
+        raise_on_error(r.json())
+
     def get_dataset_id_and_type(self, dataset_name: str, namespace: str, endpoint: Optional[str] = None):
         """ Get the dataset id and type. """
         if not endpoint:
@@ -1361,15 +1403,42 @@ class HubApi:
             chinese_name: Optional[str] = '',
             license: Optional[str] = Licenses.APACHE_V2,
             endpoint: Optional[str] = None,
+            exist_ok: Optional[bool] = False,
             **kwargs,
     ) -> str:
+        """
+        Create a repository on the ModelScope Hub.
+
+        Args:
+            repo_id (str): The repo id in the format of `owner_name/repo_name`.
+            token (Union[str, bool, None]): The access token.
+            visibility (Optional[str]): The visibility of the repo,
+                could be `public`, `private`, `internal`, default to `public`.
+            repo_type (Optional[str]): The repo type, default to `model`.
+            chinese_name (Optional[str]): The Chinese name of the repo.
+            license (Optional[str]): The license of the repo, default to `apache-2.0`.
+            endpoint (Optional[str]): The endpoint to use.
+                In the format of `https://www.modelscope.cn` or 'https://www.modelscope.ai'
+            exist_ok (Optional[bool]): If the repo exists, whether to return the repo url directly.
+            **kwargs: The additional arguments.
+
+        Returns:
+            str: The repo url.
+        """

-        # TODO: exist_ok
         if not repo_id:
             raise ValueError('Repo id cannot be empty!')
         if not endpoint:
             endpoint = self.endpoint
-        self.login(access_token=token)
+
+        repo_exists: bool = self.repo_exists(repo_id, repo_type=repo_type, endpoint=endpoint)
+        if repo_exists:
+            if exist_ok:
+                return f'{endpoint}/{repo_type}s/{repo_id}'
+            else:
+                raise ValueError(f'Repo {repo_id} already exists!')
+
+        self.login(access_token=token, endpoint=endpoint)

         repo_id_list = repo_id.split('/')
         if len(repo_id_list) != 2:
@@ -1382,31 +1451,28 @@ class HubApi:
             if visibility is None:
                 raise ValueError(f'Invalid visibility: {visibility}, '
                                  f'supported visibilities: `public`, `private`, `internal`')
-            if not self.repo_exists(repo_id, repo_type=repo_type):
-                repo_url: str = self.create_model(
-                    model_id=repo_id,
-                    visibility=visibility,
-                    license=license,
-                    chinese_name=chinese_name,
-                )
-                with tempfile.TemporaryDirectory() as temp_cache_dir:
-                    from modelscope.hub.repository import Repository
-                    repo = Repository(temp_cache_dir, repo_id)
-                    default_config = {
-                        'framework': 'pytorch',
-                        'task': 'text-generation',
-                        'allow_remote': True
-                    }
-                    config_json = kwargs.get('config_json')
-                    if not config_json:
-                        config_json = {}
-                    config = {**default_config, **config_json}
-                    add_content_to_file(
-                        repo,
-                        'configuration.json', [json.dumps(config)],
-                        ignore_push_error=True)
-            else:
-                repo_url = f'{endpoint}/{repo_id}'
+            repo_url: str = self.create_model(
+                model_id=repo_id,
+                visibility=visibility,
+                license=license,
+                chinese_name=chinese_name,
+            )
+            with tempfile.TemporaryDirectory() as temp_cache_dir:
+                from modelscope.hub.repository import Repository
+                repo = Repository(temp_cache_dir, repo_id)
+                default_config = {
+                    'framework': 'pytorch',
+                    'task': 'text-generation',
+                    'allow_remote': True
+                }
+                config_json = kwargs.get('config_json')
+                if not config_json:
+                    config_json = {}
+                config = {**default_config, **config_json}
+                add_content_to_file(
+                    repo,
+                    'configuration.json', [json.dumps(config)],
+                    ignore_push_error=True)

         elif repo_type == REPO_TYPE_DATASET:
             visibilities = {k: v for k, v in DatasetVisibility.__dict__.items() if not k.startswith('__')}
@@ -1414,20 +1480,19 @@ class HubApi:
             if visibility is None:
                 raise ValueError(f'Invalid visibility: {visibility}, '
                                  f'supported visibilities: `public`, `private`, `internal`')
-            if not self.repo_exists(repo_id, repo_type=repo_type):
-                repo_url: str = self.create_dataset(
-                    dataset_name=repo_name,
-                    namespace=namespace,
-                    chinese_name=chinese_name,
-                    license=license,
-                    visibility=visibility,
-                )
-            else:
-                repo_url = f'{endpoint}/datasets/{namespace}/{repo_name}'
+            repo_url: str = self.create_dataset(
+                dataset_name=repo_name,
+                namespace=namespace,
+                chinese_name=chinese_name,
+                license=license,
+                visibility=visibility,
+            )

         else:
             raise ValueError(f'Invalid repo type: {repo_type}, supported repos: {REPO_TYPE_SUPPORT}')

         logger.info(f'Repo created: {repo_url}')

         return repo_url

     def create_commit(
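A usage sketch for create_repo with the new exist_ok flag; the repo id and token are placeholders:

    from modelscope.hub.api import HubApi

    api = HubApi()
    url = api.create_repo(
        repo_id='my-org/my-model',  # placeholder
        token='<your-sdk-token>',   # placeholder
        exist_ok=True)              # return the existing URL instead of raising
    print(url)  # e.g. https://www.modelscope.cn/models/my-org/my-model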
@@ -93,7 +93,7 @@ class RDINO_Pipeline(Pipeline):
             if fs != self.model_config['sample_rate']:
                 raise ValueError(
                     'modelscope error: Only support %d sample rate files'
-                    % self.model_cfg['sample_rate'])
+                    % self.model_config['sample_rate'])
             output['data%d' %
                    (i + 1)] = torch.from_numpy(data).unsqueeze(0)
         else:
@@ -206,6 +206,7 @@ class OCRDetectionPipeline(Pipeline):
         img_pad_resize = img_pad_resize - np.array(
             [123.68, 116.78, 103.94], dtype=np.float32)

+        import tensorflow as tf
         with self._graph.as_default():
             resize_size = tf.stack([resize_size, resize_size])
             orig_size = tf.stack([max(h, w), max(h, w)])
@@ -440,7 +440,7 @@ class QWenTextGenerationPipeline(Pipeline):
 class SeqGPTPipeline(Pipeline):

     def __init__(self, model: Union[Model, str], **kwargs):
-        from modelscope.utils.hf_util import AutoTokenizer
+        from modelscope import AutoTokenizer

         if isinstance(model, str):
             model_dir = snapshot_download(
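The SeqGPT change swaps an internal import path (modelscope.utils.hf_util) for the package-level re-export; relying on the public modelscope namespace presumably shields the pipeline from internal module reshuffles.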
@@ -323,7 +323,8 @@ class UploadInfo:
         file_hash_info = file_hash_info or get_file_hash(path)
         size = file_hash_info['file_size']
         sha = file_hash_info['file_hash']
-        sample = open(path, 'rb').read(512)
+        with open(path, 'rb') as f:
+            sample = f.read(512)

         return cls(sha256=sha, size=size, sample=sample)
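The UploadInfo change replaces a bare open(path, 'rb').read(512) with a context manager, so the file handle is closed deterministically once the 512-byte sample is read instead of lingering until garbage collection.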
@@ -1,2 +1 @@
-#funcodec>=0.2.0
 ms-funcodec>=0.2.0
@@ -1,5 +1,5 @@
 kaldiio
-kwsbp>=0.0.6
+kwsbp==0.0.6
 matplotlib
 py_sound_connect>=0.1
 scipy
@@ -1,7 +1,7 @@
 hdbscan
 hyperpyyaml
 librosa==0.10.1
-MinDAEC
+MinDAEC==0.0.2
 mir_eval>=0.7
 rotary_embedding_torch>=0.1.5
 scipy
@@ -5,7 +5,7 @@ bmt_clipit>=1.0
 chumpy
 clip>=1.0
 control_ldm
-ddpm_guided_diffusion
+ddpm_guided_diffusion==0.0.0
 diffusers
 easydict
 edit_distance
@@ -55,7 +55,7 @@ regex
 scikit-image
 scikit-learn
 shapely
-shotdetect_scenedetect_lgss>=0.0.4
+shotdetect_scenedetect_lgss==0.0.4
 smplx
 tensorflow-estimator>=1.15.1
 tf_slim
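The requirements edits above follow one pattern: loose specifiers (bare names or >=) become exact pins (==), trading automatic upgrades for reproducible installs, and the commented funcodec entry gives way to its renamed ms-funcodec package.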
tests/hub/test_create_repo.py (new file, 58 lines)
@@ -0,0 +1,58 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+import uuid
+
+from modelscope import HubApi
+from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL
+from modelscope.utils.logger import get_logger
+from modelscope.utils.test_utils import TEST_ACCESS_TOKEN1
+from modelscope.utils.test_utils import TEST_MODEL_ORG as TEST_ORG
+from modelscope.utils.test_utils import delete_credential, test_level
+
+logger = get_logger()
+
+
+class TestCreateRepo(unittest.TestCase):
+
+    def setUp(self):
+        self.api = HubApi()
+        self.api.login(TEST_ACCESS_TOKEN1)
+
+        self.repo_id_model: str = f'{TEST_ORG}/test_create_repo_model_{uuid.uuid4().hex[-6:]}'
+        self.repo_id_dataset: str = f'{TEST_ORG}/test_create_repo_dataset_{uuid.uuid4().hex[-6:]}'
+
+    def tearDown(self):
+        self.api.delete_repo(
+            repo_id=self.repo_id_model, repo_type=REPO_TYPE_MODEL)
+        self.api.delete_repo(
+            repo_id=self.repo_id_dataset, repo_type=REPO_TYPE_DATASET)
+        delete_credential()
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_create_repo(self):
+
+        logger.info(
+            f'TEST: Creating repo {self.repo_id_model} and {self.repo_id_dataset} ...'
+        )
+
+        try:
+            self.api.create_repo(
+                repo_id=self.repo_id_model,
+                repo_type=REPO_TYPE_MODEL,
+                exist_ok=True)
+        except Exception as e:
+            logger.error(f'Failed to create repo {self.repo_id_model} !')
+            raise e
+
+        try:
+            self.api.create_repo(
+                repo_id=self.repo_id_dataset,
+                repo_type=REPO_TYPE_DATASET,
+                exist_ok=True)
+        except Exception as e:
+            logger.error(f'Failed to create repo {self.repo_id_dataset} !')
+            raise e
+
+        logger.info(
+            f'TEST: Created repo {self.repo_id_model} and {self.repo_id_dataset} successfully !'
+        )
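The new test is plain unittest code, so it can be invoked directly with python -m unittest tests.hub.test_create_repo, assuming the access token and test organization that modelscope.utils.test_utils reads are available in the environment.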
tests/hub/test_upload_file_folder.py (new file, 138 lines)
@@ -0,0 +1,138 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import shutil
+import struct
+import tempfile
+import unittest
+import uuid
+
+import json
+
+from modelscope import HubApi
+from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL
+from modelscope.utils.logger import get_logger
+from modelscope.utils.test_utils import TEST_ACCESS_TOKEN1
+from modelscope.utils.test_utils import TEST_MODEL_ORG as TEST_ORG
+from modelscope.utils.test_utils import delete_credential, test_level
+
+logger = get_logger()
+
+
+class TestUploadFileFolder(unittest.TestCase):
+
+    def setUp(self):
+        self.api = HubApi()
+        self.api.login(TEST_ACCESS_TOKEN1)
+
+        self.repo_id_model: str = f'{TEST_ORG}/test_upload_file_folder_model_{uuid.uuid4().hex[-6:]}'
+        self.repo_id_dataset: str = f'{TEST_ORG}/test_upload_file_folder_dataset_{uuid.uuid4().hex[-6:]}'
+
+        self.work_dir = tempfile.mkdtemp()
+        self.model_file_path = f'{self.work_dir}/test_model.bin'
+        self.dataset_file_path = f'{self.work_dir}/test_data.jsonl'
+
+        logger.info(f'Work directory: {self.work_dir}')
+
+        self.api.create_repo(
+            repo_id=self.repo_id_model,
+            repo_type=REPO_TYPE_MODEL,
+            exist_ok=True)
+        self.api.create_repo(
+            repo_id=self.repo_id_dataset,
+            repo_type=REPO_TYPE_DATASET,
+            exist_ok=True)
+
+        self._construct_file()
+
+    def tearDown(self):
+
+        # Remove repositories
+        self.api.delete_repo(
+            repo_id=self.repo_id_model, repo_type=REPO_TYPE_MODEL)
+        self.api.delete_repo(
+            repo_id=self.repo_id_dataset, repo_type=REPO_TYPE_DATASET)
+
+        # Clean up the temporary credentials
+        delete_credential()
+
+        # Clean up the temporary directory
+        shutil.rmtree(self.work_dir)
+
+    def _construct_file(self):
+
+        # Construct data
+        data_list = [
+            {
+                'id': 1,
+                'value': 3.14
+            },
+            {
+                'id': 2,
+                'value': 2.71
+            },
+            {
+                'id': 3,
+                'value': 3.69
+            },
+            {
+                'id': 4,
+                'value': 9.31
+            },
+            {
+                'id': 5,
+                'value': 1.21
+            },
+        ]
+
+        with open(self.model_file_path, 'wb') as f:
+            for entry in data_list:
+                packed_data = struct.pack('if', entry['id'], entry['value'])
+                f.write(packed_data)
+        logger.info(f'Constructed model file: {self.model_file_path}')
+
+        with open(self.dataset_file_path, 'w') as f:
+            for entry in data_list:
+                f.write(json.dumps(entry) + '\n')
+        logger.info(f'Constructed dataset file: {self.dataset_file_path}')
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_upload_file_folder(self):
+        """
+        Test uploading file/folder to the model/dataset repository.
+        """
+
+        commit_info_upload_file_model = self.api.upload_file(
+            path_or_fileobj=self.model_file_path,
+            path_in_repo=os.path.basename(self.model_file_path),
+            repo_id=self.repo_id_model,
+            repo_type=REPO_TYPE_MODEL,
+            commit_message='Add model file for CI_TEST',
+        )
+        self.assertTrue(commit_info_upload_file_model is not None)
+
+        commit_info_upload_file_dataset = self.api.upload_file(
+            path_or_fileobj=self.dataset_file_path,
+            path_in_repo=os.path.basename(self.dataset_file_path),
+            repo_id=self.repo_id_dataset,
+            repo_type=REPO_TYPE_DATASET,
+            commit_message='Add dataset file for CI_TEST',
+        )
+        self.assertTrue(commit_info_upload_file_dataset is not None)
+
+        commit_info_upload_folder_model = self.api.upload_folder(
+            repo_id=self.repo_id_model,
+            folder_path=self.work_dir,
+            path_in_repo='test_data',
+            repo_type=REPO_TYPE_MODEL,
+            commit_message='Add model folder for CI_TEST',
+        )
+        self.assertTrue(commit_info_upload_folder_model is not None)
+
+        commit_info_upload_folder_dataset = self.api.upload_folder(
+            repo_id=self.repo_id_dataset,
+            folder_path=self.work_dir,
+            path_in_repo='test_data',
+            repo_type=REPO_TYPE_DATASET,
+            commit_message='Add dataset folder for CI_TEST',
+        )
+        self.assertTrue(commit_info_upload_folder_dataset is not None)
@@ -1,137 +0,0 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import unittest
|
||||
import zipfile
|
||||
|
||||
from modelscope.msdatasets import MsDataset
|
||||
from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects
|
||||
from modelscope.utils import logger as logging
|
||||
from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode,
|
||||
ModelFile)
|
||||
from modelscope.utils.test_utils import test_level
|
||||
|
||||
logger = logging.get_logger()
|
||||
|
||||
KEY_EXTRACTED = 'extracted'
|
||||
|
||||
|
||||
class DatasetUploadTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.old_dir = os.getcwd()
|
||||
self.dataset_name = 'small_coco_for_test'
|
||||
self.dataset_file_name = self.dataset_name
|
||||
self.prepared_dataset_name = 'pets_small'
|
||||
self.token = os.getenv('TEST_UPLOAD_MS_TOKEN')
|
||||
error_msg = 'The modelscope token can not be empty, please set env variable: TEST_UPLOAD_MS_TOKEN'
|
||||
self.assertIsNotNone(self.token, msg=error_msg)
|
||||
from modelscope.hub.api import HubApi
|
||||
from modelscope.hub.api import ModelScopeConfig
|
||||
self.api = HubApi()
|
||||
self.api.login(self.token)
|
||||
|
||||
# get user info
|
||||
self.namespace, _ = ModelScopeConfig.get_user_info()
|
||||
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
self.test_work_dir = os.path.join(self.temp_dir, self.dataset_name)
|
||||
self.test_meta_dir = os.path.join(self.test_work_dir, 'meta')
|
||||
if not os.path.exists(self.test_work_dir):
|
||||
os.makedirs(self.test_work_dir)
|
||||
|
||||
def tearDown(self):
|
||||
os.chdir(self.old_dir)
|
||||
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
||||
logger.info(
|
||||
f'Temporary directory {self.temp_dir} successfully removed!')
|
||||
|
||||
@staticmethod
|
||||
def get_raw_downloaded_file_path(extracted_path):
|
||||
raw_downloaded_file_path = ''
|
||||
raw_data_dir = os.path.abspath(
|
||||
os.path.join(extracted_path, '../../..'))
|
||||
for root, dirs, files in os.walk(raw_data_dir):
|
||||
if KEY_EXTRACTED in dirs:
|
||||
for file in files:
|
||||
curr_file_path = os.path.join(root, file)
|
||||
if zipfile.is_zipfile(curr_file_path):
|
||||
raw_downloaded_file_path = curr_file_path
|
||||
return raw_downloaded_file_path
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_ds_upload(self):
|
||||
# Get the prepared data from hub, using default modelscope namespace
|
||||
ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train')
|
||||
config_res = ms_ds_train._hf_ds.config_kwargs
|
||||
extracted_path = config_res.get('split_config').get('train')
|
||||
raw_zipfile_path = self.get_raw_downloaded_file_path(extracted_path)
|
||||
|
||||
MsDataset.upload(
|
||||
object_name=self.dataset_file_name + '.zip',
|
||||
local_file_path=raw_zipfile_path,
|
||||
dataset_name=self.dataset_name,
|
||||
namespace=self.namespace)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_ds_upload_dir(self):
|
||||
ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train')
|
||||
config_train = ms_ds_train._hf_ds.config_kwargs
|
||||
extracted_path_train = config_train.get('split_config').get('train')
|
||||
|
||||
MsDataset.upload(
|
||||
object_name='train',
|
||||
local_file_path=os.path.join(extracted_path_train,
|
||||
'Pets/images/train'),
|
||||
dataset_name=self.dataset_name,
|
||||
namespace=self.namespace)
|
||||
MsDataset.upload(
|
||||
object_name='val',
|
||||
local_file_path=os.path.join(extracted_path_train,
|
||||
'Pets/images/val'),
|
||||
dataset_name=self.dataset_name,
|
||||
namespace=self.namespace)
|
||||
|
||||
objects = list_dataset_objects(
|
||||
hub_api=self.api,
|
||||
max_limit=-1,
|
||||
is_recursive=True,
|
||||
dataset_name=self.dataset_name,
|
||||
namespace=self.namespace,
|
||||
version=DEFAULT_DATASET_REVISION)
|
||||
|
||||
logger.info(f'{len(objects)} objects have been uploaded: {objects}')
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_ds_download_dir(self):
|
||||
test_ds = MsDataset.load(
|
||||
self.dataset_name,
|
||||
namespace=self.namespace,
|
||||
download_mode=DownloadMode.FORCE_REDOWNLOAD)
|
||||
assert test_ds.config_kwargs['split_config'].values()
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_ds_clone_meta(self):
|
||||
MsDataset.clone_meta(
|
||||
dataset_work_dir=self.test_meta_dir,
|
||||
dataset_id=os.path.join(self.namespace, self.dataset_name))
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_ds_upload_meta(self):
|
||||
# Clone dataset meta repo first.
|
||||
MsDataset.clone_meta(
|
||||
dataset_work_dir=self.test_meta_dir,
|
||||
dataset_id=os.path.join(self.namespace, self.dataset_name))
|
||||
|
||||
with open(os.path.join(self.test_meta_dir, ModelFile.README),
|
||||
'a') as f:
|
||||
f.write('\nThis is a line for unit test.')
|
||||
|
||||
MsDataset.upload_meta(
|
||||
dataset_work_dir=self.test_meta_dir,
|
||||
commit_message='Update for unit test.')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -1,5 +1,5 @@
 # isolate cases in env, we can install different dependencies in each env.
-isolated: # test cases that may require excessive anmount of GPU memory or run long time, which will be executed in dedicagted process.
+isolated: # test cases that may require excessive amount of GPU memory or run long time, which will be executed in dedicated process.
 - test_text_to_speech.py
 - test_multi_modal_embedding.py
 - test_ofa_tasks.py