Add create_repo and upload UTs (#1282)
* add delete_repo, delete_dataset, and exist_ok in create_repo
* add UT for create_repo
@@ -226,7 +226,7 @@ class HubApi:
             headers=self.builder_headers(self.headers))
         handle_http_post_error(r, path, body)
         raise_on_error(r.json())
-        model_repo_url = f'{endpoint}/{model_id}'
+        model_repo_url = f'{endpoint}/models/{model_id}'
         return model_repo_url
 
     def delete_model(self, model_id: str, endpoint: Optional[str] = None):
@@ -401,6 +401,33 @@ class HubApi:
                 'Failed to check existence of repo: %s, make sure you have access authorization.'
                 % repo_type)
 
+    def delete_repo(self, repo_id: str, repo_type: str, endpoint: Optional[str] = None):
+        """
+        Delete a repository from ModelScope.
+
+        Args:
+            repo_id (`str`):
+                A namespace (user or an organization) and a repo name separated
+                by a `/`.
+            repo_type (`str`):
+                The type of the repository. Supported types are `model` and `dataset`.
+            endpoint(`str`):
+                The endpoint to use. If not provided, the default endpoint is `https://www.modelscope.cn`
+                Could be set to `https://ai.modelscope.ai` for international version.
+        """
+
+        if not endpoint:
+            endpoint = self.endpoint
+
+        if repo_type == REPO_TYPE_DATASET:
+            self.delete_dataset(repo_id, endpoint)
+        elif repo_type == REPO_TYPE_MODEL:
+            self.delete_model(repo_id, endpoint)
+        else:
+            raise Exception(f'Arg repo_type {repo_type} not supported.')
+
+        logger.info(f'Repo {repo_id} deleted successfully.')
+
     @staticmethod
     def _create_default_config(model_dir):
         cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
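A minimal usage sketch for the new delete_repo method (not part of the commit); the repo ids and the token below are hypothetical placeholders, and it assumes the logged-in account owns the repos:

from modelscope import HubApi
from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL

api = HubApi()
api.login('YOUR_SDK_TOKEN')  # placeholder token from your ModelScope account settings

# delete_repo dispatches to delete_model or delete_dataset based on repo_type
api.delete_repo(repo_id='my-org/my-model', repo_type=REPO_TYPE_MODEL)
api.delete_repo(repo_id='my-org/my-dataset', repo_type=REPO_TYPE_DATASET)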
@@ -924,6 +951,21 @@ class HubApi:
         dataset_list = r.json()[API_RESPONSE_FIELD_DATA]
         return [x['Name'] for x in dataset_list]
 
+    def delete_dataset(self, dataset_id: str, endpoint: Optional[str] = None):
+
+        cookies = ModelScopeConfig.get_cookies()
+        if not endpoint:
+            endpoint = self.endpoint
+        if cookies is None:
+            raise ValueError('Token does not exist, please login first.')
+
+        path = f'{endpoint}/api/v1/datasets/{dataset_id}'
+        r = self.session.delete(path,
+                                cookies=cookies,
+                                headers=self.builder_headers(self.headers))
+        raise_for_http_status(r)
+        raise_on_error(r.json())
+
     def get_dataset_id_and_type(self, dataset_name: str, namespace: str, endpoint: Optional[str] = None):
         """ Get the dataset id and type. """
         if not endpoint:
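For illustration only, the request that delete_dataset issues can be reproduced with plain requests; this sketch assumes valid login cookies and uses a hypothetical dataset id:

import requests

endpoint = 'https://www.modelscope.cn'   # default endpoint from the docstring above
dataset_id = 'my-org/my-dataset'         # hypothetical dataset id
cookies = {}                             # the SDK loads these via ModelScopeConfig.get_cookies()

# DELETE {endpoint}/api/v1/datasets/{dataset_id}, mirroring the path built in delete_dataset
r = requests.delete(f'{endpoint}/api/v1/datasets/{dataset_id}', cookies=cookies)
r.raise_for_status()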
@@ -1361,15 +1403,42 @@ class HubApi:
             chinese_name: Optional[str] = '',
             license: Optional[str] = Licenses.APACHE_V2,
             endpoint: Optional[str] = None,
+            exist_ok: Optional[bool] = False,
             **kwargs,
     ) -> str:
+        """
+        Create a repository on the ModelScope Hub.
+
+        Args:
+            repo_id (str): The repo id in the format of `owner_name/repo_name`.
+            token (Union[str, bool, None]): The access token.
+            visibility (Optional[str]): The visibility of the repo,
+                could be `public`, `private`, `internal`, default to `public`.
+            repo_type (Optional[str]): The repo type, default to `model`.
+            chinese_name (Optional[str]): The Chinese name of the repo.
+            license (Optional[str]): The license of the repo, default to `apache-2.0`.
+            endpoint (Optional[str]): The endpoint to use.
+                In the format of `https://www.modelscope.cn` or 'https://www.modelscope.ai'
+            exist_ok (Optional[bool]): If the repo exists, whether to return the repo url directly.
+            **kwargs: The additional arguments.
+
+        Returns:
+            str: The repo url.
+        """
-        # TODO: exist_ok
         if not repo_id:
             raise ValueError('Repo id cannot be empty!')
         if not endpoint:
             endpoint = self.endpoint
-        self.login(access_token=token)
+        repo_exists: bool = self.repo_exists(repo_id, repo_type=repo_type, endpoint=endpoint)
+        if repo_exists:
+            if exist_ok:
+                return f'{endpoint}/{repo_type}s/{repo_id}'
+            else:
+                raise ValueError(f'Repo {repo_id} already exists!')
+
+        self.login(access_token=token, endpoint=endpoint)
 
         repo_id_list = repo_id.split('/')
         if len(repo_id_list) != 2:
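A short sketch of how exist_ok changes the behaviour of a repeated call; the repo id and token are hypothetical and the account is assumed to own the namespace:

from modelscope import HubApi
from modelscope.utils.constant import REPO_TYPE_MODEL

api = HubApi()
api.login('YOUR_SDK_TOKEN')  # placeholder token

repo_id = 'my-org/demo-model'  # hypothetical repo id
url = api.create_repo(repo_id=repo_id, repo_type=REPO_TYPE_MODEL)

# Calling again with exist_ok=True returns the existing repo url instead of raising
# ValueError('Repo ... already exists!'); both calls resolve to {endpoint}/models/{repo_id}.
url_again = api.create_repo(repo_id=repo_id, repo_type=REPO_TYPE_MODEL, exist_ok=True)
assert url_again == url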
@@ -1382,7 +1451,6 @@ class HubApi:
             if visibility is None:
                 raise ValueError(f'Invalid visibility: {visibility}, '
                                  f'supported visibilities: `public`, `private`, `internal`')
-            if not self.repo_exists(repo_id, repo_type=repo_type):
             repo_url: str = self.create_model(
                 model_id=repo_id,
                 visibility=visibility,
@@ -1405,8 +1473,6 @@ class HubApi:
                 repo,
                 'configuration.json', [json.dumps(config)],
                 ignore_push_error=True)
-            else:
-                repo_url = f'{endpoint}/{repo_id}'
 
         elif repo_type == REPO_TYPE_DATASET:
             visibilities = {k: v for k, v in DatasetVisibility.__dict__.items() if not k.startswith('__')}
@@ -1414,7 +1480,6 @@ class HubApi:
             if visibility is None:
                 raise ValueError(f'Invalid visibility: {visibility}, '
                                  f'supported visibilities: `public`, `private`, `internal`')
-            if not self.repo_exists(repo_id, repo_type=repo_type):
             repo_url: str = self.create_dataset(
                 dataset_name=repo_name,
                 namespace=namespace,
@@ -1422,12 +1487,12 @@ class HubApi:
                 license=license,
                 visibility=visibility,
             )
-            else:
-                repo_url = f'{endpoint}/datasets/{namespace}/{repo_name}'
 
         else:
             raise ValueError(f'Invalid repo type: {repo_type}, supported repos: {REPO_TYPE_SUPPORT}')
 
+        logger.info(f'Repo created: {repo_url}')
+
         return repo_url
 
     def create_commit(
@@ -323,7 +323,8 @@ class UploadInfo:
         file_hash_info = file_hash_info or get_file_hash(path)
         size = file_hash_info['file_size']
         sha = file_hash_info['file_hash']
-        sample = open(path, 'rb').read(512)
+        with open(path, 'rb') as f:
+            sample = f.read(512)
 
         return cls(sha256=sha, size=size, sample=sample)
 
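The new form closes the file handle deterministically instead of leaving a bare open(...).read(...) descriptor to the garbage collector. A minimal standalone sketch of the same pattern, using a hypothetical scratch file:

import os
import tempfile

# Create a scratch file with some content.
fd, path = tempfile.mkstemp()
with os.fdopen(fd, 'wb') as f:
    f.write(b'x' * 1024)

# Read only the first 512 bytes as a sample; the handle is released when the block exits.
with open(path, 'rb') as f:
    sample = f.read(512)
assert len(sample) == 512
os.remove(path)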
tests/hub/test_create_repo.py (new file, 58 lines)
@@ -0,0 +1,58 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
import uuid

from modelscope import HubApi
from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import TEST_ACCESS_TOKEN1
from modelscope.utils.test_utils import TEST_MODEL_ORG as TEST_ORG
from modelscope.utils.test_utils import delete_credential, test_level

logger = get_logger()


class TestCreateRepo(unittest.TestCase):

    def setUp(self):
        self.api = HubApi()
        self.api.login(TEST_ACCESS_TOKEN1)

        self.repo_id_model: str = f'{TEST_ORG}/test_create_repo_model_{uuid.uuid4().hex[-6:]}'
        self.repo_id_dataset: str = f'{TEST_ORG}/test_create_repo_dataset_{uuid.uuid4().hex[-6:]}'

    def tearDown(self):
        self.api.delete_repo(
            repo_id=self.repo_id_model, repo_type=REPO_TYPE_MODEL)
        self.api.delete_repo(
            repo_id=self.repo_id_dataset, repo_type=REPO_TYPE_DATASET)
        delete_credential()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_create_repo(self):

        logger.info(
            f'TEST: Creating repo {self.repo_id_model} and {self.repo_id_dataset} ...'
        )

        try:
            self.api.create_repo(
                repo_id=self.repo_id_model,
                repo_type=REPO_TYPE_MODEL,
                exist_ok=True)
        except Exception as e:
            logger.error(f'Failed to create repo {self.repo_id_model} !')
            raise e

        try:
            self.api.create_repo(
                repo_id=self.repo_id_dataset,
                repo_type=REPO_TYPE_DATASET,
                exist_ok=True)
        except Exception as e:
            logger.error(f'Failed to create repo {self.repo_id_dataset} !')
            raise e

        logger.info(
            f'TEST: Created repo {self.repo_id_model} and {self.repo_id_dataset} successfully !'
        )
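A sketch of driving the new test module programmatically, assuming the repository root is importable and the test credentials (TEST_ACCESS_TOKEN1, TEST_MODEL_ORG) are configured in modelscope.utils.test_utils:

import unittest

# Load and run tests/hub/test_create_repo.py by its dotted module name.
suite = unittest.defaultTestLoader.loadTestsFromName('tests.hub.test_create_repo')
unittest.TextTestRunner(verbosity=2).run(suite)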
tests/hub/test_upload_file_folder.py (new file, 138 lines)
@@ -0,0 +1,138 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import struct
import tempfile
import unittest
import uuid

import json

from modelscope import HubApi
from modelscope.utils.constant import REPO_TYPE_DATASET, REPO_TYPE_MODEL
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import TEST_ACCESS_TOKEN1
from modelscope.utils.test_utils import TEST_MODEL_ORG as TEST_ORG
from modelscope.utils.test_utils import delete_credential, test_level

logger = get_logger()


class TestUploadFileFolder(unittest.TestCase):

    def setUp(self):
        self.api = HubApi()
        self.api.login(TEST_ACCESS_TOKEN1)

        self.repo_id_model: str = f'{TEST_ORG}/test_upload_file_folder_model_{uuid.uuid4().hex[-6:]}'
        self.repo_id_dataset: str = f'{TEST_ORG}/test_upload_file_folder_dataset_{uuid.uuid4().hex[-6:]}'

        self.work_dir = tempfile.mkdtemp()
        self.model_file_path = f'{self.work_dir}/test_model.bin'
        self.dataset_file_path = f'{self.work_dir}/test_data.jsonl'

        logger.info(f'Work directory: {self.work_dir}')

        self.api.create_repo(
            repo_id=self.repo_id_model,
            repo_type=REPO_TYPE_MODEL,
            exist_ok=True)
        self.api.create_repo(
            repo_id=self.repo_id_dataset,
            repo_type=REPO_TYPE_DATASET,
            exist_ok=True)

        self._construct_file()

    def tearDown(self):

        # Remove repositories
        self.api.delete_repo(
            repo_id=self.repo_id_model, repo_type=REPO_TYPE_MODEL)
        self.api.delete_repo(
            repo_id=self.repo_id_dataset, repo_type=REPO_TYPE_DATASET)

        # Clean up the temporary credentials
        delete_credential()

        # Clean up the temporary directory
        shutil.rmtree(self.work_dir)

    def _construct_file(self):

        # Construct data
        data_list = [
            {
                'id': 1,
                'value': 3.14
            },
            {
                'id': 2,
                'value': 2.71
            },
            {
                'id': 3,
                'value': 3.69
            },
            {
                'id': 4,
                'value': 9.31
            },
            {
                'id': 5,
                'value': 1.21
            },
        ]

        with open(self.model_file_path, 'wb') as f:
            for entry in data_list:
                packed_data = struct.pack('if', entry['id'], entry['value'])
                f.write(packed_data)
        logger.info(f'Constructed model file: {self.model_file_path}')

        with open(self.dataset_file_path, 'w') as f:
            for entry in data_list:
                f.write(json.dumps(entry) + '\n')
        logger.info(f'Constructed dataset file: {self.dataset_file_path}')

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_upload_file_folder(self):
        """
        Test uploading file/folder to the model/dataset repository.
        """

        commit_info_upload_file_model = self.api.upload_file(
            path_or_fileobj=self.model_file_path,
            path_in_repo=os.path.basename(self.model_file_path),
            repo_id=self.repo_id_model,
            repo_type=REPO_TYPE_MODEL,
            commit_message='Add model file for CI_TEST',
        )
        self.assertTrue(commit_info_upload_file_model is not None)

        commit_info_upload_file_dataset = self.api.upload_file(
            path_or_fileobj=self.dataset_file_path,
            path_in_repo=os.path.basename(self.dataset_file_path),
            repo_id=self.repo_id_dataset,
            repo_type=REPO_TYPE_DATASET,
            commit_message='Add dataset file for CI_TEST',
        )
        self.assertTrue(commit_info_upload_file_dataset is not None)

        commit_info_upload_folder_model = self.api.upload_folder(
            repo_id=self.repo_id_model,
            folder_path=self.work_dir,
            path_in_repo='test_data',
            repo_type=REPO_TYPE_MODEL,
            commit_message='Add model folder for CI_TEST',
        )
        self.assertTrue(commit_info_upload_folder_model is not None)

        commit_info_upload_folder_dataset = self.api.upload_folder(
            repo_id=self.repo_id_dataset,
            folder_path=self.work_dir,
            path_in_repo='test_data',
            repo_type=REPO_TYPE_DATASET,
            commit_message='Add dataset folder for CI_TEST',
        )
        self.assertTrue(commit_info_upload_folder_dataset is not None)
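For reference, each record that _construct_file writes with struct.pack('if', ...) is a native int plus a float (typically 8 bytes). A small sketch of reading such a file back; the local path is a hypothetical copy of the constructed file:

import struct

record_size = struct.calcsize('if')  # usually 8 bytes: 4-byte int + 4-byte float

with open('test_model.bin', 'rb') as f:  # hypothetical local copy
    payload = f.read()

# Iterate over the fixed-size (id, value) records.
for rec_id, value in struct.iter_unpack('if', payload):
    print(rec_id, round(value, 2))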
@@ -1,137 +0,0 @@ (all 137 lines removed)
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest
import zipfile

from modelscope.msdatasets import MsDataset
from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects
from modelscope.utils import logger as logging
from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode,
                                       ModelFile)
from modelscope.utils.test_utils import test_level

logger = logging.get_logger()

KEY_EXTRACTED = 'extracted'


class DatasetUploadTest(unittest.TestCase):

    def setUp(self):
        self.old_dir = os.getcwd()
        self.dataset_name = 'small_coco_for_test'
        self.dataset_file_name = self.dataset_name
        self.prepared_dataset_name = 'pets_small'
        self.token = os.getenv('TEST_UPLOAD_MS_TOKEN')
        error_msg = 'The modelscope token can not be empty, please set env variable: TEST_UPLOAD_MS_TOKEN'
        self.assertIsNotNone(self.token, msg=error_msg)
        from modelscope.hub.api import HubApi
        from modelscope.hub.api import ModelScopeConfig
        self.api = HubApi()
        self.api.login(self.token)

        # get user info
        self.namespace, _ = ModelScopeConfig.get_user_info()

        self.temp_dir = tempfile.mkdtemp()
        self.test_work_dir = os.path.join(self.temp_dir, self.dataset_name)
        self.test_meta_dir = os.path.join(self.test_work_dir, 'meta')
        if not os.path.exists(self.test_work_dir):
            os.makedirs(self.test_work_dir)

    def tearDown(self):
        os.chdir(self.old_dir)
        shutil.rmtree(self.temp_dir, ignore_errors=True)
        logger.info(
            f'Temporary directory {self.temp_dir} successfully removed!')

    @staticmethod
    def get_raw_downloaded_file_path(extracted_path):
        raw_downloaded_file_path = ''
        raw_data_dir = os.path.abspath(
            os.path.join(extracted_path, '../../..'))
        for root, dirs, files in os.walk(raw_data_dir):
            if KEY_EXTRACTED in dirs:
                for file in files:
                    curr_file_path = os.path.join(root, file)
                    if zipfile.is_zipfile(curr_file_path):
                        raw_downloaded_file_path = curr_file_path
        return raw_downloaded_file_path

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ds_upload(self):
        # Get the prepared data from hub, using default modelscope namespace
        ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train')
        config_res = ms_ds_train._hf_ds.config_kwargs
        extracted_path = config_res.get('split_config').get('train')
        raw_zipfile_path = self.get_raw_downloaded_file_path(extracted_path)

        MsDataset.upload(
            object_name=self.dataset_file_name + '.zip',
            local_file_path=raw_zipfile_path,
            dataset_name=self.dataset_name,
            namespace=self.namespace)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ds_upload_dir(self):
        ms_ds_train = MsDataset.load(self.prepared_dataset_name, split='train')
        config_train = ms_ds_train._hf_ds.config_kwargs
        extracted_path_train = config_train.get('split_config').get('train')

        MsDataset.upload(
            object_name='train',
            local_file_path=os.path.join(extracted_path_train,
                                         'Pets/images/train'),
            dataset_name=self.dataset_name,
            namespace=self.namespace)
        MsDataset.upload(
            object_name='val',
            local_file_path=os.path.join(extracted_path_train,
                                         'Pets/images/val'),
            dataset_name=self.dataset_name,
            namespace=self.namespace)

        objects = list_dataset_objects(
            hub_api=self.api,
            max_limit=-1,
            is_recursive=True,
            dataset_name=self.dataset_name,
            namespace=self.namespace,
            version=DEFAULT_DATASET_REVISION)

        logger.info(f'{len(objects)} objects have been uploaded: {objects}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ds_download_dir(self):
        test_ds = MsDataset.load(
            self.dataset_name,
            namespace=self.namespace,
            download_mode=DownloadMode.FORCE_REDOWNLOAD)
        assert test_ds.config_kwargs['split_config'].values()

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ds_clone_meta(self):
        MsDataset.clone_meta(
            dataset_work_dir=self.test_meta_dir,
            dataset_id=os.path.join(self.namespace, self.dataset_name))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ds_upload_meta(self):
        # Clone dataset meta repo first.
        MsDataset.clone_meta(
            dataset_work_dir=self.test_meta_dir,
            dataset_id=os.path.join(self.namespace, self.dataset_name))

        with open(os.path.join(self.test_meta_dir, ModelFile.README),
                  'a') as f:
            f.write('\nThis is a line for unit test.')

        MsDataset.upload_meta(
            dataset_work_dir=self.test_meta_dir,
            commit_message='Update for unit test.')


if __name__ == '__main__':
    unittest.main()
@@ -1,5 +1,5 @@
 # isolate cases in env, we can install different dependencies in each env.
-isolated: # test cases that may require excessive anmount of GPU memory or run long time, which will be executed in dedicagted process.
+isolated: # test cases that may require excessive amount of GPU memory or run long time, which will be executed in dedicated process.
   - test_text_to_speech.py
   - test_multi_modal_embedding.py
   - test_ofa_tasks.py