mirror of
https://github.com/modelscope/modelscope.git
synced 2026-02-24 12:10:09 +01:00
[to #42322933] MsDataset upload bugfix for the 0830 version.
CR link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10109035#tab=changes&file=8348e8153b2f4a6dbd52e471b4980542355408ed Please refer to the following Aone links: 1. https://aone.alibaba-inc.com/v2/project/1162242/bug#viewIdentifier=b622c099e2199bc034401fbe&openWorkitemIdentifier=44889184 2. https://aone.alibaba-inc.com/v2/project/1162242/bug#viewIdentifier=b622c099e2199bc034401fbe&openWorkitemIdentifier=44858810 3. https://aone.alibaba-inc.com/v2/project/1162242/bug#viewIdentifier=b622c099e2199bc034401fbe&openWorkitemIdentifier=44857728 4. https://aone.alibaba-inc.com/v2/project/1162242/bug#viewIdentifier=b622c099e2199bc034401fbe&openWorkitemIdentifier=44658972
This commit is contained in:
@@ -389,7 +389,7 @@ class HubApi:
|
||||
cookies = requests.utils.dict_from_cookiejar(cookies)
|
||||
r = requests.get(url=datahub_url, cookies=cookies)
|
||||
resp = r.json()
|
||||
datahub_raise_on_error(datahub_url, resp)
|
||||
raise_on_error(resp)
|
||||
return resp['Data']
|
||||
|
||||
def on_dataset_download(self, dataset_name: str, namespace: str) -> None:
|
||||
|
||||
@@ -60,7 +60,7 @@ def raise_on_error(rsp):
|
||||
Args:
|
||||
rsp (_type_): The server response
|
||||
"""
|
||||
if rsp['Code'] == HTTPStatus.OK and rsp['Success']:
|
||||
if rsp['Code'] == HTTPStatus.OK:
|
||||
return True
|
||||
else:
|
||||
raise RequestError(rsp['Message'])
|
||||
|
||||
@@ -51,12 +51,16 @@ class GitCommandWrapper(metaclass=Singleton):
|
||||
response.check_returncode()
|
||||
return response
|
||||
except subprocess.CalledProcessError as error:
|
||||
logger.error(
|
||||
'There are error run git command, you may need to login first.'
|
||||
)
|
||||
raise GitError(
|
||||
'stdout: %s, stderr: %s' %
|
||||
(response.stdout.decode('utf8'), error.stderr.decode('utf8')))
|
||||
if response.returncode == 1:
|
||||
logger.info('Nothing to commit.')
|
||||
return response
|
||||
else:
|
||||
logger.error(
|
||||
'There are error run git command, you may need to login first.'
|
||||
)
|
||||
raise GitError('stdout: %s, stderr: %s' %
|
||||
(response.stdout.decode('utf8'),
|
||||
error.stderr.decode('utf8')))
|
||||
|
||||
def config_auth_token(self, repo_dir, auth_token):
|
||||
url = self.get_repo_remote_url(repo_dir)
|
||||
|
||||
@@ -40,6 +40,11 @@ class Repository:
|
||||
self.model_dir = model_dir
|
||||
self.model_base_dir = os.path.dirname(model_dir)
|
||||
self.model_repo_name = os.path.basename(model_dir)
|
||||
|
||||
if not revision:
|
||||
err_msg = 'a non-default value of revision cannot be empty.'
|
||||
raise InvalidParameter(err_msg)
|
||||
|
||||
if auth_token:
|
||||
self.auth_token = auth_token
|
||||
else:
|
||||
@@ -145,10 +150,21 @@ class DatasetRepository:
|
||||
The git command line path, if None, we use 'git'
|
||||
"""
|
||||
self.dataset_id = dataset_id
|
||||
self.repo_work_dir = repo_work_dir
|
||||
self.repo_base_dir = os.path.dirname(repo_work_dir)
|
||||
self.repo_name = os.path.basename(repo_work_dir)
|
||||
if not repo_work_dir or not isinstance(repo_work_dir, str):
|
||||
err_msg = 'dataset_work_dir must be provided!'
|
||||
raise InvalidParameter(err_msg)
|
||||
self.repo_work_dir = repo_work_dir.rstrip('/')
|
||||
if not self.repo_work_dir:
|
||||
err_msg = 'dataset_work_dir can not be root dir!'
|
||||
raise InvalidParameter(err_msg)
|
||||
self.repo_base_dir = os.path.dirname(self.repo_work_dir)
|
||||
self.repo_name = os.path.basename(self.repo_work_dir)
|
||||
|
||||
if not revision:
|
||||
err_msg = 'a non-default value of revision cannot be empty.'
|
||||
raise InvalidParameter(err_msg)
|
||||
self.revision = revision
|
||||
|
||||
if auth_token:
|
||||
self.auth_token = auth_token
|
||||
else:
|
||||
@@ -199,7 +215,9 @@ class DatasetRepository:
|
||||
self.git_wrapper.config_auth_token(self.repo_work_dir, self.auth_token)
|
||||
self.git_wrapper.add_user_info(self.repo_base_dir, self.repo_name)
|
||||
|
||||
remote_url = self.git_wrapper.get_repo_remote_url(self.repo_work_dir)
|
||||
remote_url = self._get_remote_url()
|
||||
remote_url = self.git_wrapper.remove_token_from_url(remote_url)
|
||||
|
||||
self.git_wrapper.pull(self.repo_work_dir)
|
||||
self.git_wrapper.add(self.repo_work_dir, all_files=True)
|
||||
self.git_wrapper.commit(self.repo_work_dir, commit_message)
|
||||
|
||||
@@ -220,18 +220,23 @@ class MsDataset:
|
||||
api = HubApi()
|
||||
download_dataset = ''
|
||||
if isinstance(dataset_name, str):
|
||||
download_dataset = dataset_name
|
||||
dataset_formation = DatasetFormations.native
|
||||
if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \
|
||||
(os.path.isfile(dataset_name) and dataset_name.endswith('.py')):
|
||||
if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(
|
||||
dataset_name):
|
||||
dataset_formation = DatasetFormations.hf_compatible
|
||||
elif os.path.isfile(dataset_name) and dataset_name.endswith('.py'):
|
||||
dataset_formation = DatasetFormations.hf_compatible
|
||||
file_name = os.path.basename(dataset_name)
|
||||
download_dataset = os.path.splitext(file_name)[0]
|
||||
elif is_relative_path(dataset_name) and dataset_name.count(
|
||||
'/') == 0:
|
||||
download_dataset = dataset_name
|
||||
dataset_scripts, dataset_formation, download_dir = api.fetch_dataset_scripts(
|
||||
dataset_name, namespace, download_mode, version)
|
||||
# dataset organized to be compatible with hf format
|
||||
if dataset_formation == DatasetFormations.hf_compatible:
|
||||
dataset_name = dataset_scripts['.py'][0]
|
||||
download_dataset = dataset_name
|
||||
else:
|
||||
raise FileNotFoundError(
|
||||
f"Couldn't find a dataset script at {relative_to_absolute_path(dataset_name)} "
|
||||
@@ -268,8 +273,11 @@ class MsDataset:
|
||||
f' {type(dataset_name)}')
|
||||
|
||||
if download_dataset:
|
||||
api.on_dataset_download(
|
||||
dataset_name=download_dataset, namespace=namespace)
|
||||
try:
|
||||
api.on_dataset_download(
|
||||
dataset_name=download_dataset, namespace=namespace)
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
|
||||
return MsDataset.from_hf_dataset(dataset, target=target)
|
||||
|
||||
@@ -587,7 +595,7 @@ class MsDataset:
|
||||
"""Clone meta-file of dataset from the ModelScope Hub.
|
||||
Args:
|
||||
dataset_work_dir (str): Current git working directory.
|
||||
dataset_id (str): Dataset id, It should be like your-namespace/your-dataset-name .
|
||||
dataset_id (str): Dataset id, in the form of your-namespace/your-dataset-name .
|
||||
revision(`Optional[str]`):
|
||||
revision of the model you want to clone from. Can be any of a branch, tag or commit hash
|
||||
auth_token(`Optional[str]`):
|
||||
@@ -609,11 +617,11 @@ class MsDataset:
|
||||
if clone_work_dir:
|
||||
logger.info('Already cloned repo to: {}'.format(clone_work_dir))
|
||||
else:
|
||||
logger.warning('The repo working dir is already ex.')
|
||||
logger.warning(
|
||||
'Repo dir already exists: {}'.format(clone_work_dir))
|
||||
|
||||
@staticmethod
|
||||
def upload_meta(dataset_work_dir: str,
|
||||
dataset_id: str,
|
||||
commit_message: str,
|
||||
revision: Optional[str] = DEFAULT_DATASET_REVISION,
|
||||
auth_token: Optional[str] = None,
|
||||
@@ -623,7 +631,6 @@ class MsDataset:
|
||||
|
||||
Args:
|
||||
dataset_work_dir (str): Current working directory.
|
||||
dataset_id (str): Dataset id, It should be like your-namespace/your-dataset-name .
|
||||
commit_message (str): Commit message.
|
||||
revision(`Optional[str]`):
|
||||
revision of the model you want to clone from. Can be any of a branch, tag or commit hash
|
||||
@@ -640,7 +647,7 @@ class MsDataset:
|
||||
"""
|
||||
_repo = DatasetRepository(
|
||||
repo_work_dir=dataset_work_dir,
|
||||
dataset_id=dataset_id,
|
||||
dataset_id='',
|
||||
revision=revision,
|
||||
auth_token=auth_token,
|
||||
git_path=git_path)
|
||||
|
||||
@@ -87,7 +87,6 @@ class DatasetUploadTest(unittest.TestCase):
|
||||
|
||||
MsDataset.upload_meta(
|
||||
dataset_work_dir=self.test_meta_dir,
|
||||
dataset_id=os.path.join(self.namespace, self.dataset_name),
|
||||
commit_message='Update for unit test.')
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user