mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-16 08:17:45 +01:00
Update upload func (#1204)
* update upload * update * temp * fix pr comments * update api.py
This commit is contained in:
@@ -5,9 +5,6 @@ from argparse import ArgumentParser, _SubParsersAction
|
||||
from modelscope.cli.base import CLICommand
|
||||
from modelscope.hub.api import HubApi, ModelScopeConfig
|
||||
from modelscope.utils.constant import REPO_TYPE_MODEL, REPO_TYPE_SUPPORT
|
||||
from modelscope.utils.logger import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def subparser_func(args):
|
||||
@@ -153,7 +150,7 @@ class UploadCMD(CLICommand):
|
||||
)
|
||||
|
||||
if os.path.isfile(self.local_path):
|
||||
commit_info = api.upload_file(
|
||||
api.upload_file(
|
||||
path_or_fileobj=self.local_path,
|
||||
path_in_repo=self.path_in_repo,
|
||||
repo_id=self.repo_id,
|
||||
@@ -162,7 +159,7 @@ class UploadCMD(CLICommand):
|
||||
commit_description=self.args.commit_description,
|
||||
)
|
||||
elif os.path.isdir(self.local_path):
|
||||
commit_info = api.upload_folder(
|
||||
api.upload_folder(
|
||||
repo_id=self.repo_id,
|
||||
folder_path=self.local_path,
|
||||
path_in_repo=self.path_in_repo,
|
||||
@@ -176,4 +173,4 @@ class UploadCMD(CLICommand):
|
||||
else:
|
||||
raise ValueError(f'{self.local_path} is not a valid local path')
|
||||
|
||||
logger.info(f'Upload finished, commit info: {commit_info}')
|
||||
print(f'Finished uploading to {self.repo_id}')
|
||||
|
||||
@@ -1368,11 +1368,13 @@ class HubApi:
|
||||
add_operation: CommitOperationAdd = CommitOperationAdd(
|
||||
path_in_repo=path_in_repo,
|
||||
path_or_fileobj=path_or_fileobj,
|
||||
file_hash_info=hash_info_d,
|
||||
)
|
||||
add_operation._upload_mode = 'lfs' if self.upload_checker.is_lfs(path_or_fileobj, repo_type) else 'normal'
|
||||
add_operation._is_uploaded = upload_res['is_uploaded']
|
||||
operations = [add_operation]
|
||||
|
||||
print(f'Committing file to {repo_id} ...')
|
||||
commit_info: CommitInfo = self.create_commit(
|
||||
repo_id=repo_id,
|
||||
operations=operations,
|
||||
@@ -1459,6 +1461,7 @@ class HubApi:
|
||||
'file_path_in_repo': file_path_in_repo,
|
||||
'file_path': file_path,
|
||||
'is_uploaded': upload_res['is_uploaded'],
|
||||
'file_hash_info': hash_info_d,
|
||||
}
|
||||
|
||||
uploaded_items_list = _upload_items(
|
||||
@@ -1472,8 +1475,6 @@ class HubApi:
|
||||
disable_tqdm=False,
|
||||
)
|
||||
|
||||
logger.info(f'Uploading folder to {repo_id} finished')
|
||||
|
||||
# Construct commit info and create commit
|
||||
operations = []
|
||||
|
||||
@@ -1481,9 +1482,11 @@ class HubApi:
|
||||
prepared_path_in_repo: str = item_d['file_path_in_repo']
|
||||
prepared_file_path: str = item_d['file_path']
|
||||
is_uploaded: bool = item_d['is_uploaded']
|
||||
file_hash_info: dict = item_d['file_hash_info']
|
||||
opt = CommitOperationAdd(
|
||||
path_in_repo=prepared_path_in_repo,
|
||||
path_or_fileobj=prepared_file_path,
|
||||
file_hash_info=file_hash_info,
|
||||
)
|
||||
|
||||
# check normal or lfs
|
||||
@@ -1491,7 +1494,8 @@ class HubApi:
|
||||
opt._is_uploaded = is_uploaded
|
||||
operations.append(opt)
|
||||
|
||||
self.create_commit(
|
||||
print(f'Committing folder to {repo_id} ...')
|
||||
commit_info: CommitInfo = self.create_commit(
|
||||
repo_id=repo_id,
|
||||
operations=operations,
|
||||
commit_message=commit_message,
|
||||
@@ -1500,13 +1504,7 @@ class HubApi:
|
||||
repo_type=repo_type,
|
||||
)
|
||||
|
||||
# Construct commit info
|
||||
commit_url = f'{self.endpoint}/api/v1/{repo_type}s/{repo_id}/commit/{DEFAULT_REPOSITORY_REVISION}'
|
||||
return CommitInfo(
|
||||
commit_url=commit_url,
|
||||
commit_message=commit_message,
|
||||
commit_description=commit_description,
|
||||
oid='')
|
||||
return commit_info
|
||||
|
||||
def _upload_blob(
|
||||
self,
|
||||
@@ -1539,7 +1537,7 @@ class HubApi:
|
||||
upload_object = upload_objects[0] if len(upload_objects) == 1 else None
|
||||
|
||||
if upload_object is None:
|
||||
logger.info(f'Blob {sha256} has already uploaded, reuse it.')
|
||||
logger.info(f'Blob {sha256[:8]} has already uploaded, reuse it.')
|
||||
res_d['is_uploaded'] = True
|
||||
return res_d
|
||||
|
||||
|
||||
@@ -22,10 +22,10 @@ DEFAULT_IGNORE_PATTERNS = [
|
||||
'.git/*',
|
||||
'*/.git',
|
||||
'**/.git/**',
|
||||
'.cache/modelscope',
|
||||
'.cache/modelscope/*',
|
||||
'*/.cache/modelscope',
|
||||
'**/.cache/modelscope/**',
|
||||
'.cache',
|
||||
'.cache/*',
|
||||
'*/.cache',
|
||||
'**/.cache/**',
|
||||
]
|
||||
# Forbidden to commit these folders
|
||||
FORBIDDEN_FOLDERS = ['.git', '.cache']
|
||||
@@ -338,9 +338,8 @@ class UploadInfo:
|
||||
sample: bytes
|
||||
|
||||
@classmethod
|
||||
def from_path(cls, path: str):
|
||||
|
||||
file_hash_info: dict = get_file_hash(path)
|
||||
def from_path(cls, path: str, file_hash_info: dict = None):
|
||||
file_hash_info = file_hash_info or get_file_hash(path)
|
||||
size = file_hash_info['file_size']
|
||||
sha = file_hash_info['file_hash']
|
||||
sample = open(path, 'rb').read(512)
|
||||
@@ -348,17 +347,18 @@ class UploadInfo:
|
||||
return cls(sha256=sha, size=size, sample=sample)
|
||||
|
||||
@classmethod
|
||||
def from_bytes(cls, data: bytes):
|
||||
sha = get_file_hash(data)['file_hash']
|
||||
def from_bytes(cls, data: bytes, file_hash_info: dict = None):
|
||||
file_hash_info = file_hash_info or get_file_hash(data)
|
||||
sha = file_hash_info['file_hash']
|
||||
return cls(size=len(data), sample=data[:512], sha256=sha)
|
||||
|
||||
@classmethod
|
||||
def from_fileobj(cls, fileobj: BinaryIO):
|
||||
fileobj_info: dict = get_file_hash(fileobj)
|
||||
def from_fileobj(cls, fileobj: BinaryIO, file_hash_info: dict = None):
|
||||
file_hash_info: dict = file_hash_info or get_file_hash(fileobj)
|
||||
sample = fileobj.read(512)
|
||||
return cls(
|
||||
sha256=fileobj_info['file_hash'],
|
||||
size=fileobj_info['file_size'],
|
||||
sha256=file_hash_info['file_hash'],
|
||||
size=file_hash_info['file_size'],
|
||||
sample=sample)
|
||||
|
||||
|
||||
@@ -369,6 +369,7 @@ class CommitOperationAdd:
|
||||
path_in_repo: str
|
||||
path_or_fileobj: Union[str, Path, bytes, BinaryIO]
|
||||
upload_info: UploadInfo = field(init=False, repr=False)
|
||||
file_hash_info: dict = field(default_factory=dict)
|
||||
|
||||
# Internal attributes
|
||||
|
||||
@@ -394,6 +395,8 @@ class CommitOperationAdd:
|
||||
def __post_init__(self) -> None:
|
||||
"""Validates `path_or_fileobj` and compute `upload_info`."""
|
||||
|
||||
self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
|
||||
|
||||
# Validate `path_or_fileobj` value
|
||||
if isinstance(self.path_or_fileobj, Path):
|
||||
self.path_or_fileobj = str(self.path_or_fileobj)
|
||||
@@ -420,11 +423,14 @@ class CommitOperationAdd:
|
||||
|
||||
# Compute "upload_info" attribute
|
||||
if isinstance(self.path_or_fileobj, str):
|
||||
self.upload_info = UploadInfo.from_path(self.path_or_fileobj)
|
||||
self.upload_info = UploadInfo.from_path(self.path_or_fileobj,
|
||||
self.file_hash_info)
|
||||
elif isinstance(self.path_or_fileobj, bytes):
|
||||
self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj)
|
||||
self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj,
|
||||
self.file_hash_info)
|
||||
else:
|
||||
self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj)
|
||||
self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj,
|
||||
self.file_hash_info)
|
||||
|
||||
@contextmanager
|
||||
def as_file(self) -> Iterator[BinaryIO]:
|
||||
@@ -476,4 +482,22 @@ class CommitOperationAdd:
|
||||
return git_hash(file.read())
|
||||
|
||||
|
||||
def _validate_path_in_repo(path_in_repo: str) -> str:
|
||||
# Validate `path_in_repo` value to prevent a server-side issue
|
||||
if path_in_repo.startswith('/'):
|
||||
path_in_repo = path_in_repo[1:]
|
||||
if path_in_repo == '.' or path_in_repo == '..' or path_in_repo.startswith(
|
||||
'../'):
|
||||
raise ValueError(
|
||||
f"Invalid `path_in_repo` in CommitOperation: '{path_in_repo}'")
|
||||
if path_in_repo.startswith('./'):
|
||||
path_in_repo = path_in_repo[2:]
|
||||
for forbidden in FORBIDDEN_FOLDERS:
|
||||
if any(part == forbidden for part in path_in_repo.split('/')):
|
||||
raise ValueError(
|
||||
f"Invalid `path_in_repo` in CommitOperation: cannot update files under a '{forbidden}/' folder (path:"
|
||||
f" '{path_in_repo}').")
|
||||
return path_in_repo
|
||||
|
||||
|
||||
CommitOperation = Union[CommitOperationAdd, ]
|
||||
|
||||
Reference in New Issue
Block a user