Update upload func (#1204)

* update upload

* update

* temp

* fix pr comments

* update api.py
This commit is contained in:
Xingjun.Wang
2025-02-05 20:09:18 +08:00
committed by GitHub
parent d53dc6d703
commit 50726e01d5
3 changed files with 52 additions and 33 deletions

View File

@@ -5,9 +5,6 @@ from argparse import ArgumentParser, _SubParsersAction
from modelscope.cli.base import CLICommand
from modelscope.hub.api import HubApi, ModelScopeConfig
from modelscope.utils.constant import REPO_TYPE_MODEL, REPO_TYPE_SUPPORT
from modelscope.utils.logger import get_logger
logger = get_logger()
def subparser_func(args):
@@ -153,7 +150,7 @@ class UploadCMD(CLICommand):
)
if os.path.isfile(self.local_path):
commit_info = api.upload_file(
api.upload_file(
path_or_fileobj=self.local_path,
path_in_repo=self.path_in_repo,
repo_id=self.repo_id,
@@ -162,7 +159,7 @@ class UploadCMD(CLICommand):
commit_description=self.args.commit_description,
)
elif os.path.isdir(self.local_path):
commit_info = api.upload_folder(
api.upload_folder(
repo_id=self.repo_id,
folder_path=self.local_path,
path_in_repo=self.path_in_repo,
@@ -176,4 +173,4 @@ class UploadCMD(CLICommand):
else:
raise ValueError(f'{self.local_path} is not a valid local path')
logger.info(f'Upload finished, commit info: {commit_info}')
print(f'Finished uploading to {self.repo_id}')

View File

@@ -1368,11 +1368,13 @@ class HubApi:
add_operation: CommitOperationAdd = CommitOperationAdd(
path_in_repo=path_in_repo,
path_or_fileobj=path_or_fileobj,
file_hash_info=hash_info_d,
)
add_operation._upload_mode = 'lfs' if self.upload_checker.is_lfs(path_or_fileobj, repo_type) else 'normal'
add_operation._is_uploaded = upload_res['is_uploaded']
operations = [add_operation]
print(f'Committing file to {repo_id} ...')
commit_info: CommitInfo = self.create_commit(
repo_id=repo_id,
operations=operations,
@@ -1459,6 +1461,7 @@ class HubApi:
'file_path_in_repo': file_path_in_repo,
'file_path': file_path,
'is_uploaded': upload_res['is_uploaded'],
'file_hash_info': hash_info_d,
}
uploaded_items_list = _upload_items(
@@ -1472,8 +1475,6 @@ class HubApi:
disable_tqdm=False,
)
logger.info(f'Uploading folder to {repo_id} finished')
# Construct commit info and create commit
operations = []
@@ -1481,9 +1482,11 @@ class HubApi:
prepared_path_in_repo: str = item_d['file_path_in_repo']
prepared_file_path: str = item_d['file_path']
is_uploaded: bool = item_d['is_uploaded']
file_hash_info: dict = item_d['file_hash_info']
opt = CommitOperationAdd(
path_in_repo=prepared_path_in_repo,
path_or_fileobj=prepared_file_path,
file_hash_info=file_hash_info,
)
# check normal or lfs
@@ -1491,7 +1494,8 @@ class HubApi:
opt._is_uploaded = is_uploaded
operations.append(opt)
self.create_commit(
print(f'Committing folder to {repo_id} ...')
commit_info: CommitInfo = self.create_commit(
repo_id=repo_id,
operations=operations,
commit_message=commit_message,
@@ -1500,13 +1504,7 @@ class HubApi:
repo_type=repo_type,
)
# Construct commit info
commit_url = f'{self.endpoint}/api/v1/{repo_type}s/{repo_id}/commit/{DEFAULT_REPOSITORY_REVISION}'
return CommitInfo(
commit_url=commit_url,
commit_message=commit_message,
commit_description=commit_description,
oid='')
return commit_info
def _upload_blob(
self,
@@ -1539,7 +1537,7 @@ class HubApi:
upload_object = upload_objects[0] if len(upload_objects) == 1 else None
if upload_object is None:
logger.info(f'Blob {sha256} has already uploaded, reuse it.')
logger.info(f'Blob {sha256[:8]} has already uploaded, reuse it.')
res_d['is_uploaded'] = True
return res_d

View File

@@ -22,10 +22,10 @@ DEFAULT_IGNORE_PATTERNS = [
'.git/*',
'*/.git',
'**/.git/**',
'.cache/modelscope',
'.cache/modelscope/*',
'*/.cache/modelscope',
'**/.cache/modelscope/**',
'.cache',
'.cache/*',
'*/.cache',
'**/.cache/**',
]
# Forbidden to commit these folders
FORBIDDEN_FOLDERS = ['.git', '.cache']
@@ -338,9 +338,8 @@ class UploadInfo:
sample: bytes
@classmethod
def from_path(cls, path: str):
file_hash_info: dict = get_file_hash(path)
def from_path(cls, path: str, file_hash_info: dict = None):
file_hash_info = file_hash_info or get_file_hash(path)
size = file_hash_info['file_size']
sha = file_hash_info['file_hash']
sample = open(path, 'rb').read(512)
@@ -348,17 +347,18 @@ class UploadInfo:
return cls(sha256=sha, size=size, sample=sample)
@classmethod
def from_bytes(cls, data: bytes):
sha = get_file_hash(data)['file_hash']
def from_bytes(cls, data: bytes, file_hash_info: dict = None):
file_hash_info = file_hash_info or get_file_hash(data)
sha = file_hash_info['file_hash']
return cls(size=len(data), sample=data[:512], sha256=sha)
@classmethod
def from_fileobj(cls, fileobj: BinaryIO):
fileobj_info: dict = get_file_hash(fileobj)
def from_fileobj(cls, fileobj: BinaryIO, file_hash_info: dict = None):
file_hash_info: dict = file_hash_info or get_file_hash(fileobj)
sample = fileobj.read(512)
return cls(
sha256=fileobj_info['file_hash'],
size=fileobj_info['file_size'],
sha256=file_hash_info['file_hash'],
size=file_hash_info['file_size'],
sample=sample)
@@ -369,6 +369,7 @@ class CommitOperationAdd:
path_in_repo: str
path_or_fileobj: Union[str, Path, bytes, BinaryIO]
upload_info: UploadInfo = field(init=False, repr=False)
file_hash_info: dict = field(default_factory=dict)
# Internal attributes
@@ -394,6 +395,8 @@ class CommitOperationAdd:
def __post_init__(self) -> None:
"""Validates `path_or_fileobj` and compute `upload_info`."""
self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
# Validate `path_or_fileobj` value
if isinstance(self.path_or_fileobj, Path):
self.path_or_fileobj = str(self.path_or_fileobj)
@@ -420,11 +423,14 @@ class CommitOperationAdd:
# Compute "upload_info" attribute
if isinstance(self.path_or_fileobj, str):
self.upload_info = UploadInfo.from_path(self.path_or_fileobj)
self.upload_info = UploadInfo.from_path(self.path_or_fileobj,
self.file_hash_info)
elif isinstance(self.path_or_fileobj, bytes):
self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj)
self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj,
self.file_hash_info)
else:
self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj)
self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj,
self.file_hash_info)
@contextmanager
def as_file(self) -> Iterator[BinaryIO]:
@@ -476,4 +482,22 @@ class CommitOperationAdd:
return git_hash(file.read())
def _validate_path_in_repo(path_in_repo: str) -> str:
# Validate `path_in_repo` value to prevent a server-side issue
if path_in_repo.startswith('/'):
path_in_repo = path_in_repo[1:]
if path_in_repo == '.' or path_in_repo == '..' or path_in_repo.startswith(
'../'):
raise ValueError(
f"Invalid `path_in_repo` in CommitOperation: '{path_in_repo}'")
if path_in_repo.startswith('./'):
path_in_repo = path_in_repo[2:]
for forbidden in FORBIDDEN_FOLDERS:
if any(part == forbidden for part in path_in_repo.split('/')):
raise ValueError(
f"Invalid `path_in_repo` in CommitOperation: cannot update files under a '{forbidden}/' folder (path:"
f" '{path_in_repo}').")
return path_in_repo
CommitOperation = Union[CommitOperationAdd, ]