improve upload model, remove requirement for configuration.json

This commit is contained in:
Yingda Chen
2024-11-04 10:42:34 +08:00
parent 83f1e20e80
commit 2efd31c5c1
4 changed files with 72 additions and 22 deletions

View File

@@ -22,6 +22,8 @@ import requests
from requests import Session
from requests.adapters import HTTPAdapter, Retry
from modelscope import utils
from modelscope.fileio import io
from modelscope.hub.constants import (API_HTTP_CLIENT_MAX_RETRIES,
API_HTTP_CLIENT_TIMEOUT,
API_RESPONSE_FIELD_DATA,
@@ -48,13 +50,14 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
DEFAULT_MODEL_REVISION,
DEFAULT_REPOSITORY_REVISION,
MASTER_MODEL_BRANCH, META_FILES_FORMAT,
REPO_TYPE_MODEL, ConfigFields,
DatasetFormations, DatasetMetaFormats,
DatasetVisibilityMap, DownloadChannel,
DownloadMode, ModelFile,
VirgoDatasetConfig)
DownloadMode, Frameworks, ModelFile,
Tasks, VirgoDatasetConfig)
from modelscope.utils.logger import get_logger
from .utils.utils import (get_endpoint, get_release_datetime,
model_id_to_group_owner_name)
from .utils.utils import (get_endpoint, get_readable_folder_size,
get_release_datetime, model_id_to_group_owner_name)
logger = get_logger()
@@ -268,7 +271,7 @@ class HubApi:
Returns:
True if the repository exists, False otherwise.
"""
if (repo_type is not None) and repo_type.lower != 'model':
if (repo_type is not None) and repo_type.lower != REPO_TYPE_MODEL:
raise Exception('Not support repo-type: %s' % repo_type)
if (repo_id is None) or repo_id.count('/') != 1:
raise Exception('Invalid repo_id: %s, must be of format namespace/name' % repo_type)
@@ -280,16 +283,25 @@ class HubApi:
r = self.session.get(path, cookies=cookies,
headers=self.builder_headers(self.headers))
code = handle_http_response(r, logger, cookies, repo_id, False)
logger.info(f'check repo_exists status code {code}.')
if code == 200:
return True
elif code == 404:
return False
else:
logger.warn(f'Check repo_exists return status code {code}.')
raise Exception(
'Failed to check existence of repo: %s, make sure you have access authorization.'
% repo_type)
@staticmethod
def _create_default_config(model_dir):
    """Write a minimal default configuration file into ``model_dir``.

    The file is named by ``ModelFile.CONFIGURATION`` and holds two
    entries: the framework (``Frameworks.torch``) and the task
    (``Tasks.other``).

    Args:
        model_dir: Directory in which the configuration file is created.
    """
    default_cfg = dict()
    default_cfg[ConfigFields.framework] = Frameworks.torch
    default_cfg[ConfigFields.task] = Tasks.other
    # Serialize the mapping to <model_dir>/<ModelFile.CONFIGURATION>.
    io.dump(default_cfg, os.path.join(model_dir, ModelFile.CONFIGURATION))
def push_model(self,
model_id: str,
model_dir: str,
@@ -357,23 +369,23 @@ class HubApi:
raise InvalidParameter('model_dir must be a valid directory.')
cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
if not os.path.exists(cfg_file):
raise ValueError(f'{model_dir} must contain a configuration.json.')
logger.warning(
f'No {ModelFile.CONFIGURATION} file found in {model_dir}, creating a default one.')
HubApi._create_default_config(model_dir)
cookies = ModelScopeConfig.get_cookies()
if cookies is None:
raise NotLoginException('Must login before upload!')
files_to_save = os.listdir(model_dir)
folder_size = get_readable_folder_size(model_dir)
if ignore_file_pattern is None:
ignore_file_pattern = []
if isinstance(ignore_file_pattern, str):
ignore_file_pattern = [ignore_file_pattern]
try:
self.get_model(model_id=model_id)
except Exception:
if visibility is None or license is None:
raise InvalidParameter(
'visibility and license cannot be empty if want to create new repo'
)
logger.info('Create new model %s' % model_id)
if visibility is None or license is None:
raise InvalidParameter('Visibility and License cannot be empty for new model.')
if not self.repo_exists(model_id):
logger.info('Creating new model [%s]' % model_id)
self.create_model(
model_id=model_id,
visibility=visibility,
@@ -382,11 +394,13 @@ class HubApi:
original_model_id=original_model_id)
tmp_dir = tempfile.mkdtemp()
git_wrapper = GitCommandWrapper()
logger.info(f'Pushing folder {model_dir} as model {model_id}.')
logger.info(f'Total folder size {folder_size}, this may take a while depending on actual pushing size...')
try:
repo = Repository(model_dir=tmp_dir, clone_from=model_id)
branches = git_wrapper.get_remote_branches(tmp_dir)
if revision not in branches:
logger.info('Create new branch %s' % revision)
logger.info('Creating new branch %s' % revision)
git_wrapper.new_branch(tmp_dir, revision)
git_wrapper.checkout(tmp_dir, revision)
files_in_repo = os.listdir(tmp_dir)

View File

@@ -56,11 +56,18 @@ class GitCommandWrapper(metaclass=Singleton):
response.check_returncode()
return response
except subprocess.CalledProcessError as error:
output = 'stdout: %s, stderr: %s' % (
response.stdout.decode('utf8'), error.stderr.decode('utf8'))
logger.error('Running git command: %s failed, output: %s.' %
(command, output))
raise GitError(output)
std_out = response.stdout.decode('utf8')
std_err = error.stderr.decode('utf8')
if 'nothing to commit' in std_out:
logger.info(
'Nothing to commit, your local repo is upto date with remote'
)
return response
else:
logger.error(
'Running git command: %s failed \n stdout: %s \n stderr: %s'
% (command, std_out, std_err))
raise GitError(std_err)
def config_auth_token(self, repo_dir, auth_token):
url = self.get_repo_remote_url(repo_dir)

View File

@@ -29,6 +29,30 @@ def model_id_to_group_owner_name(model_id):
return group_or_owner, name
def convert_readable_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size_bytes: Non-negative number of bytes (int or float).

    Returns:
        ``'0B'`` for zero; otherwise the value rounded to two decimals,
        a space, and the unit, e.g. ``'1.5 KB'``. Values too large for the
        biggest unit are still expressed in ``YB``.

    Raises:
        ValueError: If ``size_bytes`` is negative.
    """
    if size_bytes < 0:
        raise ValueError(
            f'size_bytes must be non-negative, got {size_bytes}')
    if size_bytes == 0:
        return '0B'
    size_name = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
    # Pick the unit with exact comparisons instead of a floating-point
    # logarithm: this keeps sub-byte values in 'B' and clamps huge values
    # to the last unit rather than indexing outside the tuple.
    i = 0
    while i < len(size_name) - 1 and size_bytes >= 1024 ** (i + 1):
        i += 1
    s = round(size_bytes / 1024 ** i, 2)
    return f'{s} {size_name[i]}'
def get_folder_size(folder_path):
    """Return the combined size in bytes of all files under ``folder_path``.

    The directory tree is walked recursively; only entries for which
    ``is_file()`` is true contribute their ``st_size``.

    Args:
        folder_path: Root directory to measure.

    Returns:
        Total size in bytes (0 when no files are found).
    """
    entries = Path(folder_path).rglob('*')
    return sum(entry.stat().st_size for entry in entries if entry.is_file())
def get_readable_folder_size(folder_path) -> str:
    """Return a human-readable string (e.g. MB, GB) for a folder's size.

    Args:
        folder_path: Directory whose total size is reported.

    Returns:
        The folder's byte total from ``get_folder_size``, formatted by
        ``convert_readable_size``.
    """
    total_bytes = get_folder_size(folder_path=folder_path)
    return convert_readable_size(total_bytes)
def get_cache_dir(model_id: Optional[str] = None):
"""cache dir precedence:
function parameter > environment > ~/.cache/modelscope/hub

View File

@@ -293,6 +293,10 @@ class ScienceTasks(object):
protein_structure = 'protein-structure'
class Other(object):
    """Task-name holder for the single catch-all task name ``'other'``."""

    other = 'other'
class TasksIODescriptions(object):
image_to_image = 'image_to_image',
images_to_image = 'images_to_image',
@@ -310,7 +314,8 @@ class TasksIODescriptions(object):
efficient_diffusion_tuning = 'efficient_diffusion_tuning'
class Tasks(CVTasks, NLPTasks, AudioTasks, MultiModalTasks, ScienceTasks):
class Tasks(CVTasks, NLPTasks, AudioTasks, MultiModalTasks, ScienceTasks,
Other):
""" Names for tasks supported by modelscope.
Holds the standard task name to use for identifying different tasks.