diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index 36e1fc7d..5d0b499c 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -102,13 +102,15 @@ class HubApi:
     def __init__(self,
                  endpoint: Optional[str] = None,
                  timeout=API_HTTP_CLIENT_TIMEOUT,
-                 max_retries=API_HTTP_CLIENT_MAX_RETRIES):
+                 max_retries=API_HTTP_CLIENT_MAX_RETRIES,
+                 token: Optional[str] = None):
         """The ModelScope HubApi。

         Args:
             endpoint (str, optional): The modelscope server http|https address. Defaults to None.
         """
         self.endpoint = endpoint if endpoint is not None else get_endpoint()
+        self.token = token
         self.headers = {'user-agent': ModelScopeConfig.get_user_agent()}
         self.session = Session()
         retry = Retry(
@@ -154,12 +156,12 @@ class HubApi:
                 path='/')
         return jar

-    def get_cookies(self, access_token, cookies_required: Optional[bool] = False):
+    def get_cookies(self, access_token: Optional[str] = None, cookies_required: Optional[bool] = False):
         """
         Get cookies for authentication from local cache or access_token.

         Args:
-            access_token (str): user access token on ModelScope
+            access_token (Optional[str]): user access token on ModelScope. If not provided, try to get from local cache.
             cookies_required (bool): whether to raise error if no cookies found, defaults to `False`.

         Returns:
@@ -168,8 +170,9 @@ class HubApi:
         Raises:
             ValueError: If no credentials found and cookies_required is True.
         """
-        if access_token:
-            cookies = self._get_cookies(access_token=access_token)
+        token = access_token if access_token is not None else self.token
+        if token:
+            cookies = self._get_cookies(access_token=token)
         else:
             cookies = ModelScopeConfig.get_cookies()

@@ -203,8 +206,7 @@ class HubApi:
         Note:
             You only have to login once within 30 days.
         """
-        if access_token is None:
-            access_token = os.environ.get('MODELSCOPE_API_TOKEN')
+        access_token = access_token or self.token or os.environ.get('MODELSCOPE_API_TOKEN')
         if not access_token:
             return None, None
         if not endpoint:
@@ -423,12 +425,13 @@ class HubApi:
         tag_url = f'{endpoint}/models/{model_id}/tags/{tag_name}'
         return tag_url

-    def delete_model(self, model_id: str, endpoint: Optional[str] = None):
+    def delete_model(self, model_id: str, endpoint: Optional[str] = None, token: Optional[str] = None):
         """Delete model_id from ModelScope.

         Args:
             model_id (str): The model id.
             endpoint: the endpoint to use, default to None to use endpoint specified in the class
+            token (str, optional): access token for authentication

         Raises:
             ValueError: If not login.
@@ -436,7 +439,7 @@ class HubApi:
         Note:
             model_id = {owner}/{name}
         """
-        cookies = ModelScopeConfig.get_cookies()
+        cookies = self.get_cookies(access_token=token, cookies_required=True)
         if not endpoint:
             endpoint = self.endpoint
         if cookies is None:
@@ -458,7 +461,8 @@ class HubApi:
         self,
         model_id: str,
         revision: Optional[str] = DEFAULT_MODEL_REVISION,
-        endpoint: Optional[str] = None
+        endpoint: Optional[str] = None,
+        token: Optional[str] = None,
     ) -> dict:
         """Get model information at ModelScope

@@ -466,6 +470,7 @@ class HubApi:
             model_id (str): The model id.
             revision (str optional): revision of model.
             endpoint: the endpoint to use, default to None to use endpoint specified in the class
+            token (str, optional): access token for authentication

         Returns:
             The model detail information.
@@ -476,7 +481,7 @@ class HubApi:
         Note:
             model_id = {owner}/{name}
         """
-        cookies = ModelScopeConfig.get_cookies()
+        cookies = self.get_cookies(access_token=token, cookies_required=False)
         owner_or_group, name = model_id_to_group_owner_name(model_id)
         if not endpoint:
             endpoint = self.endpoint
@@ -744,7 +749,8 @@ class HubApi:
                    revision: Optional[str] = DEFAULT_REPOSITORY_REVISION,
                    original_model_id: Optional[str] = None,
                    ignore_file_pattern: Optional[Union[List[str], str]] = None,
-                   lfs_suffix: Optional[Union[str, List[str]]] = None):
+                   lfs_suffix: Optional[Union[str, List[str]]] = None,
+                   token: Optional[str] = None):
         warnings.warn(
             'This function is deprecated and will be removed in future versions. '
             'Please use git command directly or use HubApi().upload_folder instead',
@@ -810,7 +816,7 @@ class HubApi:
                 f'No {ModelFile.CONFIGURATION} file found in {model_dir}, creating a default one.')
             HubApi._create_default_config(model_dir)

-        cookies = ModelScopeConfig.get_cookies()
+        cookies = self.get_cookies(access_token=token, cookies_required=True)
         if cookies is None:
             raise NotLoginException('Must login before upload!')
         files_to_save = os.listdir(model_dir)
@@ -881,7 +887,8 @@ class HubApi:
                     owner_or_group: str,
                     page_number: Optional[int] = 1,
                     page_size: Optional[int] = 10,
-                    endpoint: Optional[str] = None) -> dict:
+                    endpoint: Optional[str] = None,
+                    token: Optional[str] = None) -> dict:
         """List models in owner or group.

         Args:
@@ -889,6 +896,7 @@ class HubApi:
             page_number(int, optional): The page number, default: 1
             page_size(int, optional): The page size, default: 10
             endpoint: the endpoint to use, default to None to use endpoint specified in the class
+            token (str, optional): access token for authentication

         Raises:
             RequestError: The request error.

@@ -896,7 +904,7 @@ class HubApi:
         Returns:
             dict: {"models": "list of models", "TotalCount": total_number_of_models_in_owner_or_group}
         """
-        cookies = ModelScopeConfig.get_cookies()
+        cookies = self.get_cookies(access_token=token, cookies_required=False)
         if not endpoint:
             endpoint = self.endpoint
         path = f'{endpoint}/api/v1/models/'
@@ -925,7 +933,7 @@ class HubApi:
         sort: Optional[str] = None,
         search: Optional[str] = None,
         endpoint: Optional[str] = None,
-    ) -> dict:
+        token: Optional[str] = None) -> dict:
         """List datasets via OpenAPI with pagination, filtering and sorting.

         Args:
@@ -937,6 +945,7 @@ class HubApi:
             search (str, optional): Search by substring keywords in the dataset's Chinese name,
                 English name, and authors (including organizations and individuals).
             endpoint (str, optional): Hub endpoint to use. When None, use the endpoint specified in the class.
+            token (str, optional): Access token for authentication.

         Returns:
             dict: The OpenAPI data payload, e.g.
@@ -966,7 +975,7 @@ class HubApi:
         if owner_or_group:
             params['author'] = owner_or_group

-        cookies = ModelScopeConfig.get_cookies()
+        cookies = self.get_cookies(access_token=token, cookies_required=False)
         headers = self.builder_headers(self.headers)

         r = self.session.get(
@@ -991,9 +1000,7 @@ class HubApi:
         if isinstance(use_cookies, CookieJar):
             cookies = use_cookies
         elif isinstance(use_cookies, bool):
-            cookies = ModelScopeConfig.get_cookies()
-            if use_cookies and cookies is None:
-                raise ValueError('Token does not exist, please login first.')
+            cookies = self.get_cookies(cookies_required=use_cookies)
         return cookies

     def list_model_revisions(
@@ -1251,6 +1258,7 @@ class HubApi:
         filename: str,
         *,
         revision: Optional[str] = None,
+        token: Optional[str] = None,
     ):
         """Get if the specified file exists

         Args:
@@ -1259,10 +1267,11 @@ class HubApi:
             repo_id (`str`): The repo id
             filename (`str`): The queried filename, if the file exists in a sub folder, please pass /
             revision (`Optional[str]`): The repo revision
+            token (`Optional[str]`): The access token
         Returns:
             The query result in bool value
         """
-        cookies = ModelScopeConfig.get_cookies()
+        cookies = self.get_cookies(access_token=token)
         files = self.get_model_files(
             repo_id,
             recursive=True,
@@ -1279,14 +1288,13 @@ class HubApi:
                        license: Optional[str] = Licenses.APACHE_V2,
                        visibility: Optional[int] = DatasetVisibility.PUBLIC,
                        description: Optional[str] = '',
-                       endpoint: Optional[str] = None, ) -> str:
+                       endpoint: Optional[str] = None,
+                       token: Optional[str] = None) -> str:

         if dataset_name is None or namespace is None:
             raise InvalidParameter('dataset_name and namespace are required!')

-        cookies = ModelScopeConfig.get_cookies()
-        if cookies is None:
-            raise ValueError('Token does not exist, please login first.')
+        cookies = self.get_cookies(access_token=token, cookies_required=True)
         if not endpoint:
             endpoint = self.endpoint
         path = f'{endpoint}/api/v1/datasets'
@@ -1312,9 +1320,9 @@ class HubApi:
         logger.info(f'Create dataset success: {dataset_repo_url}')
         return dataset_repo_url

-    def delete_dataset(self, dataset_id: str, endpoint: Optional[str] = None):
+    def delete_dataset(self, dataset_id: str, endpoint: Optional[str] = None, token: Optional[str] = None):

-        cookies = ModelScopeConfig.get_cookies()
+        cookies = self.get_cookies(access_token=token, cookies_required=True)
         if not endpoint:
             endpoint = self.endpoint
         if cookies is None:
@@ -1327,12 +1335,13 @@ class HubApi:
         raise_for_http_status(r)
         raise_on_error(r.json())

-    def get_dataset_id_and_type(self, dataset_name: str, namespace: str, endpoint: Optional[str] = None):
+    def get_dataset_id_and_type(self, dataset_name: str, namespace: str, endpoint: Optional[str] = None,
+                                token: Optional[str] = None):
         """ Get the dataset id and type. """
         if not endpoint:
             endpoint = self.endpoint
         datahub_url = f'{endpoint}/api/v1/datasets/{namespace}/{dataset_name}'
-        cookies = ModelScopeConfig.get_cookies()
+        cookies = self.get_cookies(access_token=token)
         r = self.session.get(datahub_url, cookies=cookies)
         resp = r.json()
         datahub_raise_on_error(datahub_url, resp, r)
@@ -1348,7 +1357,8 @@ class HubApi:
                        recursive: bool = True,
                        page_number: int = 1,
                        page_size: int = 100,
-                       endpoint: Optional[str] = None):
+                       endpoint: Optional[str] = None,
+                       token: Optional[str] = None):
         """
         @deprecated: Use `get_dataset_files` instead.
""" @@ -1356,7 +1366,7 @@ class HubApi: DeprecationWarning) dataset_hub_id, dataset_type = self.get_dataset_id_and_type( - dataset_name=dataset_name, namespace=namespace, endpoint=endpoint) + dataset_name=dataset_name, namespace=namespace, endpoint=endpoint, token=token) recursive = 'True' if recursive else 'False' if not endpoint: @@ -1365,7 +1375,7 @@ class HubApi: params = {'Revision': revision if revision else 'master', 'Root': root_path if root_path else '/', 'Recursive': recursive, 'PageNumber': page_number, 'PageSize': page_size} - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token) r = self.session.get(datahub_url, params=params, cookies=cookies) resp = r.json() @@ -1380,7 +1390,8 @@ class HubApi: revision: Optional[str] = DEFAULT_REPOSITORY_REVISION, page_number: int = 1, page_size: int = 50, - endpoint: Optional[str] = None): + endpoint: Optional[str] = None, + token: Optional[str] = None): """ Get the commit history for a repository. @@ -1391,6 +1402,7 @@ class HubApi: page_number (int): The page number for pagination. Defaults to 1. page_size (int): The number of commits per page. Defaults to 50. endpoint (Optional[str]): The endpoint to use, defaults to None to use the endpoint specified in the class. + token (Optional[str]): The access token. Returns: CommitHistoryResponse: The commit history response. @@ -1420,7 +1432,7 @@ class HubApi: 'PageNumber': page_number, 'PageSize': page_size } - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token) try: r = self.session.get(commits_url, params=params, @@ -1443,7 +1455,8 @@ class HubApi: recursive: bool = True, page_number: int = 1, page_size: int = 100, - endpoint: Optional[str] = None): + endpoint: Optional[str] = None, + token: Optional[str] = None): """ Get the dataset files. @@ -1455,6 +1468,7 @@ class HubApi: page_number (int): The page number for pagination. Defaults to 1. page_size (int): The number of items per page. Defaults to 100. endpoint (Optional[str]): The endpoint to use, defaults to None to use the endpoint specified in the class. + token (Optional[str]): The access token. Returns: List: The response containing the dataset repository tree information. @@ -1468,7 +1482,7 @@ class HubApi: raise ValueError(f'Invalid repo_id: {repo_id} !') dataset_hub_id, dataset_type = self.get_dataset_id_and_type( - dataset_name=_dataset_name, namespace=_owner, endpoint=endpoint) + dataset_name=_dataset_name, namespace=_owner, endpoint=endpoint, token=token) if not endpoint: endpoint = self.endpoint @@ -1480,7 +1494,7 @@ class HubApi: 'PageNumber': page_number, 'PageSize': page_size } - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token) r = self.session.get(datahub_url, params=params, cookies=cookies) resp = r.json() @@ -1492,7 +1506,8 @@ class HubApi: self, dataset_id: str, revision: Optional[str] = DEFAULT_REPOSITORY_REVISION, - endpoint: Optional[str] = None + endpoint: Optional[str] = None, + token: Optional[str] = None ): """ Get the dataset information. @@ -1501,11 +1516,12 @@ class HubApi: dataset_id (str): The dataset id. revision (Optional[str]): The revision of the dataset. endpoint (Optional[str]): The endpoint to use, defaults to None to use the endpoint specified in the class. + token (Optional[str]): The access token. Returns: dict: The dataset information. 
""" - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token) if not endpoint: endpoint = self.endpoint @@ -1522,12 +1538,13 @@ class HubApi: return resp[API_RESPONSE_FIELD_DATA] def get_dataset_meta_file_list(self, dataset_name: str, namespace: str, - dataset_id: str, revision: str, endpoint: Optional[str] = None): + dataset_id: str, revision: str, endpoint: Optional[str] = None, + token: Optional[str] = None): """ Get the meta file-list of the dataset. """ if not endpoint: endpoint = self.endpoint datahub_url = f'{endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={revision}' - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token) r = self.session.get(datahub_url, cookies=cookies, headers=self.builder_headers(self.headers)) @@ -1559,11 +1576,12 @@ class HubApi: namespace: str, revision: str, meta_cache_dir: str, dataset_type: int, file_list: list, - endpoint: Optional[str] = None): + endpoint: Optional[str] = None, + token: Optional[str] = None): local_paths = defaultdict(list) dataset_formation = DatasetFormations(dataset_type) dataset_meta_format = DatasetMetaFormats[dataset_formation] - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token) # Dump the data_type as a local file HubApi.dump_datatype_file(dataset_type=dataset_type, meta_cache_dir=meta_cache_dir) @@ -1591,7 +1609,8 @@ class HubApi: return local_paths, dataset_formation @staticmethod - def fetch_meta_files_from_url(url, out_path, chunk_size=1024, mode=DownloadMode.REUSE_DATASET_IF_EXISTS): + def fetch_meta_files_from_url(url, out_path, chunk_size=1024, mode=DownloadMode.REUSE_DATASET_IF_EXISTS, + token: Optional[str] = None): """ Fetch the meta-data files from the url, e.g. csv/jsonl files. """ @@ -1605,7 +1624,7 @@ class HubApi: if os.path.exists(out_path): logger.info(f'Reusing cached meta-data file: {out_path}') return out_path - cookies = ModelScopeConfig.get_cookies() + cookies = HubApi().get_cookies(access_token=token) # Make the request and get the response content as TextIO logger.info('Loading meta-data file ...') @@ -1697,12 +1716,13 @@ class HubApi: dataset_name: str, namespace: str, revision: Optional[str] = DEFAULT_DATASET_REVISION, - endpoint: Optional[str] = None): + endpoint: Optional[str] = None, + token: Optional[str] = None): if not endpoint: endpoint = self.endpoint datahub_url = f'{endpoint}/api/v1/datasets/{namespace}/{dataset_name}/' \ f'ststoken?Revision={revision}' - return self.datahub_remote_call(datahub_url) + return self.datahub_remote_call(datahub_url, token=token) def get_dataset_access_config_session( self, @@ -1710,7 +1730,8 @@ class HubApi: namespace: str, check_cookie: bool, revision: Optional[str] = DEFAULT_DATASET_REVISION, - endpoint: Optional[str] = None): + endpoint: Optional[str] = None, + token: Optional[str] = None): if not endpoint: endpoint = self.endpoint @@ -1719,7 +1740,7 @@ class HubApi: if check_cookie: cookies = self._check_cookie(use_cookies=True) else: - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token) r = self.session.get( url=datahub_url, @@ -1729,7 +1750,7 @@ class HubApi: raise_on_error(resp) return resp['Data'] - def get_virgo_meta(self, dataset_id: str, version: int = 1) -> dict: + def get_virgo_meta(self, dataset_id: str, version: int = 1, token: Optional[str] = None) -> dict: """ Get virgo dataset meta info. 
""" @@ -1738,7 +1759,7 @@ class HubApi: raise RuntimeError(f'Virgo endpoint is not set in env: {VirgoDatasetConfig.env_virgo_endpoint}') virgo_dataset_url = f'{virgo_endpoint}/data/set/download' - cookies = requests.utils.dict_from_cookiejar(ModelScopeConfig.get_cookies()) + cookies = requests.utils.dict_from_cookiejar(self.get_cookies(access_token=token)) dataset_info = dict( dataSetId=dataset_id, @@ -1763,11 +1784,12 @@ class HubApi: namespace: str, revision: str, zip_file_name: str, - endpoint: Optional[str] = None): + endpoint: Optional[str] = None, + token: Optional[str] = None): if not endpoint: endpoint = self.endpoint datahub_url = f'{endpoint}/api/v1/datasets/{namespace}/{dataset_name}' - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token) r = self.session.get(url=datahub_url, cookies=cookies, headers=self.builder_headers(self.headers)) resp = r.json() @@ -1787,13 +1809,14 @@ class HubApi: return data_sts def list_oss_dataset_objects(self, dataset_name, namespace, max_limit, - is_recursive, is_filter_dir, revision, endpoint: Optional[str] = None): + is_recursive, is_filter_dir, revision, endpoint: Optional[str] = None, + token: Optional[str] = None): if not endpoint: endpoint = self.endpoint url = f'{endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss/tree/?' \ f'MaxLimit={max_limit}&Revision={revision}&Recursive={is_recursive}&FilterDir={is_filter_dir}' - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token) resp = self.session.get(url=url, cookies=cookies, timeout=1800) resp = resp.json() raise_on_error(resp) @@ -1801,14 +1824,15 @@ class HubApi: return resp def delete_oss_dataset_object(self, object_name: str, dataset_name: str, - namespace: str, revision: str, endpoint: Optional[str] = None) -> str: + namespace: str, revision: str, endpoint: Optional[str] = None, + token: Optional[str] = None) -> str: if not object_name or not dataset_name or not namespace or not revision: raise ValueError('Args cannot be empty!') if not endpoint: endpoint = self.endpoint url = f'{endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss?Path={object_name}&Revision={revision}' - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token, cookies_required=True) resp = self.session.delete(url=url, cookies=cookies) resp = resp.json() raise_on_error(resp) @@ -1816,7 +1840,8 @@ class HubApi: return resp def delete_oss_dataset_dir(self, object_name: str, dataset_name: str, - namespace: str, revision: str, endpoint: Optional[str] = None) -> str: + namespace: str, revision: str, endpoint: Optional[str] = None, + token: Optional[str] = None) -> str: if not object_name or not dataset_name or not namespace or not revision: raise ValueError('Args cannot be empty!') if not endpoint: @@ -1824,15 +1849,15 @@ class HubApi: url = f'{endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss/prefix?Prefix={object_name}/' \ f'&Revision={revision}' - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token, cookies_required=True) resp = self.session.delete(url=url, cookies=cookies) resp = resp.json() - raise_on_error(resp) + raise_on_http_response(resp, logger, cookies, url) resp = resp['Message'] return resp - def datahub_remote_call(self, url): - cookies = ModelScopeConfig.get_cookies() + def datahub_remote_call(self, url, token: Optional[str] = None): + cookies = self.get_cookies(access_token=token) r = self.session.get( url, cookies=cookies, @@ -1842,13 +1867,14 @@ class 
HubApi: return resp['Data'] def dataset_download_statistics(self, dataset_name: str, namespace: str, - use_streaming: bool = False, endpoint: Optional[str] = None) -> None: + use_streaming: bool = False, endpoint: Optional[str] = None, + token: Optional[str] = None) -> None: is_ci_test = os.getenv('CI_TEST') == 'True' if not endpoint: endpoint = self.endpoint if dataset_name and namespace and not is_ci_test and not use_streaming: try: - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token) # Download count download_count_url = f'{endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/increase' @@ -2219,6 +2245,7 @@ class HubApi: data=path_or_fileobj, disable_tqdm=disable_tqdm, tqdm_desc=tqdm_desc, + token=token, ) # Construct commit info and create commit @@ -2370,6 +2397,7 @@ class HubApi: data=file_path, disable_tqdm=file_size <= UPLOAD_BLOB_TQDM_DISABLE_THRESHOLD, tqdm_desc='[Uploading ' + file_path_in_repo + ']', + token=token, ) return { @@ -2446,6 +2474,7 @@ class HubApi: disable_tqdm: Optional[bool] = False, tqdm_desc: Optional[str] = '[Uploading]', buffer_size_mb: Optional[int] = 1, + token: Optional[str] = None, ) -> dict: res_d: dict = dict( @@ -2460,6 +2489,7 @@ class HubApi: repo_id=repo_id, repo_type=repo_type, objects=objects, + token=token, ) # upload_object: {'url': 'xxx', 'oid': 'xxx'} @@ -2470,7 +2500,7 @@ class HubApi: res_d['is_uploaded'] = True return res_d - cookies = ModelScopeConfig.get_cookies() + cookies = self.get_cookies(access_token=token, cookies_required=True) cookies = dict(cookies) if cookies else None if cookies is None: raise ValueError('Token does not exist, please login first.') @@ -2536,7 +2566,8 @@ class HubApi: repo_id: str, repo_type: str, objects: List[Dict[str, Any]], - endpoint: Optional[str] = None + endpoint: Optional[str] = None, + token: Optional[str] = None, ) -> List[Dict[str, Any]]: """ Check the blob has already uploaded. @@ -2549,6 +2580,7 @@ class HubApi: oid (str): The sha256 hash value. size (int): The size of the blob. endpoint: the endpoint to use, default to None to use endpoint specified in the class + token (str): The access token. Returns: List[Dict[str, Any]]: The result of the check. @@ -2565,9 +2597,7 @@ class HubApi: 'objects': objects, } - cookies = ModelScopeConfig.get_cookies() - if cookies is None: - raise ValueError('Token does not exist, please login first.') + cookies = self.get_cookies(access_token=token, cookies_required=True) response = requests.post( url, headers=self.builder_headers(self.headers), @@ -2755,7 +2785,8 @@ class HubApi: delete_patterns: Union[str, List[str]], *, revision: Optional[str] = DEFAULT_MODEL_REVISION, - endpoint: Optional[str] = None) -> Dict[str, Any]: + endpoint: Optional[str] = None, + token: Optional[str] = None) -> Dict[str, Any]: """ Delete files in batch using glob (wildcard) patterns, e.g. '*.py', 'data/*.csv', 'foo*', etc. @@ -2780,6 +2811,7 @@ class HubApi: delete_patterns (str or List[str]): List of glob patterns, e.g. 
                 '*.py', 'data/*.csv', 'foo*'
             revision (str, optional): Branch or tag name
             endpoint (str, optional): API endpoint
+            token (str, optional): Access token

         Returns:
             dict: Deletion result
         """
@@ -2790,7 +2822,7 @@ class HubApi:
         if isinstance(delete_patterns, str):
             delete_patterns = [delete_patterns]

-        cookies = ModelScopeConfig.get_cookies()
+        cookies = self.get_cookies(access_token=token, cookies_required=True)
         if not endpoint:
             endpoint = self.endpoint
         if cookies is None:
diff --git a/modelscope/hub/check_model.py b/modelscope/hub/check_model.py
index 781ba588..77172efd 100644
--- a/modelscope/hub/check_model.py
+++ b/modelscope/hub/check_model.py
@@ -33,6 +33,7 @@ def get_model_id_from_cache(model_root_path: str, ) -> str:
 def check_local_model_is_latest(
     model_root_path: str,
     user_agent: Optional[Union[Dict, str]] = None,
+    token: Optional[str] = None,
 ):
     """Check local model repo is latest.

     Check local model repo is same as hub latest version.
@@ -45,7 +46,8 @@ def check_local_model_is_latest(
         'user-agent': ModelScopeConfig.get_user_agent(user_agent=user_agent, )
     }

-    cookies = ModelScopeConfig.get_cookies()
+    _api = HubApi(timeout=20, token=token)
+    cookies = _api.get_cookies()

     snapshot_header = headers if 'CI_TEST' in os.environ else {
         **headers,
@@ -53,7 +55,6 @@ def check_local_model_is_latest(
             'Snapshot': 'True'
         }
     }
-    _api = HubApi(timeout=20)
     try:
         _, revisions = _api.get_model_branches_and_tags(
             model_id=model_id, use_cookies=cookies)
diff --git a/modelscope/hub/deploy.py b/modelscope/hub/deploy.py
index 9eefe61b..1da5ab8a 100644
--- a/modelscope/hub/deploy.py
+++ b/modelscope/hub/deploy.py
@@ -7,7 +7,7 @@ import json
 import requests
 from attrs import asdict, define, field, validators

-from modelscope.hub.api import ModelScopeConfig
+from modelscope.hub.api import HubApi, ModelScopeConfig
 from modelscope.hub.constants import (API_RESPONSE_FIELD_DATA,
                                       API_RESPONSE_FIELD_MESSAGE)
 from modelscope.hub.errors import (NotLoginException, NotSupportError,
@@ -188,13 +188,11 @@ class ServiceDeployer(object):
     """Facilitate model deployment on to supported service provider(s).
     """

-    def __init__(self, endpoint=None):
+    def __init__(self, endpoint=None, token: Optional[str] = None):
         self.endpoint = endpoint if endpoint is not None else get_endpoint()
         self.headers = {'user-agent': ModelScopeConfig.get_user_agent()}
-        self.cookies = ModelScopeConfig.get_cookies()
-        if self.cookies is None:
-            raise NotLoginException(
-                'Token does not exist, please login with HubApi first.')
+        self.cookies = HubApi().get_cookies(
+            access_token=token, cookies_required=True)

     # deploy_model
     def create(self, model_id: str, revision: str, instance_name: str,
diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py
index 2906450b..f5742e1e 100644
--- a/modelscope/hub/file_download.py
+++ b/modelscope/hub/file_download.py
@@ -49,6 +49,7 @@ def model_file_download(
     local_files_only: Optional[bool] = False,
     cookies: Optional[CookieJar] = None,
     local_dir: Optional[str] = None,
+    token: Optional[str] = None,
 ) -> Optional[str]:  # pragma: no cover
     """Download from a given URL and cache it if it's not already present in the local cache.

@@ -67,6 +68,7 @@ def model_file_download(
             local cached file if it exists. if `False`, download the file anyway even it exists.
         cookies (CookieJar, optional): The cookie of download request.
         local_dir (str, optional): Specific local directory path to which the file will be downloaded.
+        token (str, optional): The user token.
     Returns:
         string: string of local file or if networking is off, last version of
@@ -95,7 +97,8 @@ def model_file_download(
         user_agent=user_agent,
         local_files_only=local_files_only,
         cookies=cookies,
-        local_dir=local_dir)
+        local_dir=local_dir,
+        token=token)


 def dataset_file_download(
@@ -107,6 +110,7 @@ def dataset_file_download(
     user_agent: Optional[Union[Dict, str]] = None,
     local_files_only: Optional[bool] = False,
     cookies: Optional[CookieJar] = None,
+    token: Optional[str] = None,
 ) -> str:
     """Download raw files of a dataset.
     Downloads all files at the specified revision. This
@@ -129,6 +133,7 @@ def dataset_file_download(
         local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the
             local cached file if it exists.
         cookies (CookieJar, optional): The cookie of the request, default None.
+        token (str, optional): The user token.

     Raises:
         ValueError: the value details.
@@ -153,7 +158,8 @@ def dataset_file_download(
         user_agent=user_agent,
         local_files_only=local_files_only,
         cookies=cookies,
-        local_dir=local_dir)
+        local_dir=local_dir,
+        token=token)


 def _repo_file_download(
@@ -168,6 +174,7 @@ def _repo_file_download(
     cookies: Optional[CookieJar] = None,
     local_dir: Optional[str] = None,
     disable_tqdm: bool = False,
+    token: Optional[str] = None,
 ) -> Optional[str]:  # pragma: no cover

     if not repo_type:
@@ -194,7 +201,7 @@ def _repo_file_download(
             ' traffic has been disabled. To enable look-ups and downloads'
             " online, set 'local_files_only' to False.")

-    _api = HubApi()
+    _api = HubApi(token=token)
     headers = {
         'user-agent':
         ModelScopeConfig.get_user_agent(user_agent=user_agent, ),
@@ -212,7 +219,7 @@ def _repo_file_download(
         headers['x-aliyun-region-id'] = region_id
     if cookies is None:
-        cookies = ModelScopeConfig.get_cookies()
+        cookies = _api.get_cookies()
     repo_files = []
     endpoint = _api.get_endpoint_for_read(repo_id=repo_id, repo_type=repo_type)
     file_to_download_meta = None
diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py
index e25daaa3..0e21de4f 100644
--- a/modelscope/hub/snapshot_download.py
+++ b/modelscope/hub/snapshot_download.py
@@ -48,6 +48,7 @@ def snapshot_download(
     repo_type: Optional[str] = REPO_TYPE_MODEL,
     enable_file_lock: Optional[bool] = None,
     progress_callbacks: List[Type[ProgressCallback]] = None,
+    token: Optional[str] = None,
 ) -> str:
     """Download all files of a repo.
     Downloads a whole snapshot of a repo's files at the specified revision. This
@@ -88,6 +89,7 @@ def snapshot_download(
             change `MODELSCOPE_HUB_FILE_LOCK` env to `false`.
         progress_callbacks (`List[Type[ProgressCallback]]`, **optional**, default to `None`):
             progress callbacks to track the download progress.
+        token (str, optional): The user token.

     Raises:
         ValueError: the value details.
@@ -146,7 +148,8 @@ def snapshot_download(
         ignore_patterns=ignore_patterns,
         allow_patterns=allow_patterns,
         max_workers=max_workers,
-        progress_callbacks=progress_callbacks)
+        progress_callbacks=progress_callbacks,
+        token=token)


 def dataset_snapshot_download(
@@ -163,6 +166,7 @@ def dataset_snapshot_download(
     ignore_patterns: Optional[Union[List[str], str]] = None,
     enable_file_lock: Optional[bool] = None,
     max_workers: int = 8,
+    token: Optional[str] = None,
 ) -> str:
     """Download raw files of a dataset.
     Downloads all files at the specified revision. This
@@ -199,6 +203,7 @@ def dataset_snapshot_download(
         If you find something wrong with file lock and have a problem modifying your code,
         change `MODELSCOPE_HUB_FILE_LOCK` env to `false`.
         max_workers (`int`): The maximum number of workers to download files, default 8.
+        token (str, optional): The user token.

     Raises:
         ValueError: the value details.
@@ -241,7 +246,8 @@ def dataset_snapshot_download(
         local_dir=local_dir,
         ignore_patterns=ignore_patterns,
         allow_patterns=allow_patterns,
-        max_workers=max_workers)
+        max_workers=max_workers,
+        token=token)


 def _snapshot_download(
@@ -260,6 +266,7 @@ def _snapshot_download(
     ignore_patterns: Optional[Union[List[str], str]] = None,
     max_workers: int = 8,
     progress_callbacks: List[Type[ProgressCallback]] = None,
+    token: Optional[str] = None,
 ):
     if not repo_type:
         repo_type = REPO_TYPE_MODEL
@@ -299,11 +306,11 @@ def _snapshot_download(
         )
         headers['x-aliyun-region-id'] = region_id

-    _api = HubApi()
+    _api = HubApi(token=token)
     endpoint = _api.get_endpoint_for_read(
         repo_id=repo_id, repo_type=repo_type)
     if cookies is None:
-        cookies = ModelScopeConfig.get_cookies()
+        cookies = _api.get_cookies()
     if repo_type == REPO_TYPE_MODEL:
         if local_dir:
             directory = os.path.abspath(local_dir)
diff --git a/modelscope/msdatasets/context/dataset_context_config.py b/modelscope/msdatasets/context/dataset_context_config.py
index fce262b0..a7b909be 100644
--- a/modelscope/msdatasets/context/dataset_context_config.py
+++ b/modelscope/msdatasets/context/dataset_context_config.py
@@ -1,6 +1,6 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.

-from typing import Mapping, Sequence, Union
+from typing import Mapping, Optional, Sequence, Union

 from modelscope.msdatasets.auth.auth_config import BaseAuthConfig
 from modelscope.msdatasets.download.download_config import DataDownloadConfig
@@ -11,14 +11,24 @@ from modelscope.utils.constant import DownloadMode, Hubs
 class DatasetContextConfig:
     """Context configuration of dataset."""

-    def __init__(self, dataset_name: Union[str, list], namespace: str,
-                 version: str, subset_name: str, split: Union[str, list],
-                 target: str, hub: Hubs, data_dir: str,
+    def __init__(self,
+                 dataset_name: Union[str, list],
+                 namespace: str,
+                 version: str,
+                 subset_name: str,
+                 split: Union[str, list],
+                 target: str,
+                 hub: Hubs,
+                 data_dir: str,
                  data_files: Union[str, Sequence[str],
                                    Mapping[str, Union[str, Sequence[str]]]],
-                 download_mode: DownloadMode, cache_root_dir: str,
-                 use_streaming: bool, stream_batch_size: int,
-                 trust_remote_code: bool, **kwargs):
+                 download_mode: DownloadMode,
+                 cache_root_dir: str,
+                 use_streaming: bool,
+                 stream_batch_size: int,
+                 trust_remote_code: bool,
+                 token: Optional[str] = None,
+                 **kwargs):

         self._download_config = None
         self._data_meta_config = None
@@ -32,6 +42,7 @@ class DatasetContextConfig:

         # General arguments for dataset
         self.hub = hub
+        self.token = token
         self.download_mode = download_mode
         self.dataset_name = dataset_name
         self.namespace = namespace
diff --git a/modelscope/msdatasets/data_loader/data_loader.py b/modelscope/msdatasets/data_loader/data_loader.py
index 40f9ef60..fa1c2ac9 100644
--- a/modelscope/msdatasets/data_loader/data_loader.py
+++ b/modelscope/msdatasets/data_loader/data_loader.py
@@ -8,7 +8,7 @@ from datasets import (Dataset, DatasetBuilder, DatasetDict, IterableDataset,
                       IterableDatasetDict)
 from datasets import load_dataset as hf_load_dataset

-from modelscope.hub.api import ModelScopeConfig
+from modelscope.hub.api import HubApi, ModelScopeConfig
 from modelscope.msdatasets.auth.auth_config import OssAuthConfig
 from modelscope.msdatasets.context.dataset_context_config import \
     DatasetContextConfig
@@ -86,7 +86,8 @@ class OssDownloader(BaseDownloader):

     def _authorize(self) -> None:
         """ Authorization of target dataset.
         Get credentials from cache and send to the modelscope-hub in the future.
         """
-        cookies = ModelScopeConfig.get_cookies()
+        cookies = HubApi().get_cookies(
+            access_token=self.dataset_context_config.token)
         git_token = ModelScopeConfig.get_token()
         user_info = ModelScopeConfig.get_user_info()

@@ -178,7 +179,8 @@ class VirgoDownloader(BaseDownloader):
         """Authorization of virgo dataset."""
         from modelscope.msdatasets.auth.auth_config import VirgoAuthConfig

-        cookies = ModelScopeConfig.get_cookies()
+        cookies = HubApi().get_cookies(
+            access_token=self.dataset_context_config.token)
         user_info = ModelScopeConfig.get_user_info()

         if not self.dataset_context_config.auth_config:
diff --git a/modelscope/msdatasets/data_loader/data_loader_manager.py b/modelscope/msdatasets/data_loader/data_loader_manager.py
index b64a8926..011b6e47 100644
--- a/modelscope/msdatasets/data_loader/data_loader_manager.py
+++ b/modelscope/msdatasets/data_loader/data_loader_manager.py
@@ -97,7 +97,8 @@ class RemoteDataLoaderManager(DataLoaderManager):

     def __init__(self, dataset_context_config: DatasetContextConfig):
         super().__init__(dataset_context_config=dataset_context_config)
-        self.api = HubApi()
+
+        self.api = HubApi(token=dataset_context_config.token)

     def load_dataset(self, data_loader_type: enum.Enum):
         # Get args from context
@@ -112,6 +113,7 @@ class RemoteDataLoaderManager(DataLoaderManager):
         use_streaming = self.dataset_context_config.use_streaming
         input_config_kwargs = self.dataset_context_config.config_kwargs
         trust_remote_code = self.dataset_context_config.trust_remote_code
+        token = self.dataset_context_config.token

         # To use the huggingface data loader
         if data_loader_type == RemoteDataLoaderType.HF_DATA_LOADER:
@@ -129,6 +131,7 @@ class RemoteDataLoaderManager(DataLoaderManager):
                 download_mode=download_mode_val,
                 streaming=use_streaming,
                 trust_remote_code=trust_remote_code,
+                token=token,
                 **input_config_kwargs)
             # download statistics
             self.api.dataset_download_statistics(
diff --git a/modelscope/msdatasets/utils/hf_file_utils.py b/modelscope/msdatasets/utils/hf_file_utils.py
index 18e274b5..2424dc50 100644
--- a/modelscope/msdatasets/utils/hf_file_utils.py
+++ b/modelscope/msdatasets/utils/hf_file_utils.py
@@ -24,7 +24,7 @@ from filelock import FileLock

 from modelscope.utils.config_ds import MS_DATASETS_CACHE
 from modelscope.utils.logger import get_logger
-from modelscope.hub.api import ModelScopeConfig
+from modelscope.hub.api import HubApi
 from modelscope import __version__


@@ -219,7 +219,7 @@ def get_from_cache_ms(
     etag = (response.get('ETag', None) or response.get('etag', None)) if use_etag else None
     connected = True
     try:
-        cookies = ModelScopeConfig.get_cookies()
+        cookies = HubApi().get_cookies(access_token=token)
         response = http_head_ms(
             url,
             allow_redirects=True,
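
Taken together, the patch threads an optional token through every entry point: an explicit per-call token wins, then the instance-level HubApi(token=...) value, then the cached login cookies (see HubApi.get_cookies above), while HubApi.login additionally falls back to the MODELSCOPE_API_TOKEN environment variable. A minimal usage sketch of the new plumbing — the repo ids and the environment variable value are placeholders, not part of the patch:

# Sketch only: exercises the token fallback chain introduced by this patch.
import os

from modelscope.hub.api import HubApi
from modelscope.hub.snapshot_download import snapshot_download

# Per-call token: no prior login() and no cached cookies needed.
api = HubApi()
info = api.get_model('my-org/my-model', token=os.environ['MODELSCOPE_API_TOKEN'])

# Instance-level token: every method of this HubApi instance falls back to it,
# e.g. delete_model resolves it via get_cookies(access_token=None) -> self.token.
api = HubApi(token=os.environ['MODELSCOPE_API_TOKEN'])
api.delete_model(model_id='my-org/my-model')

# Download helpers forward the token to the HubApi they construct internally.
local_dir = snapshot_download('my-org/my-private-model',
                              token=os.environ['MODELSCOPE_API_TOKEN'])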