Support the master branch as a model revision and add an HTTP request id

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14072105
* Support the master branch as a model revision and add an HTTP request id

* When no revision is specified and the repository has no tagged revision, fall back to the master branch

* Allow explicitly specifying revision="master"

* Include the request id in error messages
This commit is contained in:
mulin.lyh
2023-09-20 19:29:30 +08:00
committed by wenmeng.zwm
parent 9c4cdb15d0
commit 8c80b0c3f5
5 changed files with 91 additions and 40 deletions

View File

@@ -30,7 +30,7 @@ from modelscope.hub.constants import (API_HTTP_CLIENT_TIMEOUT,
DEFAULT_CREDENTIALS_PATH,
MODELSCOPE_CLOUD_ENVIRONMENT,
MODELSCOPE_CLOUD_USERNAME,
ONE_YEAR_SECONDS,
MODELSCOPE_REQUEST_ID, ONE_YEAR_SECONDS,
REQUESTS_API_HTTP_METHOD, Licenses,
ModelVisibility)
from modelscope.hub.errors import (InvalidParameter, NotExistError,
@@ -105,7 +105,9 @@ class HubApi:
"""
path = f'{self.endpoint}/api/v1/login'
r = self.session.post(
path, json={'AccessToken': access_token}, headers=self.headers)
path,
json={'AccessToken': access_token},
headers=self.builder_headers(self.headers))
raise_for_http_status(r)
d = r.json()
raise_on_error(d)
@@ -166,7 +168,10 @@ class HubApi:
'TrainId': os.environ.get('MODELSCOPE_TRAIN_ID', ''),
}
r = self.session.post(
path, json=body, cookies=cookies, headers=self.headers)
path,
json=body,
cookies=cookies,
headers=self.builder_headers(self.headers))
handle_http_post_error(r, path, body)
raise_on_error(r.json())
model_repo_url = f'{get_endpoint()}/{model_id}'
@@ -189,7 +194,9 @@ class HubApi:
raise ValueError('Token does not exist, please login first.')
path = f'{self.endpoint}/api/v1/models/{model_id}'
r = self.session.delete(path, cookies=cookies, headers=self.headers)
r = self.session.delete(path,
cookies=cookies,
headers=self.builder_headers(self.headers))
raise_for_http_status(r)
raise_on_error(r.json())
@@ -223,7 +230,8 @@ class HubApi:
else:
path = f'{self.endpoint}/api/v1/models/{owner_or_group}/{name}'
r = self.session.get(path, cookies=cookies, headers=self.headers)
r = self.session.get(path, cookies=cookies,
headers=self.builder_headers(self.headers))
handle_http_response(r, logger, cookies, model_id)
if r.status_code == HTTPStatus.OK:
if is_ok(r.json()):
@@ -384,7 +392,7 @@ class HubApi:
data='{"Path":"%s", "PageNumber":%s, "PageSize": %s}' %
(owner_or_group, page_number, page_size),
cookies=cookies,
headers=self.headers)
headers=self.builder_headers(self.headers))
handle_http_response(r, logger, cookies, 'list_model')
if r.status_code == HTTPStatus.OK:
if is_ok(r.json()):
@@ -429,7 +437,8 @@ class HubApi:
if cutoff_timestamp is None:
cutoff_timestamp = get_release_datetime()
path = f'{self.endpoint}/api/v1/models/{model_id}/revisions?EndTime=%s' % cutoff_timestamp
r = self.session.get(path, cookies=cookies, headers=self.headers)
r = self.session.get(path, cookies=cookies,
headers=self.builder_headers(self.headers))
handle_http_response(r, logger, cookies, model_id)
d = r.json()
raise_on_error(d)
@@ -466,13 +475,15 @@ class HubApi:
cutoff_timestamp=release_timestamp,
use_cookies=False if cookies is None else cookies)
if len(revisions) == 0:
raise NoValidRevisionError(
'The model: %s has no valid revision!' % model_id)
# tags (revisions) returned from backend are guaranteed to be ordered by create-time
# we shall obtain the latest revision created earlier than release version of this branch
revision = revisions[0]
logger.warning(('There is no version specified and there is no version in the model repository,'
'use the master branch, which is fragile, please use it with caution!'))
revision = MASTER_MODEL_BRANCH
else:
# tags (revisions) returned from backend are guaranteed to be ordered by create-time
# we shall obtain the latest revision created earlier than release version of this branch
revision = revisions[0]
logger.info(
'Model revision not specified, use the latest revision: %s'
'Model revision not specified, use revision: %s'
% revision)
else:
# use user-specified revision
@@ -481,8 +492,11 @@ class HubApi:
cutoff_timestamp=current_timestamp,
use_cookies=False if cookies is None else cookies)
if revision not in revisions:
raise NotExistError('The model: %s has no revision: %s !' %
(model_id, revision))
if revision == MASTER_MODEL_BRANCH:
logger.warning('Using the master branch is fragile, please use it with caution!')
else:
raise NotExistError('The model: %s has no revision: %s !' %
(model_id, revision))
logger.info('Use user-specified model revision: %s' % revision)
return revision
@@ -504,7 +518,8 @@ class HubApi:
cookies = self._check_cookie(use_cookies)
path = f'{self.endpoint}/api/v1/models/{model_id}/revisions'
r = self.session.get(path, cookies=cookies, headers=self.headers)
r = self.session.get(path, cookies=cookies,
headers=self.builder_headers(self.headers))
handle_http_response(r, logger, cookies, model_id)
d = r.json()
raise_on_error(d)
@@ -546,6 +561,7 @@ class HubApi:
if root is not None:
path = path + f'&Root={root}'
headers = self.headers if headers is None else headers
headers['X-Request-ID'] = str(uuid.uuid4().hex)
r = self.session.get(
path, cookies=cookies, headers=headers)
@@ -564,7 +580,8 @@ class HubApi:
def list_datasets(self):
path = f'{self.endpoint}/api/v1/datasets'
params = {}
r = self.session.get(path, params=params, headers=self.headers)
r = self.session.get(path, params=params,
headers=self.builder_headers(self.headers))
raise_for_http_status(r)
dataset_list = r.json()[API_RESPONSE_FIELD_DATA]
return [x['Name'] for x in dataset_list]
@@ -584,7 +601,9 @@ class HubApi:
""" Get the meta file-list of the dataset. """
datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={revision}'
cookies = ModelScopeConfig.get_cookies()
r = self.session.get(datahub_url, cookies=cookies, headers=self.headers)
r = self.session.get(datahub_url,
cookies=cookies,
headers=self.builder_headers(self.headers))
resp = r.json()
datahub_raise_on_error(datahub_url, resp)
file_list = resp['Data']
@@ -730,7 +749,9 @@ class HubApi:
cookies = ModelScopeConfig.get_cookies()
r = self.session.get(
url=datahub_url, cookies=cookies, headers=self.headers)
url=datahub_url,
cookies=cookies,
headers=self.builder_headers(self.headers))
resp = r.json()
raise_on_error(resp)
return resp['Data']
@@ -753,7 +774,11 @@ class HubApi:
data = dict(
data=dataset_info,
)
r = self.session.post(url=virgo_dataset_url, json=data, cookies=cookies, headers=self.headers, timeout=900)
r = self.session.post(url=virgo_dataset_url,
json=data,
cookies=cookies,
headers=self.builder_headers(self.headers),
timeout=900)
resp = r.json()
if resp['code'] != 0:
raise RuntimeError(f'Failed to get virgo dataset: {resp}')
@@ -767,7 +792,8 @@ class HubApi:
zip_file_name: str):
datahub_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}'
cookies = ModelScopeConfig.get_cookies()
r = self.session.get(url=datahub_url, cookies=cookies, headers=self.headers)
r = self.session.get(url=datahub_url, cookies=cookies,
headers=self.builder_headers(self.headers))
resp = r.json()
# get visibility of the dataset
raise_on_error(resp)
@@ -775,7 +801,8 @@ class HubApi:
visibility = DatasetVisibilityMap.get(data['Visibility'])
datahub_sts_url = f'{datahub_url}/ststoken?Revision={revision}'
r_sts = self.session.get(url=datahub_sts_url, cookies=cookies, headers=self.headers)
r_sts = self.session.get(url=datahub_sts_url, cookies=cookies,
headers=self.builder_headers(self.headers))
resp_sts = r_sts.json()
raise_on_error(resp_sts)
data_sts = resp_sts['Data']
@@ -842,7 +869,8 @@ class HubApi:
# Download count
download_count_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/increase'
download_count_resp = self.session.post(download_count_url, cookies=cookies, headers=self.headers)
download_count_resp = self.session.post(download_count_url, cookies=cookies,
headers=self.builder_headers(self.headers))
raise_for_http_status(download_count_resp)
# Download uv
@@ -854,13 +882,18 @@ class HubApi:
user_name = os.environ[MODELSCOPE_CLOUD_USERNAME]
download_uv_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/' \
f'{channel}?user={user_name}'
download_uv_resp = self.session.post(download_uv_url, cookies=cookies, headers=self.headers)
download_uv_resp = self.session.post(download_uv_url, cookies=cookies,
headers=self.builder_headers(self.headers))
download_uv_resp = download_uv_resp.json()
raise_on_error(download_uv_resp)
except Exception as e:
logger.error(e)
def builder_headers(self, headers):
    """Return a copy of ``headers`` tagged with a freshly generated request id.

    Args:
        headers: Mapping of base HTTP headers. It is not modified;
            ``None`` is treated as an empty mapping.

    Returns:
        dict: A new dict holding every entry of ``headers`` plus a
        ``MODELSCOPE_REQUEST_ID`` entry whose value is a new UUID4 hex string.
    """
    # The generated id is written last so it takes precedence over any
    # MODELSCOPE_REQUEST_ID entry already present in ``headers``. Otherwise a
    # shared header dict that had an id stored in it earlier would make every
    # later request reuse that stale id.
    return {**(headers or {}), MODELSCOPE_REQUEST_ID: uuid.uuid4().hex}
class ModelScopeConfig:
path_credential = expanduser(DEFAULT_CREDENTIALS_PATH)

View File

@@ -31,6 +31,7 @@ MODELSCOPE_SDK_DEBUG = 'MODELSCOPE_SDK_DEBUG'
ONE_YEAR_SECONDS = 24 * 365 * 60 * 60
MODEL_META_FILE_NAME = '.mdl'
MODEL_META_MODEL_ID = 'id'
MODELSCOPE_REQUEST_ID = 'X-Request-ID'
class Licenses(object):

View File

@@ -5,6 +5,7 @@ from http import HTTPStatus
import requests
from requests.exceptions import HTTPError
from modelscope.hub.constants import MODELSCOPE_REQUEST_ID
from modelscope.utils.logger import get_logger
logger = get_logger()
@@ -46,6 +47,13 @@ class FileDownloadError(Exception):
pass
def get_request_id(response: requests.Response):
    """Return the request id that was sent with the request behind ``response``.

    Args:
        response: The HTTP response whose originating request headers are
            inspected. Objects without a ``request`` attribute (for example
            an already-decoded JSON body) are tolerated.

    Returns:
        The value of the ``MODELSCOPE_REQUEST_ID`` request header, or ``''``
        when the header, the headers, or the request itself is unavailable.
    """
    # Error-reporting helpers call this while building an exception message;
    # it must never raise itself, or it would mask the error being reported.
    request = getattr(response, 'request', None)
    headers = getattr(request, 'headers', None)
    if headers is None:
        return ''
    return headers.get(MODELSCOPE_REQUEST_ID, '')
def is_ok(rsp):
""" Check the request is ok
@@ -71,12 +79,14 @@ def handle_http_post_error(response, url, request_body):
response.raise_for_status()
except HTTPError as error:
message = _decode_response_error(response)
raise HTTPError('Request %s with body: %s exception, '
'Response details: %s' %
(url, request_body, message)) from error
raise HTTPError(
'Request %s with body: %s exception, '
'Response details: %s, request id: %s' %
(url, request_body, message, get_request_id(response))) from error
def handle_http_response(response, logger, cookies, model_id):
def handle_http_response(response: requests.Response, logger, cookies,
model_id):
try:
response.raise_for_status()
except HTTPError as error:
@@ -85,7 +95,8 @@ def handle_http_response(response, logger, cookies, model_id):
f'Authentication token does not exist, failed to access model {model_id} which may not exist or may be \
private. Please login first.')
message = _decode_response_error(response)
raise HTTPError('Response details: %s' % message) from error
raise HTTPError('Response details: %s, Request id: %s' %
(message, get_request_id(response))) from error
def raise_on_error(rsp):
@@ -122,9 +133,10 @@ def datahub_raise_on_error(url, rsp):
if rsp.get('Code') == HTTPStatus.OK:
return True
else:
request_id = get_request_id(rsp)
raise RequestError(
f"Url = {url}, Message = {rsp.get('Message')}, Please specify correct dataset_name and namespace."
)
f"Url = {url}, Request id={request_id} Message = {rsp.get('Message')},\
Please specify correct dataset_name and namespace.")
def raise_for_http_status(rsp):
@@ -146,14 +158,14 @@ def raise_for_http_status(rsp):
reason = rsp.reason.decode('iso-8859-1')
else:
reason = rsp.reason
request_id = get_request_id(rsp)
if 400 <= rsp.status_code < 500:
http_error_msg = u'%s Client Error: %s for url: %s' % (rsp.status_code,
reason, rsp.url)
http_error_msg = u'%s Client Error: %s, Request id: %s for url: %s' % (
rsp.status_code, reason, request_id, rsp.url)
elif 500 <= rsp.status_code < 600:
http_error_msg = u'%s Server Error: %s for url: %s' % (rsp.status_code,
reason, rsp.url)
http_error_msg = u'%s Server Error: %s, Request id: %s, for url: %s' % (
rsp.status_code, reason, request_id, rsp.url)
if http_error_msg:
req = rsp.request

View File

@@ -4,6 +4,7 @@ import copy
import os
import tempfile
import threading
import uuid
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from http.cookiejar import CookieJar
@@ -192,6 +193,7 @@ def download_part_with_retry(params):
progress, start, end, url, file_name, cookies, headers = params
get_headers = {} if headers is None else copy.deepcopy(headers)
get_headers['Range'] = 'bytes=%s-%s' % (start, end)
get_headers['X-Request-ID'] = str(uuid.uuid4().hex)
retry = Retry(
total=API_FILE_DOWNLOAD_RETRY_TIMES,
backoff_factor=1,
@@ -289,6 +291,7 @@ def http_get_file(
temp_file_manager = partial(
tempfile.NamedTemporaryFile, mode='wb', dir=local_dir, delete=False)
get_headers = {} if headers is None else copy.deepcopy(headers)
get_headers['X-Request-ID'] = str(uuid.uuid4().hex)
with temp_file_manager() as temp_file:
logger.debug('downloading %s to %s', url, temp_file.name)
# retry sleep 0.5s, 1s, 2s, 4s

View File

@@ -52,11 +52,13 @@ class HubRevisionTest(unittest.TestCase):
self.repo.tag_and_push(self.revision, 'Test revision')
def test_no_tag(self):
with self.assertRaises(NoValidRevisionError):
snapshot_download(self.model_id, None)
# no tag will download master
snapshot_download(self.model_id, None)
# not specified tag will use master
model_file_download(self.model_id, ModelFile.README)
with self.assertRaises(NoValidRevisionError):
model_file_download(self.model_id, ModelFile.README)
# specified master branch
snapshot_download(self.model_id, 'master')
def test_with_only_one_tag(self):
self.prepare_repo_data()