From f451ff8905e1615ec3adb3110fac89d8fe9bb492 Mon Sep 17 00:00:00 2001 From: "jiangyu.xzy" Date: Tue, 1 Nov 2022 11:22:46 +0800 Subject: [PATCH 01/21] api tagging for pipeline/train/evaluate --- modelscope/hub/api.py | 24 ++++++++++++++++++++++++ modelscope/pipelines/base.py | 5 ++++- modelscope/trainers/trainer.py | 7 +++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 7468e5e3..36c246f1 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -646,6 +646,30 @@ class HubApi: def check_local_cookies(self, use_cookies) -> CookieJar: return self._check_cookie(use_cookies=use_cookies) + def create_library_statistics(self, + method: str, + name: str, + cn_name: Optional[str]): + """ + create library statistics. called by train()/evaluate()/pipeline() + + Args: + method (str): called methed name,i.e train/evaluate/pipeline + name (str): model name, for example: damo/cv_unet_person-image-cartoon_compound-models + cn_name (str): model name in chinese, for example: 达摩卡通化模型 + Raises: + ValueError: If user_cookies is True, but no local cookie. + + Returns: + None + """ + path = f'{self.endpoint}/api/v1/statistics/library' + headers = {'user-agent': ModelScopeConfig.get_user_agent()} + params = {"Method": method, "Name": name, "CnName": cn_name} + r = requests.post(path, params=params, headers=headers) + r.raise_for_status() + return + class ModelScopeConfig: path_credential = expanduser(DEFAULT_CREDENTIALS_PATH) diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index bca80502..b8856dea 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -23,6 +23,7 @@ from modelscope.utils.hub import read_config, snapshot_download from modelscope.utils.import_utils import is_tf_available, is_torch_available from modelscope.utils.logger import get_logger from modelscope.utils.torch_utils import _find_free_port, _is_free_port +from modelscope.hub.api import HubApi from .util import is_model, is_official_hub_path if is_torch_available(): @@ -151,7 +152,9 @@ class Pipeline(ABC): **kwargs) -> Union[Dict[str, Any], Generator]: # model provider should leave it as it is # modelscope library developer will handle this function - + _api = HubApi() + model_name = self.cfg.task + _api.create_library_statistics("pipeline", model_name, None) # place model to cpu or gpu if (self.model or (self.has_multiple_models and self.models[0])): if not self._model_prepare: diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 3556badf..6e5f4180 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -39,6 +39,7 @@ from modelscope.utils.logger import get_logger from modelscope.utils.registry import build_from_cfg from modelscope.utils.torch_utils import (get_dist_info, get_local_rank, init_dist, set_random_seed) +from modelscope.hub.api import HubApi from .base import BaseTrainer from .builder import TRAINERS from .default_config import merge_cfg @@ -436,6 +437,9 @@ class EpochBasedTrainer(BaseTrainer): def train(self, checkpoint_path=None, *args, **kwargs): self._mode = ModeKeys.TRAIN + _api = HubApi() + model_name = self.cfg.task + _api.create_library_statistics("train", model_name, None) if self.train_dataset is None: self.train_dataloader = self.get_train_dataloader() @@ -456,6 +460,9 @@ class EpochBasedTrainer(BaseTrainer): self.train_loop(self.train_dataloader) def evaluate(self, checkpoint_path=None): + _api = HubApi() + model_name = self.cfg.task + _api.create_library_statistics("evaluate", model_name, None) if checkpoint_path is not None and os.path.isfile(checkpoint_path): from modelscope.trainers.hooks import CheckpointHook CheckpointHook.load_checkpoint(checkpoint_path, self) From a79a900e94d2bff8fd4e3d8843ff065f35ca6096 Mon Sep 17 00:00:00 2001 From: "jiangyu.xzy" Date: Tue, 1 Nov 2022 11:35:28 +0800 Subject: [PATCH 02/21] change api to utils --- modelscope/hub/api.py | 23 ----------------------- modelscope/hub/utils/utils.py | 13 +++++++++++++ modelscope/pipelines/base.py | 5 ++--- modelscope/trainers/trainer.py | 8 +++----- 4 files changed, 18 insertions(+), 31 deletions(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 36c246f1..224c55ff 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -646,29 +646,6 @@ class HubApi: def check_local_cookies(self, use_cookies) -> CookieJar: return self._check_cookie(use_cookies=use_cookies) - def create_library_statistics(self, - method: str, - name: str, - cn_name: Optional[str]): - """ - create library statistics. called by train()/evaluate()/pipeline() - - Args: - method (str): called methed name,i.e train/evaluate/pipeline - name (str): model name, for example: damo/cv_unet_person-image-cartoon_compound-models - cn_name (str): model name in chinese, for example: 达摩卡通化模型 - Raises: - ValueError: If user_cookies is True, but no local cookie. - - Returns: - None - """ - path = f'{self.endpoint}/api/v1/statistics/library' - headers = {'user-agent': ModelScopeConfig.get_user_agent()} - params = {"Method": method, "Name": name, "CnName": cn_name} - r = requests.post(path, params=params, headers=headers) - r.raise_for_status() - return class ModelScopeConfig: diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index a54f3413..8d5db579 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -4,6 +4,7 @@ import hashlib import os from datetime import datetime from typing import Optional +import requests from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, DEFAULT_MODELSCOPE_GROUP, @@ -12,6 +13,7 @@ from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, from modelscope.hub.errors import FileIntegrityError from modelscope.utils.file_utils import get_default_cache_dir from modelscope.utils.logger import get_logger +from modelscope.hub.api import ModelScopeConfig logger = get_logger() @@ -85,3 +87,14 @@ def file_integrity_validation(file_path, expected_sha256): msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path logger.error(msg) raise FileIntegrityError(msg) + + +def create_library_statistics(method: str, + name: str, + cn_name: Optional[str]): + path = f'{get_endpoint()}/api/v1/statistics/library' + headers = {'user-agent': ModelScopeConfig.get_user_agent()} + params = {"Method": method, "Name": name, "CnName": cn_name} + r = requests.post(path, params=params, headers=headers) + r.raise_for_status() + return diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index b8856dea..a56ee934 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -23,7 +23,7 @@ from modelscope.utils.hub import read_config, snapshot_download from modelscope.utils.import_utils import is_tf_available, is_torch_available from modelscope.utils.logger import get_logger from modelscope.utils.torch_utils import _find_free_port, _is_free_port -from modelscope.hub.api import HubApi +from modelscope.hub.utils.utils import create_library_statistics from .util import is_model, is_official_hub_path if is_torch_available(): @@ -152,9 +152,8 @@ class Pipeline(ABC): **kwargs) -> Union[Dict[str, Any], Generator]: # model provider should leave it as it is # modelscope library developer will handle this function - _api = HubApi() model_name = self.cfg.task - _api.create_library_statistics("pipeline", model_name, None) + create_library_statistics("pipeline", model_name, None) # place model to cpu or gpu if (self.model or (self.has_multiple_models and self.models[0])): if not self._model_prepare: diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 6e5f4180..92541252 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -39,7 +39,7 @@ from modelscope.utils.logger import get_logger from modelscope.utils.registry import build_from_cfg from modelscope.utils.torch_utils import (get_dist_info, get_local_rank, init_dist, set_random_seed) -from modelscope.hub.api import HubApi +from modelscope.hub.utils.utils import create_library_statistics from .base import BaseTrainer from .builder import TRAINERS from .default_config import merge_cfg @@ -437,9 +437,8 @@ class EpochBasedTrainer(BaseTrainer): def train(self, checkpoint_path=None, *args, **kwargs): self._mode = ModeKeys.TRAIN - _api = HubApi() model_name = self.cfg.task - _api.create_library_statistics("train", model_name, None) + create_library_statistics("train", model_name, None) if self.train_dataset is None: self.train_dataloader = self.get_train_dataloader() @@ -460,9 +459,8 @@ class EpochBasedTrainer(BaseTrainer): self.train_loop(self.train_dataloader) def evaluate(self, checkpoint_path=None): - _api = HubApi() model_name = self.cfg.task - _api.create_library_statistics("evaluate", model_name, None) + create_library_statistics("evaluate", model_name, None) if checkpoint_path is not None and os.path.isfile(checkpoint_path): from modelscope.trainers.hooks import CheckpointHook CheckpointHook.load_checkpoint(checkpoint_path, self) From 60af6b701b453fdb09cf1f326f8cfac35fcfa27f Mon Sep 17 00:00:00 2001 From: "jiangyu.xzy" Date: Tue, 1 Nov 2022 11:59:59 +0800 Subject: [PATCH 03/21] fix task to model; handle exception --- modelscope/hub/utils/utils.py | 13 ++++++++----- modelscope/pipelines/base.py | 2 +- modelscope/trainers/trainer.py | 4 ++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index 8d5db579..5c915998 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -92,9 +92,12 @@ def file_integrity_validation(file_path, expected_sha256): def create_library_statistics(method: str, name: str, cn_name: Optional[str]): - path = f'{get_endpoint()}/api/v1/statistics/library' - headers = {'user-agent': ModelScopeConfig.get_user_agent()} - params = {"Method": method, "Name": name, "CnName": cn_name} - r = requests.post(path, params=params, headers=headers) - r.raise_for_status() + try: + path = f'{get_endpoint()}/api/v1/statistics/library' + headers = {'user-agent': ModelScopeConfig.get_user_agent()} + params = {"Method": method, "Name": name, "CnName": cn_name} + r = requests.post(path, params=params, headers=headers) + r.raise_for_status() + except Exception: + pass return diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index a56ee934..9280cc09 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -152,7 +152,7 @@ class Pipeline(ABC): **kwargs) -> Union[Dict[str, Any], Generator]: # model provider should leave it as it is # modelscope library developer will handle this function - model_name = self.cfg.task + model_name = self.cfg.model.type create_library_statistics("pipeline", model_name, None) # place model to cpu or gpu if (self.model or (self.has_multiple_models and self.models[0])): diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 92541252..522405ff 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -437,7 +437,7 @@ class EpochBasedTrainer(BaseTrainer): def train(self, checkpoint_path=None, *args, **kwargs): self._mode = ModeKeys.TRAIN - model_name = self.cfg.task + model_name = self.cfg.model.type create_library_statistics("train", model_name, None) if self.train_dataset is None: @@ -459,7 +459,7 @@ class EpochBasedTrainer(BaseTrainer): self.train_loop(self.train_dataloader) def evaluate(self, checkpoint_path=None): - model_name = self.cfg.task + model_name = self.cfg.model.type create_library_statistics("evaluate", model_name, None) if checkpoint_path is not None and os.path.isfile(checkpoint_path): from modelscope.trainers.hooks import CheckpointHook From 4080f8071e96d4dbcc5ae8af10b051e14fea30ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=AD=E6=89=AC?= Date: Tue, 1 Nov 2022 12:57:04 +0800 Subject: [PATCH 04/21] temp --- modelscope/hub/api.py | 11 +++++++++++ modelscope/msdatasets/ms_dataset.py | 14 ++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 7468e5e3..0262fc1d 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -646,6 +646,17 @@ class HubApi: def check_local_cookies(self, use_cookies) -> CookieJar: return self._check_cookie(use_cookies=use_cookies) + def count_uv_by_channel(self, dataset_name: str, namespace: str, channel: str): + # todo: 1. check args 2. + + url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}' + cookies = ModelScopeConfig.get_cookies() + r = requests.post(url, cookies=cookies, headers=self.headers) + resp = r.json() + raise_on_error(resp) + print(resp) + return resp['Message'] + class ModelScopeConfig: path_credential = expanduser(DEFAULT_CREDENTIALS_PATH) diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index 0c537df7..a7d29990 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -727,3 +727,17 @@ class MsDataset: resp_msg = _delete_manager.delete(object_name=object_name) logger.info(f'Object {object_name} successfully removed!') return resp_msg + + +if __name__ == '__main__': + from modelscope.hub.api import HubApi + api = HubApi() + # api.login('c252d64a-ce7b-4c0c-b583-7bedf628c7da') # online + # api.login('aa14716f-e2de-4f26-bf49-254d81eb8ac6') # test + + channel = 'local' # dsw + dataset_name = 'small_coco_for_test' + namespace = 'wangxingjun778test' + resp = api.count_uv_by_channel( + dataset_name=dataset_name, namespace=namespace, channel=channel) + print(resp) From f5c31b33198288405f209773cd41a5efa1991e50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B9=B2=E5=8A=B2?= Date: Tue, 1 Nov 2022 13:31:25 +0800 Subject: [PATCH 05/21] Add miss init --- .../models/science/unifold/modules/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 modelscope/models/science/unifold/modules/__init__.py diff --git a/modelscope/models/science/unifold/modules/__init__.py b/modelscope/models/science/unifold/modules/__init__.py new file mode 100644 index 00000000..9821d212 --- /dev/null +++ b/modelscope/models/science/unifold/modules/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Data pipeline for model features.""" From 943478de635393e957bb0bf6ad677fdd189ac5c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B9=B2=E5=8A=B2?= Date: Tue, 1 Nov 2022 13:32:57 +0800 Subject: [PATCH 06/21] Update --- .../models/science/unifold/modules/__init__.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/modelscope/models/science/unifold/modules/__init__.py b/modelscope/models/science/unifold/modules/__init__.py index 9821d212..63aa84ed 100644 --- a/modelscope/models/science/unifold/modules/__init__.py +++ b/modelscope/models/science/unifold/modules/__init__.py @@ -1,14 +1,3 @@ -# Copyright 2021 DeepMind Technologies Limited -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Data pipeline for model features.""" +# The Uni-fold implementation is also open-sourced by the authors under Apache-2.0 license, +# and is publicly available at https://github.com/dptech-corp/Uni-Fold. +"""Unifold Modules.""" From 2759d538bb30c8c82d0dd32ea3b4bcd7606d41d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B9=B2=E5=8A=B2?= Date: Tue, 1 Nov 2022 14:59:45 +0800 Subject: [PATCH 07/21] fix ut level for unifold --- tests/pipelines/test_unifold.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/test_unifold.py b/tests/pipelines/test_unifold.py index df35dc5e..47bb7874 100644 --- a/tests/pipelines/test_unifold.py +++ b/tests/pipelines/test_unifold.py @@ -19,7 +19,7 @@ class UnifoldProteinStructureTest(unittest.TestCase, DemoCompatibilityCheck): self.protein_multimer = 'GAMGLPEEPSSPQESTLKALSLYEAHLSSYIMYLQTFLVKTKQKVNNKNYPEFTLFDTSKLKKDQTLKSIKT' + \ 'NIAALKNHIDKIKPIAMQIYKKYSKNIP' - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_by_direct_model_download(self): model_dir = snapshot_download(self.model_id) mono_pipeline_ins = pipeline(task=self.task, model=model_dir) From cc76d900bcf2a7aae0a41d02d861f1865aba4b2c Mon Sep 17 00:00:00 2001 From: "jiangyu.xzy" Date: Tue, 1 Nov 2022 15:31:08 +0800 Subject: [PATCH 08/21] add model name to baseModel. use model name as tag --- modelscope/hub/t_jy.py | 16 ++++++++++++++++ modelscope/models/base/base_model.py | 2 ++ modelscope/pipelines/base.py | 5 +++-- modelscope/trainers/trainer.py | 8 ++++---- 4 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 modelscope/hub/t_jy.py diff --git a/modelscope/hub/t_jy.py b/modelscope/hub/t_jy.py new file mode 100644 index 00000000..baf84f46 --- /dev/null +++ b/modelscope/hub/t_jy.py @@ -0,0 +1,16 @@ +def dec(param1): + print(param1) + + def in_dec(func): + def in_func(name): + return func(name) + return in_func + return in_dec + + +@dec("dec1") +def aa(param): + print(param) + return + +aa("heell") \ No newline at end of file diff --git a/modelscope/models/base/base_model.py b/modelscope/models/base/base_model.py index 1ca7e030..721478c3 100644 --- a/modelscope/models/base/base_model.py +++ b/modelscope/models/base/base_model.py @@ -131,6 +131,8 @@ class Model(ABC): if not hasattr(model, 'cfg'): model.cfg = cfg + + model.name = model_name_or_path return model def save_pretrained(self, diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index 9280cc09..b9a4a25c 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -152,8 +152,9 @@ class Pipeline(ABC): **kwargs) -> Union[Dict[str, Any], Generator]: # model provider should leave it as it is # modelscope library developer will handle this function - model_name = self.cfg.model.type - create_library_statistics("pipeline", model_name, None) + for single_model in self.models: + if hasattr(single_model, 'name'): + create_library_statistics("pipeline", single_model.name, None) # place model to cpu or gpu if (self.model or (self.has_multiple_models and self.models[0])): if not self._model_prepare: diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 522405ff..2e79667f 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -437,8 +437,8 @@ class EpochBasedTrainer(BaseTrainer): def train(self, checkpoint_path=None, *args, **kwargs): self._mode = ModeKeys.TRAIN - model_name = self.cfg.model.type - create_library_statistics("train", model_name, None) + if hasattr(self.model, 'name'): + create_library_statistics("train", self.model.name, None) if self.train_dataset is None: self.train_dataloader = self.get_train_dataloader() @@ -459,8 +459,8 @@ class EpochBasedTrainer(BaseTrainer): self.train_loop(self.train_dataloader) def evaluate(self, checkpoint_path=None): - model_name = self.cfg.model.type - create_library_statistics("evaluate", model_name, None) + if hasattr(self.model, 'name'): + create_library_statistics("evaluate", self.model.name, None) if checkpoint_path is not None and os.path.isfile(checkpoint_path): from modelscope.trainers.hooks import CheckpointHook CheckpointHook.load_checkpoint(checkpoint_path, self) From 184c35f80031574d53019124d56637ddfca4aa66 Mon Sep 17 00:00:00 2001 From: "jiangyu.xzy" Date: Tue, 1 Nov 2022 15:32:04 +0800 Subject: [PATCH 09/21] rm useless --- modelscope/hub/t_jy.py | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 modelscope/hub/t_jy.py diff --git a/modelscope/hub/t_jy.py b/modelscope/hub/t_jy.py deleted file mode 100644 index baf84f46..00000000 --- a/modelscope/hub/t_jy.py +++ /dev/null @@ -1,16 +0,0 @@ -def dec(param1): - print(param1) - - def in_dec(func): - def in_func(name): - return func(name) - return in_func - return in_dec - - -@dec("dec1") -def aa(param): - print(param) - return - -aa("heell") \ No newline at end of file From 84032f90e3f2b4a183725ceda16a4b1dc204c2f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=AD=E6=89=AC?= Date: Tue, 1 Nov 2022 15:34:58 +0800 Subject: [PATCH 10/21] add event tracking --- modelscope/hub/api.py | 20 ++++++++++++++------ modelscope/msdatasets/ms_dataset.py | 16 ++-------------- modelscope/utils/constant.py | 8 ++++++++ requirements/framework.txt | 2 +- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 0262fc1d..f2ff822d 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -39,8 +39,8 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DEFAULT_MODEL_REVISION, DEFAULT_REPOSITORY_REVISION, MASTER_MODEL_BRANCH, DatasetFormations, - DatasetMetaFormats, DownloadMode, - ModelFile) + DatasetMetaFormats, DownloadChannel, + DownloadMode, ModelFile) from modelscope.utils.logger import get_logger from .utils.utils import (get_endpoint, get_release_datetime, model_id_to_group_owner_name) @@ -646,15 +646,23 @@ class HubApi: def check_local_cookies(self, use_cookies) -> CookieJar: return self._check_cookie(use_cookies=use_cookies) - def count_uv_by_channel(self, dataset_name: str, namespace: str, channel: str): - # todo: 1. check args 2. + def dataset_download_uv(self, dataset_name: str, namespace: str): + if not dataset_name or not namespace: + raise ValueError('dataset_name or namespace cannot be empty!') - url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}' + # get channel and user_name + channel = DownloadChannel.LOCAL.value + user_name = '' + if MODELSCOPE_ENVIRONMENT in os.environ: + channel = os.environ[MODELSCOPE_ENVIRONMENT] + if MODELSCOPE_USERNAME in os.environ: + user_name = os.environ[MODELSCOPE_USERNAME] + + url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}?user={user_name}' cookies = ModelScopeConfig.get_cookies() r = requests.post(url, cookies=cookies, headers=self.headers) resp = r.json() raise_on_error(resp) - print(resp) return resp['Message'] diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index a7d29990..5c8ea59f 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -274,6 +274,8 @@ class MsDataset: try: api.on_dataset_download( dataset_name=download_dataset, namespace=namespace) + api.dataset_download_uv( + dataset_name=download_dataset, namespace=namespace) except Exception as e: logger.error(e) @@ -727,17 +729,3 @@ class MsDataset: resp_msg = _delete_manager.delete(object_name=object_name) logger.info(f'Object {object_name} successfully removed!') return resp_msg - - -if __name__ == '__main__': - from modelscope.hub.api import HubApi - api = HubApi() - # api.login('c252d64a-ce7b-4c0c-b583-7bedf628c7da') # online - # api.login('aa14716f-e2de-4f26-bf49-254d81eb8ac6') # test - - channel = 'local' # dsw - dataset_name = 'small_coco_for_test' - namespace = 'wangxingjun778test' - resp = api.count_uv_by_channel( - dataset_name=dataset_name, namespace=namespace, channel=channel) - print(resp) diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 2729b75a..f0a97dbd 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -238,6 +238,14 @@ class DownloadMode(enum.Enum): FORCE_REDOWNLOAD = 'force_redownload' +class DownloadChannel(enum.Enum): + """ Channels of datasets downloading for uv/pv counting. + """ + LOCAL = 'local' + DSW = 'dsw' + EAIS = 'eais' + + class UploadMode(enum.Enum): """ How to upload object to remote. """ diff --git a/requirements/framework.txt b/requirements/framework.txt index 17fbd8a3..e78bc9a9 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -1,7 +1,7 @@ addict attrs # version beyond 2.6.0 introduces compatbility issue and is being resolved -datasets<=2.6.0 +datasets<=2.5.2 easydict einops filelock>=3.3.0 From 79c44a68102e182b3194e3b9e6244d4891859274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=AD=E6=89=AC?= Date: Tue, 1 Nov 2022 15:41:01 +0800 Subject: [PATCH 11/21] add event tracking --- requirements/framework.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/framework.txt b/requirements/framework.txt index e78bc9a9..a86c0cc5 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -1,6 +1,6 @@ addict attrs -# version beyond 2.6.0 introduces compatbility issue and is being resolved +# version beyond 2.5.2 introduces compatbility issue and is being resolved datasets<=2.5.2 easydict einops From 63a08e7be68bce218eb6ca755ecbc821017d83b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=AD=E6=89=AC?= Date: Tue, 1 Nov 2022 15:49:21 +0800 Subject: [PATCH 12/21] add event tracking --- tests/msdatasets/test_dataset_upload.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/msdatasets/test_dataset_upload.py b/tests/msdatasets/test_dataset_upload.py index 3d35d480..b67c2ebb 100644 --- a/tests/msdatasets/test_dataset_upload.py +++ b/tests/msdatasets/test_dataset_upload.py @@ -104,7 +104,11 @@ class DatasetUploadTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_ds_download_dir(self): - test_ds = MsDataset.load(self.dataset_name, self.namespace) + from modelscope.utils.constant import DownloadMode + test_ds = MsDataset.load( + self.dataset_name, + namespace=self.namespace, + download_mode=DownloadMode.FORCE_REDOWNLOAD) assert test_ds.config_kwargs['split_config'].values() @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') From e45ab2c32d66a3ae8014be045d773719b82cb0cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=AD=E6=89=AC?= Date: Tue, 1 Nov 2022 15:51:00 +0800 Subject: [PATCH 13/21] add event tracking --- tests/msdatasets/test_dataset_upload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/msdatasets/test_dataset_upload.py b/tests/msdatasets/test_dataset_upload.py index b67c2ebb..d91f24d7 100644 --- a/tests/msdatasets/test_dataset_upload.py +++ b/tests/msdatasets/test_dataset_upload.py @@ -8,7 +8,8 @@ import zipfile from modelscope.msdatasets import MsDataset from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects from modelscope.utils import logger as logging -from modelscope.utils.constant import DEFAULT_DATASET_REVISION, ModelFile +from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode, + ModelFile) from modelscope.utils.test_utils import test_level logger = logging.get_logger(__name__) @@ -104,7 +105,6 @@ class DatasetUploadTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_ds_download_dir(self): - from modelscope.utils.constant import DownloadMode test_ds = MsDataset.load( self.dataset_name, namespace=self.namespace, From 5f3c9433fc83bc13fb00d552270e5dc8d6933854 Mon Sep 17 00:00:00 2001 From: "jiangyu.xzy" Date: Tue, 1 Nov 2022 16:35:46 +0800 Subject: [PATCH 14/21] fix format --- modelscope/hub/api.py | 1 - modelscope/hub/utils/utils.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 224c55ff..7468e5e3 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -647,7 +647,6 @@ class HubApi: return self._check_cookie(use_cookies=use_cookies) - class ModelScopeConfig: path_credential = expanduser(DEFAULT_CREDENTIALS_PATH) COOKIES_FILE_NAME = 'cookies' diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index 5c915998..312647c2 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -95,7 +95,7 @@ def create_library_statistics(method: str, try: path = f'{get_endpoint()}/api/v1/statistics/library' headers = {'user-agent': ModelScopeConfig.get_user_agent()} - params = {"Method": method, "Name": name, "CnName": cn_name} + params = {'Method': method, 'Name': name, 'CnName': cn_name} r = requests.post(path, params=params, headers=headers) r.raise_for_status() except Exception: From 76bb518d75818ce8e19afa0f0b775b00ac9a72cd Mon Sep 17 00:00:00 2001 From: "jiangyu.xzy" Date: Tue, 1 Nov 2022 16:59:47 +0800 Subject: [PATCH 15/21] fix format --- modelscope/hub/utils/utils.py | 8 +++----- modelscope/trainers/trainer.py | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index 312647c2..f9a75cce 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -2,10 +2,11 @@ import hashlib import os +import requests from datetime import datetime from typing import Optional -import requests +from modelscope.hub.api import ModelScopeConfig from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, DEFAULT_MODELSCOPE_GROUP, MODEL_ID_SEPARATOR, MODELSCOPE_SDK_DEBUG, @@ -13,7 +14,6 @@ from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, from modelscope.hub.errors import FileIntegrityError from modelscope.utils.file_utils import get_default_cache_dir from modelscope.utils.logger import get_logger -from modelscope.hub.api import ModelScopeConfig logger = get_logger() @@ -89,9 +89,7 @@ def file_integrity_validation(file_path, expected_sha256): raise FileIntegrityError(msg) -def create_library_statistics(method: str, - name: str, - cn_name: Optional[str]): +def create_library_statistics(method: str, name: str, cn_name: Optional[str]): try: path = f'{get_endpoint()}/api/v1/statistics/library' headers = {'user-agent': ModelScopeConfig.get_user_agent()} diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 2e79667f..d59c3dfc 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -14,6 +14,7 @@ from torch.utils.data import DataLoader, Dataset from torch.utils.data.dataloader import default_collate from torch.utils.data.distributed import DistributedSampler +from modelscope.hub.utils.utils import create_library_statistics from modelscope.hub.snapshot_download import snapshot_download from modelscope.metainfo import Trainers from modelscope.metrics import build_metric, task_default_metrics @@ -39,7 +40,6 @@ from modelscope.utils.logger import get_logger from modelscope.utils.registry import build_from_cfg from modelscope.utils.torch_utils import (get_dist_info, get_local_rank, init_dist, set_random_seed) -from modelscope.hub.utils.utils import create_library_statistics from .base import BaseTrainer from .builder import TRAINERS from .default_config import merge_cfg From 30c8c27145261a3e5c7606976e11faef733d3f49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B9=B2=E5=8A=B2?= Date: Tue, 1 Nov 2022 17:06:30 +0800 Subject: [PATCH 16/21] up requirements --- requirements/science.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements/science.txt b/requirements/science.txt index 72994f72..c345da99 100644 --- a/requirements/science.txt +++ b/requirements/science.txt @@ -4,3 +4,5 @@ ml_collections scipy tensorboardX tokenizers +biopython +ipdb From 853e5235d56bf35922cde0db843cb62353e19a39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B9=B2=E5=8A=B2?= Date: Tue, 1 Nov 2022 17:32:04 +0800 Subject: [PATCH 17/21] fix requirements --- requirements/science.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/science.txt b/requirements/science.txt index c345da99..636f98f4 100644 --- a/requirements/science.txt +++ b/requirements/science.txt @@ -1,8 +1,8 @@ -iopath +biopython lmdb ml_collections scipy tensorboardX tokenizers -biopython -ipdb +iopath +ipdb \ No newline at end of file From 9ae5b67204e5648eb54e1ea43ca741623c87e1da Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Tue, 1 Nov 2022 17:40:28 +0800 Subject: [PATCH 18/21] fix style issues --- modelscope/hub/utils/utils.py | 3 ++- modelscope/pipelines/base.py | 4 ++-- modelscope/trainers/trainer.py | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index f9a75cce..d0a87cbd 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -2,10 +2,11 @@ import hashlib import os -import requests from datetime import datetime from typing import Optional +import requests + from modelscope.hub.api import ModelScopeConfig from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, DEFAULT_MODELSCOPE_GROUP, diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index b9a4a25c..68010012 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -10,6 +10,7 @@ from typing import Any, Dict, Generator, List, Mapping, Union import numpy as np +from modelscope.hub.utils.utils import create_library_statistics from modelscope.models.base import Model from modelscope.msdatasets import MsDataset from modelscope.outputs import TASK_OUTPUTS @@ -23,7 +24,6 @@ from modelscope.utils.hub import read_config, snapshot_download from modelscope.utils.import_utils import is_tf_available, is_torch_available from modelscope.utils.logger import get_logger from modelscope.utils.torch_utils import _find_free_port, _is_free_port -from modelscope.hub.utils.utils import create_library_statistics from .util import is_model, is_official_hub_path if is_torch_available(): @@ -154,7 +154,7 @@ class Pipeline(ABC): # modelscope library developer will handle this function for single_model in self.models: if hasattr(single_model, 'name'): - create_library_statistics("pipeline", single_model.name, None) + create_library_statistics('pipeline', single_model.name, None) # place model to cpu or gpu if (self.model or (self.has_multiple_models and self.models[0])): if not self._model_prepare: diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index d59c3dfc..12c25f30 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -14,8 +14,8 @@ from torch.utils.data import DataLoader, Dataset from torch.utils.data.dataloader import default_collate from torch.utils.data.distributed import DistributedSampler -from modelscope.hub.utils.utils import create_library_statistics from modelscope.hub.snapshot_download import snapshot_download +from modelscope.hub.utils.utils import create_library_statistics from modelscope.metainfo import Trainers from modelscope.metrics import build_metric, task_default_metrics from modelscope.models.base import Model, TorchModel @@ -438,7 +438,7 @@ class EpochBasedTrainer(BaseTrainer): def train(self, checkpoint_path=None, *args, **kwargs): self._mode = ModeKeys.TRAIN if hasattr(self.model, 'name'): - create_library_statistics("train", self.model.name, None) + create_library_statistics('train', self.model.name, None) if self.train_dataset is None: self.train_dataloader = self.get_train_dataloader() @@ -460,7 +460,7 @@ class EpochBasedTrainer(BaseTrainer): def evaluate(self, checkpoint_path=None): if hasattr(self.model, 'name'): - create_library_statistics("evaluate", self.model.name, None) + create_library_statistics('evaluate', self.model.name, None) if checkpoint_path is not None and os.path.isfile(checkpoint_path): from modelscope.trainers.hooks import CheckpointHook CheckpointHook.load_checkpoint(checkpoint_path, self) From 420b63f03b55d5c2a591fd69cd060ed3a8141ef4 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Tue, 1 Nov 2022 17:44:18 +0800 Subject: [PATCH 19/21] fix style issues --- requirements/science.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/science.txt b/requirements/science.txt index 636f98f4..c30ff644 100644 --- a/requirements/science.txt +++ b/requirements/science.txt @@ -1,8 +1,8 @@ biopython +iopath +ipdb lmdb ml_collections scipy tensorboardX tokenizers -iopath -ipdb \ No newline at end of file From aecb88044eba1789a675f22a32cc6f2eed71b91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B9=B2=E5=8A=B2?= Date: Tue, 1 Nov 2022 17:44:37 +0800 Subject: [PATCH 20/21] up --- requirements/science.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/science.txt b/requirements/science.txt index 636f98f4..c30ff644 100644 --- a/requirements/science.txt +++ b/requirements/science.txt @@ -1,8 +1,8 @@ biopython +iopath +ipdb lmdb ml_collections scipy tensorboardX tokenizers -iopath -ipdb \ No newline at end of file From f2faf3acb38e3ccb6e62379e4314f00c844db36f Mon Sep 17 00:00:00 2001 From: "jiangyu.xzy" Date: Tue, 1 Nov 2022 18:04:48 +0800 Subject: [PATCH 21/21] fix import bug --- modelscope/hub/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index d0a87cbd..61d560fa 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -7,7 +7,6 @@ from typing import Optional import requests -from modelscope.hub.api import ModelScopeConfig from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, DEFAULT_MODELSCOPE_GROUP, MODEL_ID_SEPARATOR, MODELSCOPE_SDK_DEBUG, @@ -92,6 +91,7 @@ def file_integrity_validation(file_path, expected_sha256): def create_library_statistics(method: str, name: str, cn_name: Optional[str]): try: + from modelscope.hub.api import ModelScopeConfig path = f'{get_endpoint()}/api/v1/statistics/library' headers = {'user-agent': ModelScopeConfig.get_user_agent()} params = {'Method': method, 'Name': name, 'CnName': cn_name}