def dataset_download_uv(self, dataset_name: str, namespace: str):
    """Report a dataset download to the hub's per-channel uv counter.

    The download channel is read from the ``MODELSCOPE_ENVIRONMENT``
    environment variable (falling back to ``DownloadChannel.LOCAL``) and
    the user name from ``MODELSCOPE_USERNAME`` (falling back to an empty
    string). Both come from the process environment, so they are
    URL-encoded before being sent instead of being pasted into the URL
    verbatim.

    Args:
        dataset_name: Name of the downloaded dataset; must be non-empty.
        namespace: Namespace owning the dataset; must be non-empty.

    Returns:
        The ``'Message'`` field of the JSON response.

    Raises:
        ValueError: If ``dataset_name`` or ``namespace`` is empty.
        Exception: Whatever ``raise_on_error`` raises for a response that
            reports a failure (its exact type is defined elsewhere).
    """
    # Local import keeps this block self-contained; stdlib only.
    from urllib.parse import quote

    if not dataset_name or not namespace:
        raise ValueError('dataset_name or namespace cannot be empty!')

    # get channel and user_name
    channel = os.environ.get(MODELSCOPE_ENVIRONMENT,
                             DownloadChannel.LOCAL.value)
    user_name = os.environ.get(MODELSCOPE_USERNAME, '')

    # The channel becomes a single path segment: escape everything,
    # including '/', so an odd environment value cannot alter the route.
    channel_segment = quote(channel, safe='')
    url = (f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}'
           f'/download/uv/{channel_segment}')
    cookies = ModelScopeConfig.get_cookies()
    # Passing the user through ``params`` lets requests encode it; an empty
    # user name still yields ``?user=`` exactly like the previous URL.
    r = requests.post(
        url,
        params={'user': user_name},
        cookies=cookies,
        headers=self.headers)
    resp = r.json()
    raise_on_error(resp)
    return resp['Message']
def create_library_statistics(method: str,
                              name: str,
                              cn_name: Optional[str],
                              timeout: float = 3.0):
    """Best-effort report of a library usage event to the hub.

    Posts ``Method``, ``Name`` and ``CnName`` as query parameters to
    ``<endpoint>/api/v1/statistics/library``. This runs inline in user
    code paths (pipeline calls, training, evaluation), so it must never
    raise and must never block for long: every failure is swallowed
    and only logged at debug level.

    Args:
        method: Kind of usage being reported (callers pass values such as
            ``'pipeline'``, ``'train'`` and ``'evaluate'``).
        name: Name of the model being used.
        cn_name: Optional secondary name; requests omits the parameter
            when it is ``None``.
        timeout: Seconds to wait for the hub before giving up, so an
            unreachable endpoint cannot hang the caller indefinitely.

    Returns:
        None.
    """
    try:
        # Imported lazily: modelscope.hub.api itself imports from this
        # module, so a top-level import would be circular.
        from modelscope.hub.api import ModelScopeConfig
        path = f'{get_endpoint()}/api/v1/statistics/library'
        headers = {'user-agent': ModelScopeConfig.get_user_agent()}
        params = {'Method': method, 'Name': name, 'CnName': cn_name}
        r = requests.post(
            path, params=params, headers=headers, timeout=timeout)
        r.raise_for_status()
    except Exception as e:
        # Statistics are optional; keep the failure visible for debugging
        # without disturbing the caller.
        logger.debug('Failed to report library statistics: %s', e)
+"""Unifold Modules.""" diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index 0c537df7..5c8ea59f 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -274,6 +274,8 @@ class MsDataset: try: api.on_dataset_download( dataset_name=download_dataset, namespace=namespace) + api.dataset_download_uv( + dataset_name=download_dataset, namespace=namespace) except Exception as e: logger.error(e) diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index bca80502..68010012 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -10,6 +10,7 @@ from typing import Any, Dict, Generator, List, Mapping, Union import numpy as np +from modelscope.hub.utils.utils import create_library_statistics from modelscope.models.base import Model from modelscope.msdatasets import MsDataset from modelscope.outputs import TASK_OUTPUTS @@ -151,7 +152,9 @@ class Pipeline(ABC): **kwargs) -> Union[Dict[str, Any], Generator]: # model provider should leave it as it is # modelscope library developer will handle this function - + for single_model in self.models: + if hasattr(single_model, 'name'): + create_library_statistics('pipeline', single_model.name, None) # place model to cpu or gpu if (self.model or (self.has_multiple_models and self.models[0])): if not self._model_prepare: diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 3556badf..12c25f30 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -15,6 +15,7 @@ from torch.utils.data.dataloader import default_collate from torch.utils.data.distributed import DistributedSampler from modelscope.hub.snapshot_download import snapshot_download +from modelscope.hub.utils.utils import create_library_statistics from modelscope.metainfo import Trainers from modelscope.metrics import build_metric, task_default_metrics from modelscope.models.base import Model, TorchModel @@ -436,6 
class DownloadChannel(enum.Enum):
    """Where a dataset download originates, for uv/pv counting.

    Each member's ``value`` is the plain string form of the channel.
    """

    # Default channel.
    LOCAL = 'local'

    # Named hosted environments.
    DSW = 'dsw'
    EAIS = 'eais'
""" diff --git a/requirements/framework.txt b/requirements/framework.txt index 17fbd8a3..a86c0cc5 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -1,7 +1,7 @@ addict attrs -# version beyond 2.6.0 introduces compatbility issue and is being resolved -datasets<=2.6.0 +# version beyond 2.5.2 introduces compatbility issue and is being resolved +datasets<=2.5.2 easydict einops filelock>=3.3.0 diff --git a/requirements/science.txt b/requirements/science.txt index 72994f72..c30ff644 100644 --- a/requirements/science.txt +++ b/requirements/science.txt @@ -1,4 +1,6 @@ +biopython iopath +ipdb lmdb ml_collections scipy diff --git a/tests/msdatasets/test_dataset_upload.py b/tests/msdatasets/test_dataset_upload.py index 3d35d480..d91f24d7 100644 --- a/tests/msdatasets/test_dataset_upload.py +++ b/tests/msdatasets/test_dataset_upload.py @@ -8,7 +8,8 @@ import zipfile from modelscope.msdatasets import MsDataset from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects from modelscope.utils import logger as logging -from modelscope.utils.constant import DEFAULT_DATASET_REVISION, ModelFile +from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode, + ModelFile) from modelscope.utils.test_utils import test_level logger = logging.get_logger(__name__) @@ -104,7 +105,10 @@ class DatasetUploadTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_ds_download_dir(self): - test_ds = MsDataset.load(self.dataset_name, self.namespace) + test_ds = MsDataset.load( + self.dataset_name, + namespace=self.namespace, + download_mode=DownloadMode.FORCE_REDOWNLOAD) assert test_ds.config_kwargs['split_config'].values() @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') diff --git a/tests/pipelines/test_unifold.py b/tests/pipelines/test_unifold.py index df35dc5e..47bb7874 100644 --- a/tests/pipelines/test_unifold.py +++ b/tests/pipelines/test_unifold.py @@ -19,7 
+19,7 @@ class UnifoldProteinStructureTest(unittest.TestCase, DemoCompatibilityCheck): self.protein_multimer = 'GAMGLPEEPSSPQESTLKALSLYEAHLSSYIMYLQTFLVKTKQKVNNKNYPEFTLFDTSKLKKDQTLKSIKT' + \ 'NIAALKNHIDKIKPIAMQIYKKYSKNIP' - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_by_direct_model_download(self): model_dir = snapshot_download(self.model_id) mono_pipeline_ins = pipeline(task=self.task, model=model_dir)