Merge branch 'master' into release/1.0

wenmeng.zwm
2022-11-01 21:27:32 +08:00
12 changed files with 71 additions and 8 deletions

View File

@@ -39,8 +39,8 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
                                        DEFAULT_MODEL_REVISION,
                                        DEFAULT_REPOSITORY_REVISION,
                                        MASTER_MODEL_BRANCH, DatasetFormations,
-                                       DatasetMetaFormats, DownloadMode,
-                                       ModelFile)
+                                       DatasetMetaFormats, DownloadChannel,
+                                       DownloadMode, ModelFile)
 from modelscope.utils.logger import get_logger
 from .utils.utils import (get_endpoint, get_release_datetime,
                           model_id_to_group_owner_name)
@@ -646,6 +646,25 @@ class HubApi:
     def check_local_cookies(self, use_cookies) -> CookieJar:
         return self._check_cookie(use_cookies=use_cookies)
 
+    def dataset_download_uv(self, dataset_name: str, namespace: str):
+        if not dataset_name or not namespace:
+            raise ValueError('dataset_name or namespace cannot be empty!')
+
+        # get channel and user_name
+        channel = DownloadChannel.LOCAL.value
+        user_name = ''
+        if MODELSCOPE_ENVIRONMENT in os.environ:
+            channel = os.environ[MODELSCOPE_ENVIRONMENT]
+        if MODELSCOPE_USERNAME in os.environ:
+            user_name = os.environ[MODELSCOPE_USERNAME]
+
+        url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}?user={user_name}'
+        cookies = ModelScopeConfig.get_cookies()
+        r = requests.post(url, cookies=cookies, headers=self.headers)
+        resp = r.json()
+        raise_on_error(resp)
+        return resp['Message']
+
 class ModelScopeConfig:
     path_credential = expanduser(DEFAULT_CREDENTIALS_PATH)
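As a quick sanity check, the new uv endpoint can also be exercised directly from a script; a minimal sketch, assuming HubApi() works with its default endpoint, and using placeholder dataset coordinates:

from modelscope.hub.api import HubApi

api = HubApi()  # default endpoint assumed
# 'some_namespace/some_dataset' is a placeholder, not a real dataset
message = api.dataset_download_uv(dataset_name='some_dataset', namespace='some_namespace')
print(message)  # server acknowledgement from resp['Message']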

View File

@@ -5,6 +5,8 @@ import os
from datetime import datetime
from typing import Optional
import requests
from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN,
DEFAULT_MODELSCOPE_GROUP,
MODEL_ID_SEPARATOR, MODELSCOPE_SDK_DEBUG,
@@ -85,3 +87,16 @@ def file_integrity_validation(file_path, expected_sha256):
         msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path
         logger.error(msg)
         raise FileIntegrityError(msg)
+
+
+def create_library_statistics(method: str, name: str, cn_name: Optional[str]):
+    try:
+        from modelscope.hub.api import ModelScopeConfig
+        path = f'{get_endpoint()}/api/v1/statistics/library'
+        headers = {'user-agent': ModelScopeConfig.get_user_agent()}
+        params = {'Method': method, 'Name': name, 'CnName': cn_name}
+        r = requests.post(path, params=params, headers=headers)
+        r.raise_for_status()
+    except Exception:
+        pass
+    return
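A minimal usage sketch of the helper above; the method and model names are illustrative only, and any network failure is swallowed by the try/except, so the call is fire-and-forget:

from modelscope.hub.utils.utils import create_library_statistics

# 'pipeline' and the model name below are illustrative values only
create_library_statistics('pipeline', 'damo/example-model', None)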

View File

@@ -131,6 +131,8 @@ class Model(ABC):
         if not hasattr(model, 'cfg'):
             model.cfg = cfg
+        model.name = model_name_or_path
+
         return model
 
     def save_pretrained(self,
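With the added assignment, a loaded model carries the id it was created from, which the statistics hooks further down rely on; a sketch, assuming this hunk sits inside Model.from_pretrained and using a hypothetical model id:

from modelscope.models.base import Model

model = Model.from_pretrained('damo/some-model')  # hypothetical model id
print(model.name)  # 'damo/some-model'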

View File

@@ -0,0 +1,3 @@
# The Uni-fold implementation is also open-sourced by the authors under Apache-2.0 license,
# and is publicly available at https://github.com/dptech-corp/Uni-Fold.
"""Unifold Modules."""

View File

@@ -274,6 +274,8 @@ class MsDataset:
         try:
             api.on_dataset_download(
                 dataset_name=download_dataset, namespace=namespace)
+            api.dataset_download_uv(
+                dataset_name=download_dataset, namespace=namespace)
         except Exception as e:
             logger.error(e)
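From the user's side nothing changes: a regular load call now reports both the existing pv counter and the new uv counter, and any reporting error is only logged. A sketch with placeholder dataset coordinates:

from modelscope.msdatasets import MsDataset

# placeholder namespace/name; downloading triggers on_dataset_download (pv)
# and the new dataset_download_uv (uv) behind the scenes
ds = MsDataset.load('some_dataset', namespace='some_namespace')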

View File

@@ -10,6 +10,7 @@ from typing import Any, Dict, Generator, List, Mapping, Union
import numpy as np
from modelscope.hub.utils.utils import create_library_statistics
from modelscope.models.base import Model
from modelscope.msdatasets import MsDataset
from modelscope.outputs import TASK_OUTPUTS
@@ -151,7 +152,9 @@ class Pipeline(ABC):
                  **kwargs) -> Union[Dict[str, Any], Generator]:
         # model provider should leave it as it is
         # modelscope library developer will handle this function
+        for single_model in self.models:
+            if hasattr(single_model, 'name'):
+                create_library_statistics('pipeline', single_model.name, None)
         # place model to cpu or gpu
         if (self.model or (self.has_multiple_models and self.models[0])):
             if not self._model_prepare:
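The pipeline API itself is unchanged; every wrapped model exposing a name attribute is now reported once per __call__. A sketch with a placeholder task and model id, assuming the usual pipeline import path from the rest of the library:

from modelscope.pipelines import pipeline

p = pipeline(task='some-task', model='damo/some-model')  # placeholder task/model
result = p('example input')  # reports ('pipeline', model.name) for each model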

View File

@@ -15,6 +15,7 @@ from torch.utils.data.dataloader import default_collate
from torch.utils.data.distributed import DistributedSampler
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.hub.utils.utils import create_library_statistics
from modelscope.metainfo import Trainers
from modelscope.metrics import build_metric, task_default_metrics
from modelscope.models.base import Model, TorchModel
@@ -436,6 +437,8 @@ class EpochBasedTrainer(BaseTrainer):
     def train(self, checkpoint_path=None, *args, **kwargs):
         self._mode = ModeKeys.TRAIN
+        if hasattr(self.model, 'name'):
+            create_library_statistics('train', self.model.name, None)
         if self.train_dataset is None:
             self.train_dataloader = self.get_train_dataloader()
@@ -456,6 +459,8 @@ class EpochBasedTrainer(BaseTrainer):
         self.train_loop(self.train_dataloader)
 
     def evaluate(self, checkpoint_path=None):
+        if hasattr(self.model, 'name'):
+            create_library_statistics('evaluate', self.model.name, None)
         if checkpoint_path is not None and os.path.isfile(checkpoint_path):
             from modelscope.trainers.hooks import CheckpointHook
             CheckpointHook.load_checkpoint(checkpoint_path, self)
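The trainer hooks mirror the pipeline one: train() and evaluate() each report once when the model exposes a name. A sketch, assuming the build_trainer entry point and its default_args keyword, neither of which appears in this diff, with placeholder arguments:

from modelscope.trainers import build_trainer

# build_trainer and default_args are assumed here; arguments are placeholders
trainer = build_trainer(default_args=dict(model='damo/some-model', work_dir='./work_dir'))
trainer.train()     # reports ('train', model.name) when the model has a name
trainer.evaluate()  # reports ('evaluate', model.name) likewise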

View File

@@ -238,6 +238,14 @@ class DownloadMode(enum.Enum):
     FORCE_REDOWNLOAD = 'force_redownload'
 
 
+class DownloadChannel(enum.Enum):
+    """ Channels of dataset downloading for uv/pv counting.
+    """
+    LOCAL = 'local'
+    DSW = 'dsw'
+    EAIS = 'eais'
+
+
 class UploadMode(enum.Enum):
     """ How to upload object to remote.
     """

View File

@@ -1,7 +1,7 @@
 addict
 attrs
-# versions beyond 2.6.0 introduce a compatibility issue that is being resolved
-datasets<=2.6.0
+# versions beyond 2.5.2 introduce a compatibility issue that is being resolved
+datasets<=2.5.2
 easydict
 einops
 filelock>=3.3.0

View File

@@ -1,4 +1,6 @@
biopython
iopath
ipdb
lmdb
ml_collections
scipy

View File

@@ -8,7 +8,8 @@ import zipfile
 from modelscope.msdatasets import MsDataset
 from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects
 from modelscope.utils import logger as logging
-from modelscope.utils.constant import DEFAULT_DATASET_REVISION, ModelFile
+from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode,
+                                       ModelFile)
 from modelscope.utils.test_utils import test_level
 
 logger = logging.get_logger(__name__)
@@ -104,7 +105,10 @@ class DatasetUploadTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_ds_download_dir(self):
-        test_ds = MsDataset.load(self.dataset_name, self.namespace)
+        test_ds = MsDataset.load(
+            self.dataset_name,
+            namespace=self.namespace,
+            download_mode=DownloadMode.FORCE_REDOWNLOAD)
         assert test_ds.config_kwargs['split_config'].values()
 
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')

View File

@@ -19,7 +19,7 @@ class UnifoldProteinStructureTest(unittest.TestCase, DemoCompatibilityCheck):
         self.protein_multimer = 'GAMGLPEEPSSPQESTLKALSLYEAHLSSYIMYLQTFLVKTKQKVNNKNYPEFTLFDTSKLKKDQTLKSIKT' + \
                                 'NIAALKNHIDKIKPIAMQIYKKYSKNIP'
 
-    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
     def test_run_by_direct_model_download(self):
         model_dir = snapshot_download(self.model_id)
         mono_pipeline_ins = pipeline(task=self.task, model=model_dir)