From 8a51225cb8708b941c4ea39ea7a5f7ff99c34fad Mon Sep 17 00:00:00 2001
From: "mulin.lyh" <mulin.lyh@taobao.com>
Date: Mon, 27 May 2024 10:15:45 +0800
Subject: [PATCH] fix error report

---
 modelscope/hub/api.py                         |   2 +-
 modelscope/hub/errors.py                      |  45 ++-
 modelscope/hub/snapshot_download.py           |   3 +-
 modelscope/models/audio/sv/TDNN.py            | 303 ------------------
 .../models/audio/sv/{tdnn.py => xvector.py}   |   0
 tests/hub/test_hub_operation.py               |  37 +++
 6 files changed, 74 insertions(+), 316 deletions(-)
 delete mode 100644 modelscope/models/audio/sv/TDNN.py
 rename modelscope/models/audio/sv/{tdnn.py => xvector.py} (100%)

diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index 5cae4f32..f235a62d 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -404,7 +404,7 @@ class HubApi:
             (owner_or_group, page_number, page_size),
             cookies=cookies,
             headers=self.builder_headers(self.headers))
-        handle_http_response(r, logger, cookies, 'list_model')
+        handle_http_response(r, logger, cookies, owner_or_group)
         if r.status_code == HTTPStatus.OK:
             if is_ok(r.json()):
                 data = r.json()[API_RESPONSE_FIELD_DATA]
diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py
index 804cfe27..6831bd8a 100644
--- a/modelscope/hub/errors.py
+++ b/modelscope/hub/errors.py
@@ -87,16 +87,34 @@ def handle_http_post_error(response, url, request_body):
 
 def handle_http_response(response: requests.Response, logger, cookies,
                          model_id):
-    try:
-        response.raise_for_status()
-    except HTTPError as error:
-        if cookies is None:  # code in [403] and
-            logger.error(
-                f'Authentication token does not exist, failed to access model {model_id} which may not exist or may be \
-                private. Please login first.')
-        message = _decode_response_error(response)
-        raise HTTPError('Response details: %s, Request id: %s' %
-                        (message, get_request_id(response))) from error
+    http_error_msg = ''
+    if isinstance(response.reason, bytes):
+        try:
+            reason = response.reason.decode('utf-8')
+        except UnicodeDecodeError:
+            reason = response.reason.decode('iso-8859-1')
+    else:
+        reason = response.reason
+    request_id = get_request_id(response)
+    if 404 == response.status_code:
+        http_error_msg = 'The request model: %s does not exist!' % (model_id)
+    elif 403 == response.status_code:
+        if cookies is None:
+            http_error_msg = 'Authentication token does not exist, '
+            'failed to access model {model_id} which may not exist or may be '
+            'private. Please login first.'
+        else:
+            http_error_msg = 'The authentication token is invalid, failed to access model {model_id}.'
+    elif 400 <= response.status_code < 500:
+        http_error_msg = u'%s Client Error: %s, Request id: %s for url: %s' % (
+            response.status_code, reason, request_id, response.url)
+
+    elif 500 <= response.status_code < 600:
+        http_error_msg = u'%s Server Error: %s, Request id: %s, for url: %s' % (
+            response.status_code, reason, request_id, response.url)
+    if http_error_msg:  # there is error.
+        logger.error(http_error_msg)
+        raise HTTPError(http_error_msg, response=response)
 
 
 def raise_on_error(rsp):
@@ -160,7 +178,12 @@ def raise_for_http_status(rsp):
     else:
         reason = rsp.reason
     request_id = get_request_id(rsp)
-    if 400 <= rsp.status_code < 500:
+    if 404 == rsp.status_code:
+        http_error_msg = 'The request resource(model or dataset) does not exist!,'
+        'url: %s, reason: %s' % (rsp.url, reason)
+    elif 403 == rsp.status_code:
+        http_error_msg = 'Authentication token does not exist or invalid.'
+    elif 400 <= rsp.status_code < 500:
         http_error_msg = u'%s Client Error: %s, Request id: %s for url: %s' % (
             rsp.status_code, reason, request_id, rsp.url)
 
diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py
index ded40ba4..2ede9621 100644
--- a/modelscope/hub/snapshot_download.py
+++ b/modelscope/hub/snapshot_download.py
@@ -43,7 +43,8 @@ def snapshot_download(
         model_id (str): A user or an organization name and a repo name separated by a `/`.
         revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a
             commit hash. NOTE: currently only branch and tag name is supported
-        cache_dir (str, Path, optional): Path to the folder where cached files are stored.
+        cache_dir (str, Path, optional): Path to the folder where cached files are stored, model will
+            be save as cache_dir/model_id/THE_MODEL_FILES.
         user_agent (str, dict, optional): The user-agent info in the form of a dictionary or a string.
         local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the
             local cached file if it exists.
diff --git a/modelscope/models/audio/sv/TDNN.py b/modelscope/models/audio/sv/TDNN.py
deleted file mode 100644
index 9cc35c1f..00000000
--- a/modelscope/models/audio/sv/TDNN.py
+++ /dev/null
@@ -1,303 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class Conv1d_O(nn.Module):
-
-    def __init__(
-        self,
-        out_channels,
-        kernel_size,
-        input_shape=None,
-        in_channels=None,
-        stride=1,
-        dilation=1,
-        padding='same',
-        groups=1,
-        bias=True,
-        padding_mode='reflect',
-        skip_transpose=False,
-    ):
-        super().__init__()
-        self.kernel_size = kernel_size
-        self.stride = stride
-        self.dilation = dilation
-        self.padding = padding
-        self.padding_mode = padding_mode
-        self.unsqueeze = False
-        self.skip_transpose = skip_transpose
-
-        if input_shape is None and in_channels is None:
-            raise ValueError('Must provide one of input_shape or in_channels')
-
-        if in_channels is None:
-            in_channels = self._check_input_shape(input_shape)
-
-        self.conv = nn.Conv1d(
-            in_channels,
-            out_channels,
-            self.kernel_size,
-            stride=self.stride,
-            dilation=self.dilation,
-            padding=0,
-            groups=groups,
-            bias=bias,
-        )
-
-    def forward(self, x):
-        """Returns the output of the convolution.
-
-        Arguments
-        ---------
-        x : torch.Tensor (batch, time, channel)
-            input to convolve. 2d or 4d tensors are expected.
-        """
-
-        if not self.skip_transpose:
-            x = x.transpose(1, -1)
-
-        if self.unsqueeze:
-            x = x.unsqueeze(1)
-
-        if self.padding == 'same':
-            x = self._manage_padding(x, self.kernel_size, self.dilation,
-                                     self.stride)
-
-        elif self.padding == 'causal':
-            num_pad = (self.kernel_size - 1) * self.dilation
-            x = F.pad(x, (num_pad, 0))
-
-        elif self.padding == 'valid':
-            pass
-
-        else:
-            raise ValueError(
-                "Padding must be 'same', 'valid' or 'causal'. Got "
-                + self.padding)
-
-        wx = self.conv(x)
-
-        if self.unsqueeze:
-            wx = wx.squeeze(1)
-
-        if not self.skip_transpose:
-            wx = wx.transpose(1, -1)
-
-        return wx
-
-    def _manage_padding(
-        self,
-        x,
-        kernel_size: int,
-        dilation: int,
-        stride: int,
-    ):
-        # Detecting input shape
-        L_in = x.shape[-1]
-
-        # Time padding
-        padding = get_padding_elem(L_in, stride, kernel_size, dilation)
-
-        # Applying padding
-        x = F.pad(x, padding, mode=self.padding_mode)
-
-        return x
-
-    def _check_input_shape(self, shape):
-        """Checks the input shape and returns the number of input channels.
-        """
-
-        if len(shape) == 2:
-            self.unsqueeze = True
-            in_channels = 1
-        elif self.skip_transpose:
-            in_channels = shape[1]
-        elif len(shape) == 3:
-            in_channels = shape[2]
-        else:
-            raise ValueError('conv1d expects 2d, 3d inputs. Got '
-                             + str(len(shape)))
-
-        # Kernel size must be odd
-        if self.kernel_size % 2 == 0:
-            raise ValueError(
-                'The field kernel size must be an odd number. Got %s.' %
-                (self.kernel_size))
-        return in_channels
-
-
-# Skip transpose as much as possible for efficiency
-class Conv1d(Conv1d_O):
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(skip_transpose=True, *args, **kwargs)
-
-
-def get_padding_elem(L_in: int, stride: int, kernel_size: int, dilation: int):
-    """This function computes the number of elements to add for zero-padding.
-
-    Arguments
-    ---------
-    L_in : int
-    stride: int
-    kernel_size : int
-    dilation : int
-    """
-    if stride > 1:
-        n_steps = math.ceil(((L_in - kernel_size * dilation) / stride) + 1)
-        L_out = stride * (n_steps - 1) + kernel_size * dilation
-        padding = [kernel_size // 2, kernel_size // 2]
-
-    else:
-        L_out = (L_in - dilation * (kernel_size - 1) - 1) // stride + 1
-
-        padding = [(L_in - L_out) // 2, (L_in - L_out) // 2]
-    return padding
-
-
-class BatchNorm1d_O(nn.Module):
-
-    def __init__(
-        self,
-        input_shape=None,
-        input_size=None,
-        eps=1e-05,
-        momentum=0.1,
-        affine=True,
-        track_running_stats=True,
-        combine_batch_time=False,
-        skip_transpose=False,
-    ):
-        super().__init__()
-        self.combine_batch_time = combine_batch_time
-        self.skip_transpose = skip_transpose
-
-        if input_size is None and skip_transpose:
-            input_size = input_shape[1]
-        elif input_size is None:
-            input_size = input_shape[-1]
-
-        self.norm = nn.BatchNorm1d(
-            input_size,
-            eps=eps,
-            momentum=momentum,
-            affine=affine,
-            track_running_stats=track_running_stats,
-        )
-
-    def forward(self, x):
-        """Returns the normalized input tensor.
-
-        Arguments
-        ---------
-        x : torch.Tensor (batch, time, [channels])
-            input to normalize. 2d or 3d tensors are expected in input
-            4d tensors can be used when combine_dims=True.
-        """
-        shape_or = x.shape
-        if self.combine_batch_time:
-            if x.ndim == 3:
-                x = x.reshape(shape_or[0] * shape_or[1], shape_or[2])
-            else:
-                x = x.reshape(shape_or[0] * shape_or[1], shape_or[3],
-                              shape_or[2])
-
-        elif not self.skip_transpose:
-            x = x.transpose(-1, 1)
-
-        x_n = self.norm(x)
-
-        if self.combine_batch_time:
-            x_n = x_n.reshape(shape_or)
-        elif not self.skip_transpose:
-            x_n = x_n.transpose(1, -1)
-
-        return x_n
-
-
-class BatchNorm1d(BatchNorm1d_O):
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(skip_transpose=True, *args, **kwargs)
-
-
-class Xvector(torch.nn.Module):
-    """This model extracts X-vectors for speaker recognition and diarization.
-
-    Arguments
-    ---------
-    device : str
-        Device used e.g. "cpu" or "cuda".
-    activation : torch class
-        A class for constructing the activation layers.
-    tdnn_blocks : int
-        Number of time-delay neural (TDNN) layers.
-    tdnn_channels : list of ints
-        Output channels for TDNN layer.
-    tdnn_kernel_sizes : list of ints
-        List of kernel sizes for each TDNN layer.
-    tdnn_dilations : list of ints
-        List of dilations for kernels in each TDNN layer.
-    lin_neurons : int
-        Number of neurons in linear layers.
-
-    Example
-    -------
-    >>> compute_xvect = Xvector('cpu')
-    >>> input_feats = torch.rand([5, 10, 40])
-    >>> outputs = compute_xvect(input_feats)
-    >>> outputs.shape
-    torch.Size([5, 1, 512])
-    """
-
-    def __init__(
-        self,
-        device='cpu',
-        activation=torch.nn.LeakyReLU,
-        tdnn_blocks=5,
-        tdnn_channels=[512, 512, 512, 512, 1500],
-        tdnn_kernel_sizes=[5, 3, 3, 1, 1],
-        tdnn_dilations=[1, 2, 3, 1, 1],
-        lin_neurons=512,
-        in_channels=80,
-    ):
-
-        super().__init__()
-        self.blocks = nn.ModuleList()
-
-        # TDNN layers
-        for block_index in range(tdnn_blocks):
-            out_channels = tdnn_channels[block_index]
-            self.blocks.extend([
-                Conv1d(
-                    in_channels=in_channels,
-                    out_channels=out_channels,
-                    kernel_size=tdnn_kernel_sizes[block_index],
-                    dilation=tdnn_dilations[block_index],
-                ),
-                activation(),
-                BatchNorm1d(input_size=out_channels),
-            ])
-            in_channels = tdnn_channels[block_index]
-
-    def forward(self, x, lens=None):
-        """Returns the x-vectors.
-
-        Arguments
-        ---------
-        x : torch.Tensor
-        """
-
-        x = x.transpose(1, 2)
-
-        for layer in self.blocks:
-            try:
-                x = layer(x, lengths=lens)
-            except TypeError:
-                x = layer(x)
-        x = x.transpose(1, 2)
-        return x
diff --git a/modelscope/models/audio/sv/tdnn.py b/modelscope/models/audio/sv/xvector.py
similarity index 100%
rename from modelscope/models/audio/sv/tdnn.py
rename to modelscope/models/audio/sv/xvector.py
diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py
index a22aaa64..a337accb 100644
--- a/tests/hub/test_hub_operation.py
+++ b/tests/hub/test_hub_operation.py
@@ -1,8 +1,10 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import os
+import shutil
 import tempfile
 import unittest
 import uuid
+from pathlib import Path
 from shutil import rmtree
 
 import requests
@@ -13,6 +15,7 @@ from modelscope.hub.file_download import model_file_download
 from modelscope.hub.repository import Repository
 from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.utils.constant import ModelFile
+from modelscope.utils.file_utils import get_model_cache_dir
 from modelscope.utils.test_utils import (TEST_ACCESS_TOKEN1,
                                          TEST_MODEL_CHINESE_NAME,
                                          TEST_MODEL_ORG)
@@ -148,6 +151,40 @@ class HubOperationTest(unittest.TestCase):
         data = self.api.list_models(TEST_MODEL_ORG)
         assert len(data['Models']) >= 1
 
+    def test_snapshot_download_location(self):
+        self.prepare_case()
+        snapshot_download_path = snapshot_download(
+            model_id=self.model_id, revision=self.revision)
+        assert os.path.exists(snapshot_download_path)
+        assert '/hub/' in snapshot_download_path
+        print(snapshot_download_path)
+        shutil.rmtree(snapshot_download_path)
+        # download with cache_dir
+        cache_dir = '/tmp/snapshot_download_cache_test'
+        snapshot_download_path = snapshot_download(
+            self.model_id, revision=self.revision, cache_dir=cache_dir)
+        expect_path = os.path.join(cache_dir, self.model_id)
+        assert snapshot_download_path == expect_path
+        assert os.path.exists(
+            os.path.join(snapshot_download_path, ModelFile.README))
+        shutil.rmtree(cache_dir)
+        # download with local_dir
+        local_dir = '/tmp/snapshot_download_local_dir'
+        snapshot_download_path = snapshot_download(
+            self.model_id, revision=self.revision, local_dir=local_dir)
+        assert snapshot_download_path == local_dir
+        assert os.path.exists(os.path.join(local_dir, ModelFile.README))
+        shutil.rmtree(local_dir)
+        # download with local_dir and cache dir, with local first.
+        local_dir = '/tmp/snapshot_download_local_dir'
+        snapshot_download_path = snapshot_download(
+            self.model_id,
+            revision=self.revision,
+            cache_dir=cache_dir,
+            local_dir=local_dir)
+        assert snapshot_download_path == local_dir
+        assert os.path.exists(os.path.join(local_dir, ModelFile.README))
+
 
 if __name__ == '__main__':
     unittest.main()