From 7e3fc3c1aa62e2724acde294c43ae13fa65b4829 Mon Sep 17 00:00:00 2001 From: Jintao Date: Tue, 26 Sep 2023 19:37:51 +0800 Subject: [PATCH 01/36] fix hf bug (#567) --- modelscope/utils/hf_util.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 3abcce6d..5722928a 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -91,12 +91,13 @@ def check_hf_code(model_dir: str, auto_class: type, raise FileNotFoundError(f'{config_path} is not found') config_dict = PretrainedConfig.get_config_dict(config_path)[0] auto_class_name = auto_class.__name__ + if auto_class is AutoTokenizerHF: + tokenizer_config = get_tokenizer_config(model_dir) # load from repo if trust_remote_code: has_remote_code = False if auto_class is AutoTokenizerHF: - tokenizer_config_dict = get_tokenizer_config(model_dir) - auto_map = tokenizer_config_dict.get('auto_map', None) + auto_map = tokenizer_config.get('auto_map', None) if auto_map is not None: module_name = auto_map.get(auto_class_name, None) if module_name is not None: @@ -129,7 +130,10 @@ def check_hf_code(model_dir: str, auto_class: type, f'{model_type} not found in HF `CONFIG_MAPPING`{trust_remote_code_info}' ) elif auto_class is AutoTokenizerHF: - if model_type not in TOKENIZER_MAPPING_NAMES: + tokenizer_class = tokenizer_config.get('tokenizer_class') + if tokenizer_class is not None: + return + if model_type in TOKENIZER_MAPPING_NAMES: raise ValueError( f'{model_type} not found in HF `TOKENIZER_MAPPING_NAMES`{trust_remote_code_info}' ) From 29f4445dc270b552d8dcc2cd37d611fdced59a4c Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Tue, 26 Sep 2023 21:23:38 +0800 Subject: [PATCH 02/36] modify docker file --- docker/Dockerfile.ubuntu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index e5247774..2af8994b 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -48,7 +48,7 @@ ENV SETUPTOOLS_USE_DISTUTILS=stdlib RUN CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6" pip install --no-cache-dir 'git+https://github.com/facebookresearch/detectron2.git' # torchmetrics==0.11.4 for ofa -RUN pip install --no-cache-dir tiktoken torchmetrics==0.11.4 https://modelscope.oss-cn-beijing.aliyuncs.com/releases/v/ms_swift-1.1.0-py3-none-any.whl transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr +RUN pip install --no-cache-dir tiktoken torchmetrics==0.11.4 transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr COPY docker/scripts/install_flash_attension.sh /tmp/install_flash_attension.sh RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_flash_attension.sh; \ From 01defd40e89cb0c612431c81141fb372585c449e Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Tue, 26 Sep 2023 21:24:59 +0800 Subject: [PATCH 03/36] version to 1.9.2 --- modelscope/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope/version.py b/modelscope/version.py index 7f3298ab..97e90b1d 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. 
-__version__ = '1.9.1' +__version__ = '1.9.2' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future __release_datetime__ = '2023-09-06 00:00:00' From 00e822d73ecafd472816b78ce8df30732fd4c446 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Tue, 26 Sep 2023 22:07:04 +0800 Subject: [PATCH 04/36] version to 1.9.2 --- modelscope/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelscope/version.py b/modelscope/version.py index 23ef0243..97e90b1d 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. -__version__ = '1.9.1' +__version__ = '1.9.2' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future -__release_datetime__ = '2099-09-06 00:00:00' +__release_datetime__ = '2023-09-06 00:00:00' From 749cb37d0a5ac8bbc577aa3b97d60ecf398ef2a1 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Wed, 27 Sep 2023 09:33:32 +0800 Subject: [PATCH 05/36] fix hf bug (#567) Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14181647 * fix hf bug (#567) --- modelscope/utils/hf_util.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 3abcce6d..5722928a 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -91,12 +91,13 @@ def check_hf_code(model_dir: str, auto_class: type, raise FileNotFoundError(f'{config_path} is not found') config_dict = PretrainedConfig.get_config_dict(config_path)[0] auto_class_name = auto_class.__name__ + if auto_class is AutoTokenizerHF: + tokenizer_config = get_tokenizer_config(model_dir) # load from repo if trust_remote_code: has_remote_code = False if auto_class is AutoTokenizerHF: - tokenizer_config_dict = get_tokenizer_config(model_dir) - auto_map = tokenizer_config_dict.get('auto_map', None) + auto_map = tokenizer_config.get('auto_map', None) if auto_map is not None: module_name = auto_map.get(auto_class_name, None) if module_name is not None: @@ -129,7 +130,10 @@ def check_hf_code(model_dir: str, auto_class: type, f'{model_type} not found in HF `CONFIG_MAPPING`{trust_remote_code_info}' ) elif auto_class is AutoTokenizerHF: - if model_type not in TOKENIZER_MAPPING_NAMES: + tokenizer_class = tokenizer_config.get('tokenizer_class') + if tokenizer_class is not None: + return + if model_type in TOKENIZER_MAPPING_NAMES: raise ValueError( f'{model_type} not found in HF `TOKENIZER_MAPPING_NAMES`{trust_remote_code_info}' ) From 6944d4173452d6ec79ef7ee6608350efe07d7c03 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Wed, 27 Sep 2023 11:47:23 +0800 Subject: [PATCH 06/36] skip install detection2 in cpu --- docker/Dockerfile.ubuntu | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index 2af8994b..c6c01a47 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -45,7 +45,11 @@ COPY examples /modelscope/examples # for pai-easycv setup compatiblity issue ENV SETUPTOOLS_USE_DISTUTILS=stdlib -RUN CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6" pip install --no-cache-dir 'git+https://github.com/facebookresearch/detectron2.git' +RUN if [ "$USE_GPU" = "True" ] ; then \ + CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6" pip install --no-cache-dir 
'git+https://github.com/facebookresearch/detectron2.git'; \ + else \ + echo 'cpu unsupport detectron2'; \ + fi # torchmetrics==0.11.4 for ofa RUN pip install --no-cache-dir tiktoken torchmetrics==0.11.4 transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr From 007568469205fde86790042ffc436fcbbbddacc8 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Wed, 27 Sep 2023 13:51:11 +0800 Subject: [PATCH 07/36] add use gpu arg --- docker/Dockerfile.ubuntu | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index c6c01a47..f0e28075 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -32,6 +32,7 @@ RUN pip install --no-cache-dir mpi4py paint_ldm \ mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 pai-easycv ms_swift \ ipykernel fasttext fairseq deepspeed -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +ARG USE_GPU # for cpu install cpu version faiss, faiss depends on blas lib, we install libopenblas TODO rename gpu or cpu version faiss RUN if [ "$USE_GPU" = "True" ] ; then \ pip install --no-cache-dir funtextprocessing kwsbp==0.0.6 faiss==1.7.2 safetensors typeguard==2.13.3 scikit-learn librosa==0.9.2 funasr -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ From 4a5be4bc0949167ada042a74a038b11fc6f58200 Mon Sep 17 00:00:00 2001 From: Jintao Date: Wed, 27 Sep 2023 17:12:09 +0800 Subject: [PATCH 08/36] fix hf bug (#569) --- modelscope/utils/hf_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 5722928a..6ef98ccf 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -133,7 +133,7 @@ def check_hf_code(model_dir: str, auto_class: type, tokenizer_class = tokenizer_config.get('tokenizer_class') if tokenizer_class is not None: return - if model_type in TOKENIZER_MAPPING_NAMES: + if model_type not in TOKENIZER_MAPPING_NAMES: raise ValueError( f'{model_type} not found in HF `TOKENIZER_MAPPING_NAMES`{trust_remote_code_info}' ) From c72c95cce6c40926f844c295823b24a6293b9505 Mon Sep 17 00:00:00 2001 From: "lingcai.wl" Date: Wed, 27 Sep 2023 19:42:54 +0800 Subject: [PATCH 09/36] [to #51336898] fix minor problems in deploying Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14190317 --- modelscope/outputs/outputs.py | 1 + modelscope/pipeline_inputs.py | 36 ++++++++++++++++---------------- modelscope/utils/input_output.py | 6 ++---- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py index 82c5ce10..368abad6 100644 --- a/modelscope/outputs/outputs.py +++ b/modelscope/outputs/outputs.py @@ -562,6 +562,7 @@ TASK_OUTPUTS = { # } Tasks.facial_expression_recognition: [OutputKeys.SCORES, OutputKeys.LABELS], + Tasks.general_recognition: [OutputKeys.SCORES, OutputKeys.LABELS], # face processing base result for single img # { diff --git a/modelscope/pipeline_inputs.py b/modelscope/pipeline_inputs.py index f465a722..bffbebbd 100644 --- a/modelscope/pipeline_inputs.py +++ b/modelscope/pipeline_inputs.py @@ -82,14 +82,28 @@ def check_input_type(input_type, input): TASK_INPUTS = { + + # if task input is single var, value is InputType + # if task input is a tuple, value is tuple of InputType + # if task input is a dict, value is a dict of InputType, where key + # equals the one needed in pipeline input dict + # if task input is a list, value is a set of input format, in which + # each element corresponds to one 
input format as described above and + # must include a dict format. + + + Tasks.task_template: { + 'image': InputType.IMAGE, + 'text': InputType.TEXT + }, + # ============ vision tasks =================== + Tasks.image_text_retrieval: { InputKeys.IMAGE: InputType.IMAGE, InputKeys.TEXT: InputType.TEXT }, - Tasks.general_recognition: { - InputKeys.IMAGE: InputType.IMAGE, - InputKeys.TEXT: InputType.TEXT - }, + Tasks.general_recognition: + InputType.IMAGE, Tasks.video_depth_estimation: { InputKeys.IMAGE: InputType.IMAGE, InputKeys.TEXT: InputType.TEXT @@ -110,20 +124,6 @@ TASK_INPUTS = { InputType.VIDEO, - - Tasks.task_template: { - 'image': InputType.IMAGE, - 'text': InputType.TEXT - }, - # if task input is single var, value is InputType - # if task input is a tuple, value is tuple of InputType - # if task input is a dict, value is a dict of InputType, where key - # equals the one needed in pipeline input dict - # if task input is a list, value is a set of input format, in which - # each element corresponds to one input format as described above and - # must include a dict format. - - # ============ vision tasks =================== Tasks.ocr_detection: InputType.IMAGE, Tasks.ocr_recognition: diff --git a/modelscope/utils/input_output.py b/modelscope/utils/input_output.py index dbe5861d..d8e32cce 100644 --- a/modelscope/utils/input_output.py +++ b/modelscope/utils/input_output.py @@ -656,7 +656,7 @@ def service_base64_input_to_pipeline_input(task_name, body): if isinstance(service_input, (str, int, float)): return service_input, parameters - task_input_info = TASK_INPUTS[task_name] + task_input_info = TASK_INPUTS.get(task_name, None) if isinstance(task_input_info, str): # no input key default if isinstance(service_input, dict): return base64_decoder_map[task_input_info](list( @@ -767,9 +767,7 @@ def pipeline_output_to_service_base64_output(task_name, pipeline_output): pipeline_output (object): The pipeline output. 
""" json_serializable_output = {} - task_outputs = [] - if task_name in TASK_OUTPUTS: - task_outputs = TASK_OUTPUTS[task_name] + task_outputs = TASK_OUTPUTS.get(task_name, []) # TODO: for batch if isinstance(pipeline_output, list): pipeline_output = pipeline_output[0] From cc057378160cfa639aecf8fddeb8c897e5024d28 Mon Sep 17 00:00:00 2001 From: Jintao Date: Wed, 27 Sep 2023 17:12:09 +0800 Subject: [PATCH 10/36] fix hf bug (#569) --- modelscope/utils/hf_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 5722928a..6ef98ccf 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -133,7 +133,7 @@ def check_hf_code(model_dir: str, auto_class: type, tokenizer_class = tokenizer_config.get('tokenizer_class') if tokenizer_class is not None: return - if model_type in TOKENIZER_MAPPING_NAMES: + if model_type not in TOKENIZER_MAPPING_NAMES: raise ValueError( f'{model_type} not found in HF `TOKENIZER_MAPPING_NAMES`{trust_remote_code_info}' ) From 61bd1570e7b362952ae8b77e4db85589b9f2e06a Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Wed, 27 Sep 2023 21:15:55 +0800 Subject: [PATCH 11/36] remove ignore_file_pattern --- modelscope/utils/hf_util.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 6ef98ccf..e3e8cac8 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -190,18 +190,13 @@ def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs): return ClassWrapper -AutoModel = get_wrapped_class( - AutoModelHF, ignore_file_pattern=[r'\w+\.safetensors']) -AutoModelForCausalLM = get_wrapped_class( - AutoModelForCausalLMHF, ignore_file_pattern=[r'\w+\.safetensors']) -AutoModelForSeq2SeqLM = get_wrapped_class( - AutoModelForSeq2SeqLMHF, ignore_file_pattern=[r'\w+\.safetensors']) +AutoModel = get_wrapped_class(AutoModelHF) +AutoModelForCausalLM = get_wrapped_class(AutoModelForCausalLMHF) +AutoModelForSeq2SeqLM = get_wrapped_class(AutoModelForSeq2SeqLMHF) AutoModelForSequenceClassification = get_wrapped_class( - AutoModelForSequenceClassificationHF, - ignore_file_pattern=[r'\w+\.safetensors']) + AutoModelForSequenceClassificationHF) AutoModelForTokenClassification = get_wrapped_class( - AutoModelForTokenClassificationHF, - ignore_file_pattern=[r'\w+\.safetensors']) + AutoModelForTokenClassificationHF) AutoTokenizer = get_wrapped_class( AutoTokenizerHF, ignore_file_pattern=[r'\w+\.bin', r'\w+\.safetensors']) From 8d0ffa47b3cfad462c8700060c3c64643f6f6e65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8B=84=E5=92=96?= Date: Fri, 25 Aug 2023 17:17:37 +0800 Subject: [PATCH 12/36] [feat]chat pipeline --- modelscope/models/base/base_model.py | 15 +- modelscope/pipelines/nlp/llm_pipeline.py | 405 +++++++++++++++++++++++ modelscope/pipelines/util.py | 11 +- tests/pipelines/test_llm_pipeline.py | 312 +++++++++++++++++ 4 files changed, 736 insertions(+), 7 deletions(-) create mode 100644 modelscope/pipelines/nlp/llm_pipeline.py create mode 100644 tests/pipelines/test_llm_pipeline.py diff --git a/modelscope/models/base/base_model.py b/modelscope/models/base/base_model.py index 788d5c43..9f225383 100644 --- a/modelscope/models/base/base_model.py +++ b/modelscope/models/base/base_model.py @@ -126,7 +126,7 @@ class Model(ABC): ) invoked_by = '%s/%s' % (Invoke.KEY, invoked_by) - ignore_file_pattern = kwargs.get('ignore_file_pattern', None) + ignore_file_pattern = 
kwargs.pop('ignore_file_pattern', None) local_model_dir = snapshot_download( model_name_or_path, revision, @@ -142,10 +142,15 @@ class Model(ABC): task_name = cfg.task if 'task' in kwargs: task_name = kwargs.pop('task') - model_cfg = cfg.model - if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): - model_cfg.type = model_cfg.model_type - model_type = model_cfg.type + try: + model_cfg = cfg.model + if hasattr(model_cfg, + 'model_type') and not hasattr(model_cfg, 'type'): + model_cfg.type = model_cfg.model_type + model_type = model_cfg.type + except Exception: + model_cfg = {} + model_type = '' if isinstance(device, str) and device.startswith('gpu'): device = 'cuda' + device[3:] use_hf = kwargs.pop('use_hf', None) diff --git a/modelscope/pipelines/nlp/llm_pipeline.py b/modelscope/pipelines/nlp/llm_pipeline.py new file mode 100644 index 00000000..91f26812 --- /dev/null +++ b/modelscope/pipelines/nlp/llm_pipeline.py @@ -0,0 +1,405 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import Any, Callable, Dict, Iterator, List, Tuple, Union + +import torch +from transformers import PreTrainedTokenizer + +from modelscope import AutoTokenizer, Pipeline +from modelscope.models.base import Model +from modelscope.models.nlp import ChatGLM2Tokenizer, Llama2Tokenizer +from modelscope.pipelines.builder import PIPELINES +from modelscope.pipelines.util import is_model, is_official_hub_path +from modelscope.utils.constant import Invoke, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module(Tasks.chat, module_name='llm-pipeline') +class LLMPipeline(Pipeline): + + def initiate_single_model(self, model): + if isinstance(model, str): + logger.info(f'initiate model from {model}') + if isinstance(model, str) and is_official_hub_path(model): + logger.info(f'initiate model from location {model}.') + return Model.from_pretrained( + model, + invoked_by=Invoke.PIPELINE, + device_map=self.device_map, + torch_dtype=self.torch_dtype, + ignore_file_pattern=self.ignore_file_pattern) if is_model( + model) else model + else: + return model + + def __init__(self, + format_messages: Union[Callable, str] = None, + format_output: Callable = None, + tokenizer: PreTrainedTokenizer = None, + *args, + **kwargs): + self.torch_dtype = kwargs.pop('torch_dtype', None) + self.ignore_file_pattern = kwargs.pop('ignore_file_pattern', None) + super().__init__(*args, **kwargs) + + tokenizer_class = None + if isinstance(format_messages, str): + assert format_messages in LLM_FORMAT_MAP, \ + f'Can not find function for `{format_messages}`!' + format_messages, format_output, tokenizer_class = LLM_FORMAT_MAP[ + format_messages] + + if format_messages is None: + model_type = self.cfg.safe_get('model.type', + '').lower().split('-')[0] + + if model_type in LLM_FORMAT_MAP: + format_messages, format_output, tokenizer_class = LLM_FORMAT_MAP[ + model_type] + else: + raise KeyError( + f'model type `{model_type}` is not supported for LLM pipeline!' 
+ ) + + if format_messages is not None: + self.format_messages = format_messages + if format_output is not None: + self.format_output = format_output + self.tokenizer = self._get_tokenizer( + tokenizer_class) if tokenizer is None else tokenizer + + def _process_single(self, inputs, *args, **kwargs) -> Dict[str, Any]: + preprocess_params = kwargs.get('preprocess_params', {}) + forward_params = kwargs.get('forward_params', {}) + postprocess_params = kwargs.get('postprocess_params', {}) + + is_messages = isinstance(inputs, dict) and 'messages' in inputs + tokens = self.preprocess(inputs, is_messages, **preprocess_params) + + if hasattr(self.model, 'generate'): + outputs = self.model.generate(**tokens, **forward_params) + elif hasattr(self.model, 'model') and hasattr(self.model.model, + 'generate'): + outputs = self.model.model.generate(**tokens, **forward_params) + else: + raise ValueError('model does not support `generate`!') + + outputs = outputs.tolist()[0][len(tokens['inputs'][0]):] + response = self.postprocess(outputs, is_messages, **postprocess_params) + return response + + def preprocess(self, inputs: Union[str, Dict], is_messages: bool, + **kwargs): + if is_messages: + tokens = self.format_messages(inputs, self.tokenizer, **kwargs) + else: + tokens = self.tokenizer(inputs, return_tensors='pt', **kwargs) + + tokens['inputs'] = tokens.pop('input_ids') + + if hasattr(self.model, 'device'): + device = self.model.device + elif hasattr(self.model, 'model') and hasattr(self.model.model, + 'device'): + device = self.model.model.device + else: + raise ValueError('model does not have `device` attribute!') + return {k: v.to(device) for k, v in tokens.items()} + + def postprocess(self, outputs, is_messages: bool, **kwargs): + + response = self.tokenizer.decode( + outputs, skip_special_tokens=True, **kwargs) + if is_messages: + response = self.format_output(response, **kwargs) + + return response + + def _sanitize_parameters(self, **generate_parameter): + """ + this method should sanitize the keyword args to preprocessor params, + forward params and postprocess params on '__call__' or '_process_single' method + considered to be a normal classmethod with default implementation / output + + Default Returns: + Dict[str, str]: preprocess_params = {} + Dict[str, str]: forward_params = {} + Dict[str, str]: postprocess_params = pipeline_parameters + """ + return {}, generate_parameter, {} + + def _get_tokenizer(self, tokenizer_class=None): + if isinstance(self.model, str): + model_dir = self.model + else: + model_dir = self.model.model_dir + if tokenizer_class is None: + tokenizer_class = AutoTokenizer + return tokenizer_class.from_pretrained( + model_dir, trust_remote_code=True) + + @staticmethod + def format_messages(messages: Dict[str, List[Dict[str, str]]], + tokenizer: PreTrainedTokenizer, + **kwargs) -> Dict[str, torch.Tensor]: + # {"messages":[{"role": "system", "content": "You are a helpful assistant."}...]} + tokens = [] + for role, content in LLMPipeline._message_iter(messages): + tokens = LLMPipeline._concat_with_special_tokens( + tokens, role, content, tokenizer) + return {'input_ids': torch.tensor([tokens], dtype=torch.int64)} + + @staticmethod + def format_output(response: str, **kwargs): + response = response.strip() + message = {'message': {'role': 'assistant', 'content': response}} + return message + + @staticmethod + def _message_iter( + data: Dict[str, List[Dict[str, + str]]]) -> Iterator[Tuple[str, str]]: + for pair in data['messages']: + yield pair['role'], pair['content'] + + 
@staticmethod + def _concat_with_special_tokens( + ids: List[int], role: str, content: Union[str, List[Dict[str, + str]]], + tokenizer: PreTrainedTokenizer) -> List[int]: + im_start = tokenizer.im_start_id + im_end = tokenizer.im_end_id + nl_token = tokenizer.encode('\n') + role = tokenizer.encode(role.strip()) + content = LLMPipeline._encode(tokenizer, content) + return LLMPipeline._concat(ids, im_start, role, nl_token, content, + im_end, nl_token) + + @staticmethod + def _encode(tokenizer: PreTrainedTokenizer, + content: Union[str, List[Dict[str, str]]]): + if isinstance(content, str): + return tokenizer.encode(content.rstrip()) + encoded = [] + for pair in content: + (modal, value), = pair.items() + if modal == 'image': + img_token_span = getattr(tokenizer, 'img_token_span', 256) + img_start_id = tokenizer.img_start_id + img_end_id = img_start_id + 1 + img_pad_id = img_start_id + 2 + list_int_url = list(bytes(value, encoding='utf-8')) + assert len( + list_int_url) <= img_token_span, 'Image url is too long.' + pad_ids = [img_pad_id] * (img_token_span - len(list_int_url)) + encoded = LLMPipeline._concat(encoded, img_start_id, + list_int_url, pad_ids, + img_end_id) + else: # text + encoded.extend(tokenizer.encode(value)) + return encoded + + @staticmethod + def _concat(ids: List[int], *args: Union[int, List[int]]) -> List[int]: + for item in args: + if isinstance(item, list): + ids.extend(item) + else: + ids.append(item) + return ids + + +def chatglm2_format_messages(messages, tokenizer, **kwargs): + + def build_chatglm2_prompt(messages, **kwargs): + prompt = '' + messages = messages['messages'] + # chatglm2 does not have system messages + assert messages[0][ + 'role'] == 'user', 'chatglm2 does not have system messages' + + for i in range(0, len(messages) - 1, 2): + prompt += '[Round {}]\n\n问:{}\n\n答:{}\n\n'.format( + i // 2 + 1, messages[i]['content'], messages[i + 1]['content']) + prompt += '[Round {}]\n\n问:{}\n\n答:'.format( + len(messages) // 2 + 1, messages[-1]['content']) + return prompt + + prompt = build_chatglm2_prompt(messages, **kwargs) + return tokenizer(prompt, return_tensors='pt') + + +def chatglm2_format_output(response, **kwargs): + response = response.strip() + response = response.replace('[[训练时间]]', '2023年') + messages = {'role': 'assistant', 'content': response} + outputs = { + 'messages': messages, + } + return outputs + + +def llama2_format_messages(messages, tokenizer, **kwargs): + from transformers import BatchEncoding + + def build_llama2_prompt(messages, tokenizer, **kwargs): + max_length = kwargs.get('max_length', 2048) + default_system_message = 'you are a helpful assistant!' 
+ + messages = messages['messages'] + # llama2 have system messages + if messages[0]['role'] != 'system': + messages = [{ + 'role': 'system', + 'content': default_system_message + }] + messages + + system = messages[0]['content'] + system_prompt = f'[INST] <>\n{system}\n<>\n\n' + system_ids = tokenizer(system_prompt, return_tensors='pt').input_ids + + text = messages[-1]['content'] + text_prompt = f'{text.strip()} [/INST]' + text_ids = tokenizer(text_prompt, return_tensors='pt').input_ids + prompt_length = system_ids.shape[-1] + text_ids.shape[-1] + if prompt_length > max_length: + raise RuntimeError( + f'prepend prompt length {prompt_length} is bigger than max_length {max_length}' + ) + + # history items + history_prompt = '' + history_ids_list = [] + for i in range(len(messages) - 2, 0, -2): + user, assistant = messages[i]['content'], messages[i + + 1]['content'] + round_prompt = f'{user.strip()} [/INST] {assistant.strip()} [INST] ' + round_ids = tokenizer(round_prompt, return_tensors='pt').input_ids + if prompt_length + round_ids.shape[-1] > max_length: + # excess history should not be appended to the prompt + break + else: + history_prompt = round_prompt + history_prompt + history_ids_list = [round_ids] + history_ids_list + prompt_length += round_ids.shape[-1] + prompt_list = [system_prompt, history_prompt, text_prompt] + prompt_ids_list = [system_ids] + history_ids_list + [text_ids] + return ''.join(prompt_list), torch.cat(prompt_ids_list, dim=-1) + + prompt, tokens = build_llama2_prompt(messages, tokenizer, **kwargs) + return BatchEncoding({'input_ids': tokens}) + + +def baichuan_format_messages(messages, tokenizer, **kwargs): + from transformers import BatchEncoding + + def _parse_messages(messages, split_role='user'): + system, rounds = '', [] + round = [] + for i, message in enumerate(messages): + if message['role'] == 'system': + assert i == 0, 'first message should be system message.' 
+ system = message['content'] + continue + if message['role'] == split_role and round: + rounds.append(round) + round = [] + round.append(message) + if round: + rounds.append(round) + return system, rounds + + messages = messages['messages'] + assistant_token_id = 196 + user_token_id = 195 + max_new_tokens = kwargs.get('max_new_tokens', None) or 2048 + model_max_length = 4096 + max_input_tokens = model_max_length - max_new_tokens + system, rounds = _parse_messages(messages, split_role='user') + system_tokens = tokenizer.encode(system) + max_history_tokens = max_input_tokens - len(system_tokens) + + history_tokens = [] + for round in rounds[::-1]: + round_tokens = [] + for message in round: + if message['role'] == 'user': + round_tokens.append(user_token_id) + else: + round_tokens.append(assistant_token_id) + round_tokens.extend(tokenizer.encode(message['content'])) + if len(history_tokens) == 0 or len(history_tokens) + len( + round_tokens) <= max_history_tokens: + history_tokens = round_tokens + history_tokens # concat left + if len(history_tokens) < max_history_tokens: + continue + break + + input_tokens = system_tokens + history_tokens + if messages[-1]['role'] != 'assistant': + input_tokens.append(assistant_token_id) + input_tokens = input_tokens[-max_input_tokens:] # truncate left + input_tokens = torch.LongTensor([input_tokens]) + return BatchEncoding({'input_ids': input_tokens}) + + +def wizardlm_format_messages(messages, tokenizer, **kwargs): + + def build_wizardlm_prompt(messages, tokenizer, **kwargs): + default_system_message = 'A chat between a curious user and an artificial intelligence assistant.' + 'The assistant gives helpful, detailed, and polite answers to the user\'s questions.' + + messages = messages['messages'] + # llama2 have system messages + if messages[0]['role'] != 'system': + messages = [{ + 'role': 'system', + 'content': default_system_message + }] + messages + + system_prompt = messages[0]['content'] + prompt_list = [system_prompt] + for i, message in enumerate(messages[1:]): + if message['role'] == 'user': + user_prompt = message['content'] + prompt_list.append(f'USER: {user_prompt}') + elif message['role'] == 'assistant': + user_prompt = message['content'] + prompt_list.append(f'ASSISTANT: {user_prompt}') + prompts = ' '.join(prompt_list) + return prompts + + prompts = build_wizardlm_prompt(messages, tokenizer, **kwargs) + return tokenizer(prompts, return_tensors='pt') + + +def wizardcode_format_messages(messages, tokenizer, **kwargs): + messages = messages['messages'] + assert len(messages) == 2, 'wizard code only support two messages.' + system, user = '', '' + for i, message in enumerate(messages): + if message['role'] == 'system': + assert i == 0, 'first message should be system message.' + system = message['content'] + if message['role'] == 'user': + assert i == 1, 'second message should be user message.' 
+ user = message['content'] + + prompt = system + '\n\n### Instruction:\n' + user + '\n\n### Response:' + inputs = tokenizer( + prompt, padding=False, add_special_tokens=False, return_tensors='pt') + return inputs + + +LLM_FORMAT_MAP = { + 'chatglm2': + (chatglm2_format_messages, chatglm2_format_output, ChatGLM2Tokenizer), + 'qwen': (LLMPipeline.format_messages, LLMPipeline.format_output, None), + 'llama2': (llama2_format_messages, None, Llama2Tokenizer), + 'llama': (llama2_format_messages, None, Llama2Tokenizer), + 'baichuan': (baichuan_format_messages, None, None), + 'baichuan2': (baichuan_format_messages, None, None), + 'wizardlm': (wizardlm_format_messages, None, None), + 'wizardcode': (wizardcode_format_messages, None, None) +} diff --git a/modelscope/pipelines/util.py b/modelscope/pipelines/util.py index a2a3ed2b..9788d7d6 100644 --- a/modelscope/pipelines/util.py +++ b/modelscope/pipelines/util.py @@ -14,7 +14,7 @@ logger = get_logger() def is_config_has_model(cfg_file): try: cfg = Config.from_file(cfg_file) - return hasattr(cfg, 'model') + return hasattr(cfg, 'model') or hasattr(cfg, 'model_type') except Exception as e: logger.error(f'parse config file {cfg_file} failed: {e}') return False @@ -58,14 +58,21 @@ def is_model(path: Union[str, List]): def is_modelhub_path_impl(path): if osp.exists(path): cfg_file = osp.join(path, ModelFile.CONFIGURATION) + hf_cfg_file = osp.join(path, ModelFile.CONFIG) if osp.exists(cfg_file): return is_config_has_model(cfg_file) + elif osp.exists(hf_cfg_file): + return is_config_has_model(hf_cfg_file) else: return False else: try: cfg_file = model_file_download(path, ModelFile.CONFIGURATION) - return is_config_has_model(cfg_file) + if is_config_has_model(cfg_file): + return True + else: + hf_cfg_file = model_file_download(path, ModelFile.CONFIG) + return is_config_has_model(hf_cfg_file) except Exception: return False diff --git a/tests/pipelines/test_llm_pipeline.py b/tests/pipelines/test_llm_pipeline.py new file mode 100644 index 00000000..bbebb25e --- /dev/null +++ b/tests/pipelines/test_llm_pipeline.py @@ -0,0 +1,312 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +import torch + +from modelscope import (AutoConfig, AutoModelForCausalLM, Model, + snapshot_download) +from modelscope.pipelines import pipeline +from modelscope.pipelines.nlp.llm_pipeline import LLMPipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class LLMPipelineTest(unittest.TestCase): + + def setUp(self) -> None: + self.messages_zh = { + 'messages': [{ + 'role': 'user', + 'content': 'Hello! 你是谁?' + }, { + 'role': 'assistant', + 'content': '我是你的助手。' + }, { + 'role': 'user', + 'content': '你叫什么名字?' + }] + } + self.messages_zh_with_system = { + 'messages': [{ + 'role': 'system', + 'content': '你是达摩院的生活助手机器人。' + }, { + 'role': 'user', + 'content': '今天天气好吗?' + }] + } + self.prompt_zh = '请介绍一下你自己' + self.messages_en = { + 'messages': [{ + 'role': 'system', + 'content': 'You are a helpful assistant.' + }, { + 'role': 'user', + 'content': 'Hello! Where is the capital of Zhejiang?' + }, { + 'role': 'assistant', + 'content': 'Hangzhou is the capital of Zhejiang.' + }, { + 'role': 'user', + 'content': 'Tell me something about HangZhou?' + }] + } + self.prompt_en = 'Tell me the capital of Zhejiang. ' + self.messages_code = { + 'messages': [{ + 'role': + 'system', + 'content': + 'You are a helpful, respectful and honest assistant ' + 'with a deep knowledge of code and software design. 
' + 'Always answer as helpfully as possible, while being safe. ' + 'Your answers should not include any harmful, unethical, racist, ' + 'sexist, toxic, dangerous, or illegal content. Please ensure that ' + 'your responses are socially unbiased and positive in nature.\n\n' + 'If a question does not make any sense, or is not factually coherent, ' + 'explain why instead of answering something not correct. ' + 'If you don\'t know the answer to a question, ' + 'please don\'t share false information.' + }, { + 'role': + 'user', + 'content': + 'write a program to implement the quicksort in java' + }] + } + self.prompt_code = 'import socket\n\ndef ping_exponential_backoff(host: str):' + + self.message_wizard_math = { + 'messages': [{ + 'role': + 'system', + 'content': + 'Below is an instruction that describes a task. ' + 'Write a response that appropriately completes the request.' + }, { + 'role': + 'user', + 'content': + 'James decides to run 3 sprints 3 times a week. He runs 60 meters each sprint.' + 'How many total meters does he run a week?' + }] + } + self.prompt_wizard_math = """"Below is an instruction that describes a task. + Write a response that appropriately completes the request.\n\n + ### Instruction:\nJames decides to run 3 sprints 3 times a week. He runs 60 meters each sprint. + How many total meters does he run a week?\n\n + ### Response:""" + + self.message_wizard_code = { + 'messages': [{ + 'role': + 'system', + 'content': + 'Below is an instruction that describes a task.' + 'Write a response that appropriately completes the request.' + }, { + 'role': 'user', + 'content': 'Write a Jave code to sum 1 to 10' + }] + } + self.prompt_wizard_code = """"Below is an instruction that describes a task. + Write a response that appropriately completes the request.\n\n + ### Instruction:\nWrite a Jave code to sum 1 to 10\n\n + ### Response:""" + + self.messages_mm = { + 'messages': [{ + 'role': 'system', + 'content': '你是达摩院的生活助手机器人。' + }, { + 'role': + 'user', + 'content': [ + { + 'image': + 'https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg' + }, + { + 'text': '这是什么?' 
+ }, + ] + }] + } + self.gen_cfg = {'do_sample': True, 'max_length': 512} + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_chatglm2(self): + pipe = LLMPipeline(model='ZhipuAI/chatglm2-6b', device_map='auto') + print('messages: ', pipe(self.messages_zh, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_chatglm2int4(self): + pipe = LLMPipeline(model='ZhipuAI/chatglm2-6b-int4') + print('messages: ', pipe(self.messages_zh, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_chatglm232k(self): + pipe = LLMPipeline(model='ZhipuAI/chatglm2-6b-32k', device_map='auto') + print('messages: ', pipe(self.messages_zh, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_llama2(self): + pipe = LLMPipeline( + model='modelscope/Llama-2-7b-ms', + torch_dtype=torch.float16, + device_map='auto', + ignore_file_pattern=[r'.+\.bin$']) + print('messages: ', pipe(self.messages_en, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_en, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_llama2chat(self): + pipe = LLMPipeline( + model='modelscope/Llama-2-7b-chat-ms', + revision='v1.0.2', + torch_dtype=torch.float16, + device_map='auto', + ignore_file_pattern=[r'.+\.bin$']) + print('messages: ', pipe(self.messages_en, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_en, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_codellama(self): + pipe = LLMPipeline( + model='AI-ModelScope/CodeLlama-7b-Instruct-hf', + torch_dtype=torch.float16, + device_map='auto', + ignore_file_pattern=[r'.+\.bin$']) + print('messages: ', pipe(self.messages_code, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_code, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_baichuan_7b(self): + pipe = LLMPipeline( + model='baichuan-inc/baichuan-7B', + device_map='auto', + torch_dtype=torch.float16) + print('messages: ', pipe(self.messages_zh, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_baichuan_13b(self): + pipe = LLMPipeline( + model='baichuan-inc/Baichuan-13B-Base', + device_map='auto', + torch_dtype=torch.float16) + print('messages: ', pipe(self.messages_zh, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_baichuan_13bchat(self): + pipe = LLMPipeline( + model='baichuan-inc/Baichuan-13B-Chat', + device_map='auto', + torch_dtype=torch.float16) + print('messages: ', pipe(self.messages_zh, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_baichuan2_7b(self): + pipe = LLMPipeline( + model='baichuan-inc/Baichuan2-7B-Base', + device_map='auto', + torch_dtype=torch.float16) + print('messages: ', pipe(self.messages_zh, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current 
test level') + def test_baichuan2_7bchat(self): + pipe = LLMPipeline( + model='baichuan-inc/Baichuan2-7B-Chat', + device_map='auto', + torch_dtype=torch.float16) + print('messages: ', pipe(self.messages_zh, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_wizardlm_13b(self): + pipe = LLMPipeline( + model='AI-ModelScope/WizardLM-13B-V1.2', + device_map='auto', + torch_dtype=torch.float16, + format_messages='wizardlm') + print('messages: ', pipe(self.messages_en, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_en, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_wizardmath(self): + pipe = LLMPipeline( + model='AI-ModelScope/WizardMath-7B-V1.0', + device_map='auto', + torch_dtype=torch.float16, + format_messages='wizardcode') + print('messages: ', pipe(self.message_wizard_math, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_wizard_math, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_wizardcode_13b(self): + pipe = LLMPipeline( + model='AI-ModelScope/WizardCoder-Python-13B-V1.0', + device_map='auto', + torch_dtype=torch.float16, + format_messages='wizardcode') + print('messages: ', pipe(self.message_wizard_code, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_wizard_code, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_wizardcode_1b(self): + pipe = LLMPipeline( + model='AI-ModelScope/WizardCoder-1B-V1.0', + device_map='auto', + torch_dtype=torch.float16, + format_messages='wizardcode') + print('messages: ', pipe(self.message_wizard_code, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_wizard_code, **self.gen_cfg)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_qwen(self): + pipe = LLMPipeline( + model='ccyh123/Qwen-7B-Chat', + device_map='auto', + format_messages='qwen') + print('messages: ', pipe(self.messages_zh_with_system, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + @unittest.skip('Need AutoGPTQ') + def test_qwen_int4(self): + from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig + model_dir = snapshot_download('ccyh123/Qwen-7B-Chat-Int4') + quantize_config = BaseQuantizeConfig( + bits=4, # quantize model to 4-bit + group_size=128, # it is recommended to set the value to 128 + desc_act= + False, # set to False can significantly speed up inference but the perplexity may slightly bad + ) + model = AutoGPTQForCausalLM.from_pretrained( + model_dir, + quantize_config, + device_map='auto', + trust_remote_code=True, + use_safetensors=True) + model.model_dir = model_dir + pipe = LLMPipeline(model=model, format_messages='qwen') + print('messages: ', pipe(self.messages_zh_with_system, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + @unittest.skip('File does not exists configuration.json') + def test_qwen_vl(self): + pipe = LLMPipeline( + model='ccyh123/Qwen-VL-Chat', + device_map='auto', + format_messages='qwen') + print('messages: ', pipe(self.messages_mm, **self.gen_cfg)) + print('prompt: ', pipe(self.prompt_zh, **self.gen_cfg)) + + +if __name__ == '__main__': + unittest.main() From 31d04f45b7de43f491e02c4d526d56f4aae81f03 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Thu, 28 Sep 2023 01:06:32 +0800 Subject: [PATCH 13/36] force reinstall 1.9.2 --- 
docker/Dockerfile.ubuntu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index f0e28075..e74baa29 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -53,7 +53,7 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ fi # torchmetrics==0.11.4 for ofa -RUN pip install --no-cache-dir tiktoken torchmetrics==0.11.4 transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr +RUN pip install --no-cache-dir torchmetrics==0.11.4 tiktoken transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr COPY docker/scripts/install_flash_attension.sh /tmp/install_flash_attension.sh RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_flash_attension.sh; \ From f6d4107b0ab8e583f5e726fb6a3e9291aeaa776f Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Thu, 28 Sep 2023 10:49:51 +0800 Subject: [PATCH 14/36] fix onnxruntime issue --- docker/Dockerfile.ubuntu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index e74baa29..ff2d3886 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -53,7 +53,8 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ fi # torchmetrics==0.11.4 for ofa -RUN pip install --no-cache-dir torchmetrics==0.11.4 tiktoken transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr +# onnxruntime<1.16.0 for Since ORT 1.9, you are required to explicitly set the providers parameter when instantiating InferenceSession. For example, onnxruntime.InferenceSession(..., providers=['AzureExecutionProvider', 'CPUExecutionProvider'], ...) +RUN pip install --no-cache-dir 'onnxruntime<1.16.0' torchmetrics==0.11.4 tiktoken transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr COPY docker/scripts/install_flash_attension.sh /tmp/install_flash_attension.sh RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_flash_attension.sh; \ From e7ecb55e00cff618f2f4f3075a599f6bd7131325 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Thu, 28 Sep 2023 14:30:55 +0800 Subject: [PATCH 15/36] remove onnxruntime version --- docker/Dockerfile.ubuntu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index ff2d3886..e74baa29 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -53,8 +53,7 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ fi # torchmetrics==0.11.4 for ofa -# onnxruntime<1.16.0 for Since ORT 1.9, you are required to explicitly set the providers parameter when instantiating InferenceSession. For example, onnxruntime.InferenceSession(..., providers=['AzureExecutionProvider', 'CPUExecutionProvider'], ...) 
-RUN pip install --no-cache-dir 'onnxruntime<1.16.0' torchmetrics==0.11.4 tiktoken transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr +RUN pip install --no-cache-dir torchmetrics==0.11.4 tiktoken transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr COPY docker/scripts/install_flash_attension.sh /tmp/install_flash_attension.sh RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_flash_attension.sh; \ From 7f4fef0c3ad760b4f38a3dcfe51ef9906b87acf9 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Thu, 28 Sep 2023 17:14:42 +0800 Subject: [PATCH 16/36] fix provider bug --- tests/export/test_export_speech_signal_process.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/export/test_export_speech_signal_process.py b/tests/export/test_export_speech_signal_process.py index d3f6fe14..d4579936 100644 --- a/tests/export/test_export_speech_signal_process.py +++ b/tests/export/test_export_speech_signal_process.py @@ -60,7 +60,9 @@ class ExportSpeechSignalProcessTest(unittest.TestCase): return onnx_model = onnx.load(output) onnx.checker.check_model(onnx_model) - ort_session = ort.InferenceSession(output) + ort_session = ort.InferenceSession( + output, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) with torch.no_grad(): model.eval() outputs_origin = model.forward(dummy_inputs) From c12b22ae3d9e6fd4db28d7983792bcc3e985ce5c Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Thu, 28 Sep 2023 14:19:36 +0800 Subject: [PATCH 17/36] fix onnxruntime providers parameter compatible issue --- .../exporters/nlp/model_for_token_classification_exporter.py | 4 +++- modelscope/exporters/tf_model_exporter.py | 4 +++- modelscope/exporters/torch_model_exporter.py | 4 +++- .../models/cv/product_retrieval_embedding/item_detection.py | 4 +++- modelscope/pipelines/cv/face_liveness_ir_pipeline.py | 4 +++- modelscope/pipelines/cv/face_liveness_xc_pipeline.py | 4 +++- modelscope/pipelines/cv/face_quality_assessment_pipeline.py | 4 +++- modelscope/pipelines/cv/face_recognition_onnx_fm_pipeline.py | 4 +++- modelscope/pipelines/cv/face_recognition_onnx_ir_pipeline.py | 4 +++- tests/export/test_export_speech_signal_process.py | 4 +++- 10 files changed, 30 insertions(+), 10 deletions(-) diff --git a/modelscope/exporters/nlp/model_for_token_classification_exporter.py b/modelscope/exporters/nlp/model_for_token_classification_exporter.py index daa33ea9..159fcee4 100644 --- a/modelscope/exporters/nlp/model_for_token_classification_exporter.py +++ b/modelscope/exporters/nlp/model_for_token_classification_exporter.py @@ -77,7 +77,9 @@ class ModelForSequenceClassificationExporter(TorchModelExporter): return onnx_model = onnx.load(output) onnx.checker.check_model(onnx_model) - ort_session = ort.InferenceSession(output) + ort_session = ort.InferenceSession( + output, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) with torch.no_grad(): model.eval() outputs_origin = model.forward( diff --git a/modelscope/exporters/tf_model_exporter.py b/modelscope/exporters/tf_model_exporter.py index a2e69847..dbf4640e 100644 --- a/modelscope/exporters/tf_model_exporter.py +++ b/modelscope/exporters/tf_model_exporter.py @@ -102,7 +102,9 @@ class TfModelExporter(Exporter): onnx_model = onnx.load(output) onnx.checker.check_model(onnx_model, full_check=True) - ort_session = ort.InferenceSession(output) + ort_session = ort.InferenceSession( + output, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) outputs_origin = call_func( dummy_inputs) if call_func is 
not None else model(dummy_inputs) if isinstance(outputs_origin, (Mapping, ModelOutputBase)): diff --git a/modelscope/exporters/torch_model_exporter.py b/modelscope/exporters/torch_model_exporter.py index 473b9705..1dd1b459 100644 --- a/modelscope/exporters/torch_model_exporter.py +++ b/modelscope/exporters/torch_model_exporter.py @@ -233,7 +233,9 @@ class TorchModelExporter(Exporter): return onnx_model = onnx.load(output) onnx.checker.check_model(onnx_model) - ort_session = ort.InferenceSession(output) + ort_session = ort.InferenceSession( + output, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) with torch.no_grad(): model.eval() outputs_origin = model.forward( diff --git a/modelscope/models/cv/product_retrieval_embedding/item_detection.py b/modelscope/models/cv/product_retrieval_embedding/item_detection.py index 2002c6cb..b5024368 100644 --- a/modelscope/models/cv/product_retrieval_embedding/item_detection.py +++ b/modelscope/models/cv/product_retrieval_embedding/item_detection.py @@ -26,7 +26,9 @@ class YOLOXONNX(object): options.intra_op_num_threads = 1 options.inter_op_num_threads = 1 self.ort_session = ort.InferenceSession( - self.onnx_path, sess_options=options) + self.onnx_path, + sess_options=options, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) self.with_p6 = False self.multi_detect = multi_detect diff --git a/modelscope/pipelines/cv/face_liveness_ir_pipeline.py b/modelscope/pipelines/cv/face_liveness_ir_pipeline.py index efc9d9d5..f440c027 100644 --- a/modelscope/pipelines/cv/face_liveness_ir_pipeline.py +++ b/modelscope/pipelines/cv/face_liveness_ir_pipeline.py @@ -43,7 +43,9 @@ class FaceLivenessIrPipeline(FaceProcessingBasePipeline): logger.info('load model done') def load_onnx_model(self, onnx_path): - sess = onnxruntime.InferenceSession(onnx_path) + sess = onnxruntime.InferenceSession( + onnx_path, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) out_node_name = [] input_node_name = [] for node in sess.get_outputs(): diff --git a/modelscope/pipelines/cv/face_liveness_xc_pipeline.py b/modelscope/pipelines/cv/face_liveness_xc_pipeline.py index 3a50d91f..316ce778 100644 --- a/modelscope/pipelines/cv/face_liveness_xc_pipeline.py +++ b/modelscope/pipelines/cv/face_liveness_xc_pipeline.py @@ -51,7 +51,9 @@ class FaceLivenessXcPipeline(FaceProcessingBasePipeline): logger.info('load model done') def load_onnx_model(self, onnx_path): - sess = onnxruntime.InferenceSession(onnx_path) + sess = onnxruntime.InferenceSession( + onnx_path, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) out_node_name = [] input_node_name = [] for node in sess.get_outputs(): diff --git a/modelscope/pipelines/cv/face_quality_assessment_pipeline.py b/modelscope/pipelines/cv/face_quality_assessment_pipeline.py index 58fab659..1cf9ba59 100644 --- a/modelscope/pipelines/cv/face_quality_assessment_pipeline.py +++ b/modelscope/pipelines/cv/face_quality_assessment_pipeline.py @@ -60,7 +60,9 @@ class FaceQualityAssessmentPipeline(FaceProcessingBasePipeline): return batch_process(self.model, data) def load_onnx_model(self, onnx_path): - sess = onnxruntime.InferenceSession(onnx_path) + sess = onnxruntime.InferenceSession( + onnx_path, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) out_node_name = [] input_node_name = [] for node in sess.get_outputs(): diff --git a/modelscope/pipelines/cv/face_recognition_onnx_fm_pipeline.py b/modelscope/pipelines/cv/face_recognition_onnx_fm_pipeline.py index 7577c82d..e36d9351 100644 --- 
a/modelscope/pipelines/cv/face_recognition_onnx_fm_pipeline.py +++ b/modelscope/pipelines/cv/face_recognition_onnx_fm_pipeline.py @@ -53,7 +53,9 @@ class FaceRecognitionOnnxFmPipeline(FaceProcessingBasePipeline): logger.info('load model done') def load_onnx_model(self, onnx_path): - sess = onnxruntime.InferenceSession(onnx_path) + sess = onnxruntime.InferenceSession( + onnx_path, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) out_node_name = [] input_node_name = [] for node in sess.get_outputs(): diff --git a/modelscope/pipelines/cv/face_recognition_onnx_ir_pipeline.py b/modelscope/pipelines/cv/face_recognition_onnx_ir_pipeline.py index ced9b2c6..9c005d90 100644 --- a/modelscope/pipelines/cv/face_recognition_onnx_ir_pipeline.py +++ b/modelscope/pipelines/cv/face_recognition_onnx_ir_pipeline.py @@ -50,7 +50,9 @@ class FaceRecognitionOnnxIrPipeline(FaceProcessingBasePipeline): logger.info('load model done') def load_onnx_model(self, onnx_path): - sess = onnxruntime.InferenceSession(onnx_path) + sess = onnxruntime.InferenceSession( + onnx_path, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) out_node_name = [] input_node_name = [] for node in sess.get_outputs(): diff --git a/tests/export/test_export_speech_signal_process.py b/tests/export/test_export_speech_signal_process.py index d4579936..faba59a5 100644 --- a/tests/export/test_export_speech_signal_process.py +++ b/tests/export/test_export_speech_signal_process.py @@ -66,7 +66,9 @@ class ExportSpeechSignalProcessTest(unittest.TestCase): with torch.no_grad(): model.eval() outputs_origin = model.forward(dummy_inputs) - outputs_origin = numpify_tensor_nested(outputs_origin) + outputs_origin = numpify_tensor_nested( + outputs_origin, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) input_feed = {INPUT_NAME: dummy_inputs.numpy()} outputs = ort_session.run( From 74532181d02fe0cfb87779ac40beaee8aeb20bfb Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Thu, 28 Sep 2023 17:30:38 +0800 Subject: [PATCH 18/36] add jupyterlab install to docker --- docker/Dockerfile.ubuntu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index e74baa29..4ac4fd53 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -53,7 +53,7 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ fi # torchmetrics==0.11.4 for ofa -RUN pip install --no-cache-dir torchmetrics==0.11.4 tiktoken transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr +RUN pip install --no-cache-dir jupyterlab torchmetrics==0.11.4 tiktoken transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr COPY docker/scripts/install_flash_attension.sh /tmp/install_flash_attension.sh RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_flash_attension.sh; \ From d88898e4772acc2fb45f72b0183612bc4829eb5a Mon Sep 17 00:00:00 2001 From: tastelikefeet <58414341+tastelikefeet@users.noreply.github.com> Date: Sun, 1 Oct 2023 23:09:09 +0800 Subject: [PATCH 19/36] fix work_dir not set in trainer(#573) * fix work_dir * fix running --- modelscope/trainers/trainer.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index a3707918..25f948bc 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -181,8 +181,20 @@ class EpochBasedTrainer(BaseTrainer): compile_options = {} self.model = compile_model(self.model, **compile_options) - if 'work_dir' in kwargs: + if 
kwargs.get('work_dir', None) is not None: self.work_dir = kwargs['work_dir'] + if 'train' not in self.cfg: + self.cfg['train'] = ConfigDict() + self.cfg['train']['work_dir'] = self.work_dir + if 'checkpoint' in self.cfg['train']: + if 'period' in self.cfg['train']['checkpoint']: + self.cfg['train']['checkpoint']['period'][ + 'save_dir'] = self.work_dir + if 'best' in self.cfg['train']['checkpoint']: + self.cfg['train']['checkpoint']['best'][ + 'save_dir'] = self.work_dir + if 'logging' in self.cfg['train']: + self.cfg['train']['logging']['out_dir'] = self.work_dir else: self.work_dir = self.cfg.train.get('work_dir', './work_dir') From 43046a719bf676837b05fd7cf80106f41fc49425 Mon Sep 17 00:00:00 2001 From: Zhicheng Zhang Date: Sun, 8 Oct 2023 13:36:34 +0800 Subject: [PATCH 20/36] =?UTF-8?q?move=20venv=20import=20from=20file=20leve?= =?UTF-8?q?l=20to=20class=20level=20to=20avoid=20import=20error=E2=80=A6?= =?UTF-8?q?=20(#575)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * move venv import from file level to class level to avoid import error on windows * pass lint check --------- Co-authored-by: Zhicheng Zhang --- modelscope/utils/plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope/utils/plugins.py b/modelscope/utils/plugins.py index 3d39514a..b4485830 100644 --- a/modelscope/utils/plugins.py +++ b/modelscope/utils/plugins.py @@ -9,7 +9,6 @@ import os import pkgutil import shutil import sys -import venv from contextlib import contextmanager from fnmatch import fnmatch from pathlib import Path @@ -1144,6 +1143,7 @@ class EnvsManager(object): cfg = read_config(model_dir) self.plugins = cfg.get('plugins', []) self.allow_remote = cfg.get('allow_remote', False) + import venv self.env_builder = venv.EnvBuilder( system_site_packages=True, clear=False, From e60d56a564d496fda9306bda653549ccb75bf34d Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Sun, 8 Oct 2023 15:18:31 +0800 Subject: [PATCH 21/36] fix test_export_speech_signal_process.py issue --- tests/export/test_export_speech_signal_process.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/export/test_export_speech_signal_process.py b/tests/export/test_export_speech_signal_process.py index faba59a5..d4579936 100644 --- a/tests/export/test_export_speech_signal_process.py +++ b/tests/export/test_export_speech_signal_process.py @@ -66,9 +66,7 @@ class ExportSpeechSignalProcessTest(unittest.TestCase): with torch.no_grad(): model.eval() outputs_origin = model.forward(dummy_inputs) - outputs_origin = numpify_tensor_nested( - outputs_origin, - providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) + outputs_origin = numpify_tensor_nested(outputs_origin) input_feed = {INPUT_NAME: dummy_inputs.numpy()} outputs = ort_session.run( From 610cf981b1028f48054db3dcc1e1a5798b2e0435 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Sun, 8 Oct 2023 15:21:26 +0800 Subject: [PATCH 22/36] fix test_export_speech_signal_process.py issue --- tests/export/test_export_speech_signal_process.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/export/test_export_speech_signal_process.py b/tests/export/test_export_speech_signal_process.py index faba59a5..d4579936 100644 --- a/tests/export/test_export_speech_signal_process.py +++ b/tests/export/test_export_speech_signal_process.py @@ -66,9 +66,7 @@ class ExportSpeechSignalProcessTest(unittest.TestCase): with torch.no_grad(): model.eval() outputs_origin = model.forward(dummy_inputs) - 
outputs_origin = numpify_tensor_nested( - outputs_origin, - providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) + outputs_origin = numpify_tensor_nested(outputs_origin) input_feed = {INPUT_NAME: dummy_inputs.numpy()} outputs = ort_session.run( From 57e2647a4a61b39d4fdffbceb854565bbfaba6b2 Mon Sep 17 00:00:00 2001 From: Jintao Date: Tue, 10 Oct 2023 10:43:16 +0800 Subject: [PATCH 23/36] remove configuration.json dependency (#579) --- modelscope/models/base/base_model.py | 18 ++++--- modelscope/utils/automodel_utils.py | 59 +++++++++++++---------- modelscope/utils/hf_util.py | 70 +--------------------------- 3 files changed, 47 insertions(+), 100 deletions(-) diff --git a/modelscope/models/base/base_model.py b/modelscope/models/base/base_model.py index 788d5c43..a63859cc 100644 --- a/modelscope/models/base/base_model.py +++ b/modelscope/models/base/base_model.py @@ -126,7 +126,7 @@ class Model(ABC): ) invoked_by = '%s/%s' % (Invoke.KEY, invoked_by) - ignore_file_pattern = kwargs.get('ignore_file_pattern', None) + ignore_file_pattern = kwargs.pop('ignore_file_pattern', None) local_model_dir = snapshot_download( model_name_or_path, revision, @@ -134,18 +134,19 @@ class Model(ABC): ignore_file_pattern=ignore_file_pattern) logger.info(f'initialize model from {local_model_dir}') + configuration_path = osp.join(local_model_dir, ModelFile.CONFIGURATION) + cfg = None if cfg_dict is not None: cfg = cfg_dict - else: - cfg = Config.from_file( - osp.join(local_model_dir, ModelFile.CONFIGURATION)) - task_name = cfg.task + elif os.path.exists(configuration_path): + cfg = Config.from_file(configuration_path) + task_name = getattr(cfg, 'task', None) if 'task' in kwargs: task_name = kwargs.pop('task') - model_cfg = cfg.model + model_cfg = getattr(cfg, 'model', None) if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): model_cfg.type = model_cfg.model_type - model_type = model_cfg.type + model_type = getattr(model_cfg, 'type', None) if isinstance(device, str) and device.startswith('gpu'): device = 'cuda' + device[3:] use_hf = kwargs.pop('use_hf', None) @@ -162,6 +163,9 @@ class Model(ABC): model = model.to(device) return model # use ms + if cfg is None: + raise FileNotFoundError( + f'`{ModelFile.CONFIGURATION}` file not found.') model_cfg.model_dir = local_model_dir # install and import remote repos before build diff --git a/modelscope/utils/automodel_utils.py b/modelscope/utils/automodel_utils.py index afd83817..1f5de3b6 100644 --- a/modelscope/utils/automodel_utils.py +++ b/modelscope/utils/automodel_utils.py @@ -6,8 +6,11 @@ from modelscope.utils.ast_utils import INDEX_KEY from modelscope.utils.import_utils import LazyImportModule -def can_load_by_ms(model_dir: str, tast_name: str, model_type: str) -> bool: - if ('MODELS', tast_name, +def can_load_by_ms(model_dir: str, task_name: Optional[str], + model_type: Optional[str]) -> bool: + if model_type is None or task_name is None: + return False + if ('MODELS', task_name, model_type) in LazyImportModule.AST_INDEX[INDEX_KEY]: return True ms_wrapper_path = os.path.join(model_dir, 'ms_wrapper.py') @@ -25,11 +28,27 @@ def _can_load_by_hf_automodel(automodel_class: type, config) -> bool: return False -def get_hf_automodel_class(model_dir: str, task_name: str) -> Optional[type]: - from modelscope import (AutoConfig, AutoModel, AutoModelForCausalLM, - AutoModelForSeq2SeqLM, - AutoModelForTokenClassification, - AutoModelForSequenceClassification) +def get_default_automodel(config) -> Optional[type]: + import modelscope.utils.hf_util as 
hf_util + if not hasattr(config, 'auto_map'): + return None + auto_map = config.auto_map + automodel_list = [k for k in auto_map.keys() if k.startswith('AutoModel')] + if len(automodel_list) == 1: + return getattr(hf_util, automodel_list[0]) + if len(automodel_list) > 1 and len( + set([auto_map[k] for k in automodel_list])) == 1: + return getattr(hf_util, automodel_list[0]) + return None + + +def get_hf_automodel_class(model_dir: str, + task_name: Optional[str]) -> Optional[type]: + from modelscope.utils.hf_util import (AutoConfig, AutoModel, + AutoModelForCausalLM, + AutoModelForSeq2SeqLM, + AutoModelForTokenClassification, + AutoModelForSequenceClassification) automodel_mapping = { Tasks.backbone: AutoModel, Tasks.chat: AutoModelForCausalLM, @@ -37,19 +56,18 @@ def get_hf_automodel_class(model_dir: str, task_name: str) -> Optional[type]: Tasks.text_classification: AutoModelForSequenceClassification, Tasks.token_classification: AutoModelForTokenClassification, } - automodel_class = automodel_mapping.get(task_name, None) - if automodel_class is None: - return None config_path = os.path.join(model_dir, 'config.json') if not os.path.exists(config_path): return None try: - try: - config = AutoConfig.from_pretrained( - model_dir, trust_remote_code=True) - except (FileNotFoundError, ValueError): - return None + config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) + if task_name is None: + automodel_class = get_default_automodel(config) + else: + automodel_class = automodel_mapping.get(task_name, None) + if automodel_class is None: + return None if _can_load_by_hf_automodel(automodel_class, config): return automodel_class if (automodel_class is AutoModelForCausalLM @@ -71,14 +89,5 @@ def try_to_load_hf_model(model_dir: str, task_name: str, model = None if automodel_class is not None: # use hf - device_map = kwargs.get('device_map', None) - torch_dtype = kwargs.get('torch_dtype', None) - config = kwargs.get('config', None) - - model = automodel_class.from_pretrained( - model_dir, - device_map=device_map, - torch_dtype=torch_dtype, - config=config, - trust_remote_code=True) + model = automodel_class.from_pretrained(model_dir, **kwargs) return model diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py index 6ef98ccf..97b4ea3d 100644 --- a/modelscope/utils/hf_util.py +++ b/modelscope/utils/hf_util.py @@ -21,7 +21,7 @@ from transformers.models.auto.tokenization_auto import ( TOKENIZER_MAPPING_NAMES, get_tokenizer_config) from modelscope import snapshot_download -from modelscope.utils.constant import Invoke +from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke try: from transformers import GPTQConfig as GPTQConfigHF @@ -84,69 +84,6 @@ patch_tokenizer_base() patch_model_base() -def check_hf_code(model_dir: str, auto_class: type, - trust_remote_code: bool) -> None: - config_path = os.path.join(model_dir, 'config.json') - if not os.path.exists(config_path): - raise FileNotFoundError(f'{config_path} is not found') - config_dict = PretrainedConfig.get_config_dict(config_path)[0] - auto_class_name = auto_class.__name__ - if auto_class is AutoTokenizerHF: - tokenizer_config = get_tokenizer_config(model_dir) - # load from repo - if trust_remote_code: - has_remote_code = False - if auto_class is AutoTokenizerHF: - auto_map = tokenizer_config.get('auto_map', None) - if auto_map is not None: - module_name = auto_map.get(auto_class_name, None) - if module_name is not None: - module_name = module_name[0] - has_remote_code = True - else: - auto_map = 
config_dict.get('auto_map', None) - if auto_map is not None: - module_name = auto_map.get(auto_class_name, None) - has_remote_code = module_name is not None - - if has_remote_code: - module_path = os.path.join(model_dir, - module_name.split('.')[0] + '.py') - if not os.path.exists(module_path): - raise FileNotFoundError(f'{module_path} is not found') - return - - # trust_remote_code is False or has_remote_code is False - model_type = config_dict.get('model_type', None) - if model_type is None: - raise ValueError(f'`model_type` key is not found in {config_path}.') - - trust_remote_code_info = '.' - if not trust_remote_code: - trust_remote_code_info = ', You can try passing `trust_remote_code=True`.' - if auto_class is AutoConfigHF: - if model_type not in CONFIG_MAPPING: - raise ValueError( - f'{model_type} not found in HF `CONFIG_MAPPING`{trust_remote_code_info}' - ) - elif auto_class is AutoTokenizerHF: - tokenizer_class = tokenizer_config.get('tokenizer_class') - if tokenizer_class is not None: - return - if model_type not in TOKENIZER_MAPPING_NAMES: - raise ValueError( - f'{model_type} not found in HF `TOKENIZER_MAPPING_NAMES`{trust_remote_code_info}' - ) - else: - mapping_names = [ - m.model_type for m in auto_class._model_mapping.keys() - ] - if model_type not in mapping_names: - raise ValueError( - f'{model_type} not found in HF `auto_class._model_mapping`{trust_remote_code_info}' - ) - - def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs): """Get a custom wrapper class for auto classes to download the models from the ModelScope hub Args: @@ -166,7 +103,7 @@ def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs): ignore_file_pattern = kwargs.pop('ignore_file_pattern', default_ignore_file_pattern) if not os.path.exists(pretrained_model_name_or_path): - revision = kwargs.pop('revision', None) + revision = kwargs.pop('revision', DEFAULT_MODEL_REVISION) model_dir = snapshot_download( pretrained_model_name_or_path, revision=revision, @@ -175,9 +112,6 @@ def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs): else: model_dir = pretrained_model_name_or_path - if module_class is not GenerationConfigHF: - trust_remote_code = kwargs.get('trust_remote_code', False) - check_hf_code(model_dir, module_class, trust_remote_code) module_obj = module_class.from_pretrained(model_dir, *model_args, **kwargs) From 362872d6bb7e8beb8c02d8a27ec386a5fb023973 Mon Sep 17 00:00:00 2001 From: tastelikefeet <58414341+tastelikefeet@users.noreply.github.com> Date: Tue, 10 Oct 2023 10:43:30 +0800 Subject: [PATCH 24/36] fix onnx backend (#580) --- modelscope/exporters/torch_model_exporter.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modelscope/exporters/torch_model_exporter.py b/modelscope/exporters/torch_model_exporter.py index 473b9705..1dd1b459 100644 --- a/modelscope/exporters/torch_model_exporter.py +++ b/modelscope/exporters/torch_model_exporter.py @@ -233,7 +233,9 @@ class TorchModelExporter(Exporter): return onnx_model = onnx.load(output) onnx.checker.check_model(onnx_model) - ort_session = ort.InferenceSession(output) + ort_session = ort.InferenceSession( + output, + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) with torch.no_grad(): model.eval() outputs_origin = model.forward( From 4d099aa603092ebceac591d2b0b405638952a366 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Wed, 11 Oct 2023 10:07:50 +0800 Subject: [PATCH 25/36] release date to 2099 --- modelscope/version.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/modelscope/version.py b/modelscope/version.py index 97e90b1d..5ea46f3f 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -2,4 +2,4 @@ __version__ = '1.9.2' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future -__release_datetime__ = '2023-09-06 00:00:00' +__release_datetime__ = '2099-09-06 00:00:00' From 35f31b011f25357dfbfa601497eebaa2384dedda Mon Sep 17 00:00:00 2001 From: Jintao Date: Thu, 12 Oct 2023 10:27:31 +0800 Subject: [PATCH 26/36] fix merge error (#582) --- modelscope/models/base/base_model.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/modelscope/models/base/base_model.py b/modelscope/models/base/base_model.py index 69669650..eb905e12 100644 --- a/modelscope/models/base/base_model.py +++ b/modelscope/models/base/base_model.py @@ -9,7 +9,7 @@ from modelscope.metainfo import Tasks from modelscope.models.builder import build_backbone, build_model from modelscope.utils.automodel_utils import (can_load_by_ms, try_to_load_hf_model) -from modelscope.utils.config import Config +from modelscope.utils.config import Config, ConfigDict from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile from modelscope.utils.device import verify_device from modelscope.utils.logger import get_logger @@ -143,15 +143,10 @@ class Model(ABC): task_name = getattr(cfg, 'task', None) if 'task' in kwargs: task_name = kwargs.pop('task') - try: - model_cfg = cfg.model - if hasattr(model_cfg, - 'model_type') and not hasattr(model_cfg, 'type'): - model_cfg.type = model_cfg.model_type - model_type = model_cfg.type - except Exception: - model_cfg = {} - model_type = '' + model_cfg = getattr(cfg, 'model', ConfigDict()) + if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): + model_cfg.type = model_cfg.model_type + model_type = getattr(model_cfg, 'type', None) if isinstance(device, str) and device.startswith('gpu'): device = 'cuda' + device[3:] use_hf = kwargs.pop('use_hf', None) @@ -163,7 +158,7 @@ class Model(ABC): model = try_to_load_hf_model(local_model_dir, task_name, use_hf, **kwargs) if model is not None: - device_map = kwargs.get('device_map', None) + device_map = kwargs.pop('device_map', None) if device_map is None and device is not None: model = model.to(device) return model From e75f5b4bc41be484e2e2e4dc3dd94d9495c4746f Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Fri, 13 Oct 2023 22:42:08 +0800 Subject: [PATCH 27/36] version to 1.9.3 --- modelscope/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelscope/version.py b/modelscope/version.py index 97e90b1d..f7f006e7 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. 
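The base_model fix in the patch above replaces the try/except around cfg.model with plain getattr lookups that fall back to defaults, so a repo without configuration.json can still be routed to the HF loading path. A minimal standalone sketch of that access pattern; the ConfigDict here is a tiny stand-in for illustration only, not modelscope.utils.config.ConfigDict, and the field names follow the diff:

class ConfigDict(dict):
    # attribute-style access over a plain dict, purely for the sketch
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value


def resolve_model_type(cfg):
    # A missing cfg or missing keys degrade to defaults instead of raising.
    model_cfg = getattr(cfg, 'model', ConfigDict())
    # Accept `model_type` as an alias when `type` is absent, as in the diff.
    if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'):
        model_cfg.type = model_cfg.model_type
    return model_cfg, getattr(model_cfg, 'type', None)


print(resolve_model_type(None))  # ({}, None): caller can fall back to HF auto classes
print(resolve_model_type(ConfigDict(model=ConfigDict(model_type='llama'))))  # (..., 'llama')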
-__version__ = '1.9.2' +__version__ = '1.9.3' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future -__release_datetime__ = '2023-09-06 00:00:00' +__release_datetime__ = '2023-10-17 00:00:00' From 087cb4e463dc5dc022954796df484f3bfe65c9ba Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Mon, 16 Oct 2023 16:13:28 +0800 Subject: [PATCH 28/36] upgrade flash attention to 2.32.2 --- docker/scripts/install_flash_attension.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docker/scripts/install_flash_attension.sh b/docker/scripts/install_flash_attension.sh index 6a3301c2..f37e567d 100644 --- a/docker/scripts/install_flash_attension.sh +++ b/docker/scripts/install_flash_attension.sh @@ -1,6 +1,4 @@ - git clone -b v1.0.8 https://github.com/Dao-AILab/flash-attention && \ - cd flash-attention && pip install . && \ - pip install csrc/layer_norm && \ - pip install csrc/rotary && \ + git clone -b v2.3.2 https://github.com/Dao-AILab/flash-attention && \ + cd flash-attention && python setup.py install && \ cd .. && \ rm -rf flash-attention From 19e7c1c80700ca5f9544fe826b40930d01923e69 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Mon, 16 Oct 2023 22:12:31 +0800 Subject: [PATCH 29/36] fix ofa new transformers compatible issue Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14317517 * fix ofa new transformers compatible issue * fix timm.layers to timm.models.layers compatible issue --- modelscope/models/cv/shop_segmentation/head_fpn.py | 4 ++-- modelscope/models/cv/shop_segmentation/models.py | 4 ++-- modelscope/models/cv/shop_segmentation/neck_fpn.py | 4 ++-- .../models/multi_modal/ofa/tokenization_ofa.py | 12 ++++++------ 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/modelscope/models/cv/shop_segmentation/head_fpn.py b/modelscope/models/cv/shop_segmentation/head_fpn.py index dfa284d4..0d4027cb 100644 --- a/modelscope/models/cv/shop_segmentation/head_fpn.py +++ b/modelscope/models/cv/shop_segmentation/head_fpn.py @@ -9,8 +9,8 @@ import numpy as np import torch import torch.nn as nn from mmcv.cnn import ConvModule -from timm.layers.drop import drop_path -from timm.layers.weight_init import trunc_normal_ +from timm.models.layers.drop import drop_path +from timm.models.layers.weight_init import trunc_normal_ from .common import Upsample, resize diff --git a/modelscope/models/cv/shop_segmentation/models.py b/modelscope/models/cv/shop_segmentation/models.py index 1b07a08c..a206e9f1 100644 --- a/modelscope/models/cv/shop_segmentation/models.py +++ b/modelscope/models/cv/shop_segmentation/models.py @@ -11,8 +11,8 @@ from collections import OrderedDict import torch import torch.nn.functional as F import torch.utils.checkpoint as checkpoint -from timm.layers.drop import drop_path -from timm.layers.weight_init import trunc_normal_ +from timm.models.layers.drop import drop_path +from timm.models.layers.weight_init import trunc_normal_ from torch import nn diff --git a/modelscope/models/cv/shop_segmentation/neck_fpn.py b/modelscope/models/cv/shop_segmentation/neck_fpn.py index 12c11d76..d344de71 100644 --- a/modelscope/models/cv/shop_segmentation/neck_fpn.py +++ b/modelscope/models/cv/shop_segmentation/neck_fpn.py @@ -8,8 +8,8 @@ import torch.nn as nn import torch.nn.functional as F from mmcv.cnn import ConvModule -from timm.layers.drop import drop_path -from timm.layers.weight_init import trunc_normal_ +from timm.models.layers.drop import drop_path +from timm.models.layers.weight_init import 
trunc_normal_ from .common import resize diff --git a/modelscope/models/multi_modal/ofa/tokenization_ofa.py b/modelscope/models/multi_modal/ofa/tokenization_ofa.py index 77de7a1d..ea79a327 100644 --- a/modelscope/models/multi_modal/ofa/tokenization_ofa.py +++ b/modelscope/models/multi_modal/ofa/tokenization_ofa.py @@ -183,6 +183,12 @@ class OFATokenizerZH(PreTrainedTokenizer): tokenize_chinese_chars=True, strip_accents=None, **kwargs): + if not os.path.isfile(vocab_file): + raise ValueError( + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " + 'model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`' + ) + self.vocab = load_vocab(vocab_file) super().__init__( do_lower_case=do_lower_case, do_basic_tokenize=do_basic_tokenize, @@ -199,12 +205,6 @@ class OFATokenizerZH(PreTrainedTokenizer): **kwargs, ) - if not os.path.isfile(vocab_file): - raise ValueError( - f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " - 'model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`' - ) - self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([ (ids, tok) for tok, ids in self.vocab.items() ]) From 66430171ae3618dc8e86fc39910aa75da54f781f Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Tue, 17 Oct 2023 22:15:54 +0800 Subject: [PATCH 30/36] fix chatglm2 can't find tokenizer issue Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14335080 * fix chatglm2 can't find tokenizer issue --- modelscope/models/nlp/chatglm2/tokenization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope/models/nlp/chatglm2/tokenization.py b/modelscope/models/nlp/chatglm2/tokenization.py index 7014dc9c..4523dcdd 100644 --- a/modelscope/models/nlp/chatglm2/tokenization.py +++ b/modelscope/models/nlp/chatglm2/tokenization.py @@ -72,7 +72,6 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer): model_input_names = ['input_ids', 'attention_mask', 'position_ids'] def __init__(self, vocab_file, padding_side='left', **kwargs): - super().__init__(padding_side=padding_side, **kwargs) self.name = 'GLMTokenizer' self.vocab_file = vocab_file @@ -82,6 +81,7 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer): '': self.tokenizer.eos_id, '': self.tokenizer.pad_id } + super().__init__(padding_side=padding_side, **kwargs) def get_command(self, token): if token in self.special_tokens: From 0908e20da2756ad9434d019550d43e8f7e8e1608 Mon Sep 17 00:00:00 2001 From: Jintao Date: Thu, 12 Oct 2023 10:27:31 +0800 Subject: [PATCH 31/36] fix merge error (#582) --- modelscope/models/base/base_model.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/modelscope/models/base/base_model.py b/modelscope/models/base/base_model.py index a3b65812..8e6d4ae6 100644 --- a/modelscope/models/base/base_model.py +++ b/modelscope/models/base/base_model.py @@ -9,7 +9,7 @@ from modelscope.metainfo import Tasks from modelscope.models.builder import build_backbone, build_model from modelscope.utils.automodel_utils import (can_load_by_ms, try_to_load_hf_model) -from modelscope.utils.config import Config +from modelscope.utils.config import Config, ConfigDict from modelscope.utils.constant import DEFAULT_MODEL_REVISION, Invoke, ModelFile from modelscope.utils.device import verify_device from modelscope.utils.logger import get_logger @@ -142,15 +142,10 @@ class Model(ABC): task_name = cfg.task if 'task' in kwargs: task_name = kwargs.pop('task') - 
try: - model_cfg = cfg.model - if hasattr(model_cfg, - 'model_type') and not hasattr(model_cfg, 'type'): - model_cfg.type = model_cfg.model_type - model_type = model_cfg.type - except Exception: - model_cfg = {} - model_type = '' + model_cfg = getattr(cfg, 'model', ConfigDict()) + if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): + model_cfg.type = model_cfg.model_type + model_type = getattr(model_cfg, 'type', None) if isinstance(device, str) and device.startswith('gpu'): device = 'cuda' + device[3:] use_hf = kwargs.pop('use_hf', None) @@ -162,7 +157,7 @@ class Model(ABC): model = try_to_load_hf_model(local_model_dir, task_name, use_hf, **kwargs) if model is not None: - device_map = kwargs.get('device_map', None) + device_map = kwargs.pop('device_map', None) if device_map is None and device is not None: model = model.to(device) return model From 6201d1bfbc29cce4e57a74f675fcb013a4020ddf Mon Sep 17 00:00:00 2001 From: "suluyan.sly" Date: Wed, 18 Oct 2023 16:29:13 +0800 Subject: [PATCH 32/36] [swingdeploy] oss examples Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14342415 * oss_examples --- modelscope/utils/pipeline_inputs.json | 66 +++++++++++++-------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/modelscope/utils/pipeline_inputs.json b/modelscope/utils/pipeline_inputs.json index 0cb9c1b1..03a00636 100644 --- a/modelscope/utils/pipeline_inputs.json +++ b/modelscope/utils/pipeline_inputs.json @@ -1,17 +1,17 @@ { "action-detection":{ "input":{ - "video":"data/test/videos/action_detection_test_video.mp4" + "video":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/action_detection_test_video.mp4" } }, "action-recognition":{ "input":{ - "video":"data/test/videos/action_recognition_test_video.mp4" + "video":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/action_recognition_test_video.mp4" } }, "animal-recognition":{ "input":{ - "image":"data/test/images/dogs.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/dogs.jpg" } }, "chat":{ @@ -34,52 +34,52 @@ }, "domain-specific-object-detection":{ "input":{ - "image":"data/test/images/image_traffic_sign.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_traffic_sign.jpg" } }, "face-2d-keypoints":{ "input":{ - "image":"data/test/images/face_detection.png" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/face_detection.png" } }, "face-attribute-recognition":{ "input":{ - "image":"data/test/images/face_recognition_1.png" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/face_recognition_1.png" } }, "facial-expression-recognition":{ "input":{ - "image":"data/test/images/facial_expression_recognition.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/facial_expression_recognition.jpg" } }, "general-recognition":{ "input":{ - "image":"data/test/images/dogs.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/dogs.jpg" } }, "human-detection":{ "input":{ - "image":"data/test/images/image_detection.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_detection.jpg" } }, "image-captioning":{ "input":{ - "image":"data/test/images/image_captioning.png" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_captioning.png" } }, "image-classification":{ "input":{ - "image":"data/test/images/content_check.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/content_check.jpg" } }, 
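Every demo path in this table now points at an OSS URL instead of a file under data/test, so a quick reachability pass catches broken links before they surface as pipeline failures at deploy time. A small sketch; the use of HEAD requests is an assumption, and a bucket that rejects HEAD would need a ranged GET instead:

import json

import requests

with open('modelscope/utils/pipeline_inputs.json', encoding='utf-8') as f:
    table = json.load(f)

for task, entry in table.items():
    for field, value in entry.get('input', {}).items():
        if isinstance(value, str) and value.startswith('http'):
            # HEAD keeps the check cheap; report anything that is not reachable.
            status = requests.head(value, allow_redirects=True, timeout=10).status_code
            if status >= 400:
                print(f'{task}.{field}: {value} -> HTTP {status}')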
"image-demoireing":{ "input":{ - "image":"data/test/images/shop_segmentation.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/shop_segmentation.jpg" } }, "image-object-detection":{ "input":{ - "image":"data/test/images/image_detection.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_detection.jpg" } }, "image-portrait-stylization":{ @@ -89,7 +89,7 @@ }, "image-segmentation":{ "input":{ - "image":"data/test/images/image_semantic_segmentation.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_semantic_segmentation.jpg" }, "parameters":{ @@ -97,18 +97,18 @@ }, "image-text-retrieval":{ "input":{ - "image":"data/test/images/image_mplug_vqa.jpg", + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_mplug_vqa.jpg", "text":"What is the woman doing?" } }, "indoor-layout-estimation":{ "input":{ - "image":"data/test/images/image_traffic_sign.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_traffic_sign.jpg" } }, "live-category":{ "input":{ - "video":"data/test/videos/live_category_test_video.mp4" + "video":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/live_category_test_video.mp4" } }, "motion-generation":{ @@ -132,22 +132,22 @@ }, "ocr-recognition":{ "input":{ - "image":"data/test/images/image_ocr_recognition.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_ocr_recognition.jpg" } }, "panorama-depth-estimation":{ "input":{ - "image":"data/test/images/panorama_depth_estimation.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/panorama_depth_estimation.jpg" } }, "semantic-segmentation":{ "input":{ - "image":"data/test/images/image_salient_detection.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_salient_detection.jpg" } }, "shop-segmentation":{ "input":{ - "image":"data/test/images/shop_segmentation.jpg" + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/shop_segmentation.jpg" } }, "text-classification":{ @@ -160,7 +160,7 @@ }, "text-driven-segmentation":{ "input":{ - "image":"data/test/images/text_driven_segmentation.jpg", + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/text_driven_segmentation.jpg", "text":"bear" } }, @@ -201,60 +201,60 @@ }, "video-captioning":{ "input":{ - "video":"data/test/videos/video_caption_and_qa_test.mp4" + "video":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/video_caption_and_qa_test.mp4" } }, "video-category":{ "input":{ - "video":"data/test/videos/video_category_test_video.mp4" + "video":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/video_category_test_video.mp4" } }, "video-depth-estimation":{ "input":{ - "video":"data/test/videos/video_depth_estimation.mp4" + "video":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/video_depth_estimation.mp4" } }, "video-embedding":{ "input":{ - "video":"data/test/videos/action_recognition_test_video.mp4" + "video":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/action_recognition_test_video.mp4" } }, "video-multi-object-tracking":{ "input":{ - "video":"data/test/videos/MOT17-03-partial.mp4" + "video":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/MOT17-03-partial.mp4" } }, "video-panoptic-segmentation":{ "input":{ - "video":"data/test/videos/kitti-step_testing_image_02_0000.mp4" + "video":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/kitti-step_testing_image_02_0000.mp4" } }, 
"video-question-answering":{ "input":{ - "video":"data/test/videos/video_caption_and_qa_test.mp4", + "video":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/video_caption_and_qa_test.mp4", "text":"How many people are there?" } }, "video-summarization":{ "input":{ - "text":"data/test/videos/video_category_test_video.mp4" + "text":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/videos/video_category_test_video.mp4" } }, "visual-entailment":{ "input":{ - "image":"data/test/images/dogs.jpg", + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/dogs.jpg", "text":"there are two birds." } }, "visual-grounding":{ "input":{ - "image":"data/test/images/visual_grounding.png", + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/visual_grounding.png", "text":"a blue turtle-like pokemon with round head" } }, "visual-question-answering":{ "input":{ - "image":"data/test/images/image_mplug_vqa.jpg", + "image":"http://modelscope.oss-cn-beijing.aliyuncs.com/demo/images/image_mplug_vqa.jpg", "text":"What is the woman doing?" } }, From e10237074e4129dbd17457e08bf21d59e496f785 Mon Sep 17 00:00:00 2001 From: "wenmeng.zwm" Date: Wed, 18 Oct 2023 20:24:42 +0800 Subject: [PATCH 33/36] fix chatglm sp_tokenizer error Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14352495 --- modelscope/models/nlp/chatglm/tokenization.py | 7 ++++--- modelscope/models/nlp/llama/text_generation.py | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/modelscope/models/nlp/chatglm/tokenization.py b/modelscope/models/nlp/chatglm/tokenization.py index f5f8cd0c..6ce1b90d 100644 --- a/modelscope/models/nlp/chatglm/tokenization.py +++ b/modelscope/models/nlp/chatglm/tokenization.py @@ -199,6 +199,10 @@ class ChatGLMTokenizer(PreTrainedTokenizer): padding_side='left', num_image_tokens=20000, **kwargs) -> None: + + self.sp_tokenizer = SPTokenizer( + vocab_file, num_image_tokens=num_image_tokens) + super().__init__( do_lower_case=do_lower_case, remove_space=remove_space, @@ -220,9 +224,6 @@ class ChatGLMTokenizer(PreTrainedTokenizer): self.end_token = end_token self.mask_token = mask_token self.gmask_token = gmask_token - - self.sp_tokenizer = SPTokenizer( - vocab_file, num_image_tokens=num_image_tokens) """ Initialisation """ @property diff --git a/modelscope/models/nlp/llama/text_generation.py b/modelscope/models/nlp/llama/text_generation.py index b9cc8032..d95cae34 100644 --- a/modelscope/models/nlp/llama/text_generation.py +++ b/modelscope/models/nlp/llama/text_generation.py @@ -71,6 +71,8 @@ def get_chat_prompt(system: str, text: str, history: List[Tuple[str, str]], # This file is mainly copied from the llama code of transformers +@MODELS.register_module(Tasks.chat, module_name=Models.llama2) +@MODELS.register_module(Tasks.chat, module_name=Models.llama) @MODELS.register_module(Tasks.text_generation, module_name=Models.llama2) @MODELS.register_module(Tasks.chat, module_name=Models.llama2) @MODELS.register_module(Tasks.text_generation, module_name=Models.llama) From f493ed007ce66ae61262a5ac7db5b0f8346e0fc1 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Wed, 18 Oct 2023 21:17:51 +0800 Subject: [PATCH 34/36] force rebuid image --- .dev_scripts/build_image.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh index 9ce2a4a8..386e5aad 100644 --- a/.dev_scripts/build_image.sh +++ b/.dev_scripts/build_image.sh @@ -150,7 +150,7 @@ echo -e "Building image 
with:\npython$python_version\npytorch$torch_version\nten docker_file_content=`cat docker/Dockerfile.ubuntu` if [ "$is_ci_test" != "True" ]; then echo "Building ModelScope lib, will install ModelScope lib to image" - docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir -U transformers && pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/releases/build/modelscope-$modelscope_version-py3-none-any.whl " + docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/releases/build/modelscope-$modelscope_version-py3-none-any.whl && pip install --no-cache-dir -U transformers" fi echo "$is_dsw" if [ "$is_dsw" == "False" ]; then From 8e187bdb962b7671a3d9384f80100cfc0cf4094c Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Thu, 19 Oct 2023 12:12:35 +0800 Subject: [PATCH 35/36] remove llama2 dup in chat task Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14359280 * remove llama2 dup in chat task --- modelscope/models/nlp/llama/text_generation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modelscope/models/nlp/llama/text_generation.py b/modelscope/models/nlp/llama/text_generation.py index d95cae34..45b9d5f0 100644 --- a/modelscope/models/nlp/llama/text_generation.py +++ b/modelscope/models/nlp/llama/text_generation.py @@ -74,7 +74,6 @@ def get_chat_prompt(system: str, text: str, history: List[Tuple[str, str]], @MODELS.register_module(Tasks.chat, module_name=Models.llama2) @MODELS.register_module(Tasks.chat, module_name=Models.llama) @MODELS.register_module(Tasks.text_generation, module_name=Models.llama2) -@MODELS.register_module(Tasks.chat, module_name=Models.llama2) @MODELS.register_module(Tasks.text_generation, module_name=Models.llama) class LlamaForTextGeneration(MsModelMixin, LlamaForCausalLM, TorchModel): From b14f3464e59fce10e1eec665bfafd4cbaa252df8 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Thu, 19 Oct 2023 12:30:10 +0800 Subject: [PATCH 36/36] force rebuild image --- .dev_scripts/build_image.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh index 386e5aad..c1e61890 100644 --- a/.dev_scripts/build_image.sh +++ b/.dev_scripts/build_image.sh @@ -150,7 +150,7 @@ echo -e "Building image with:\npython$python_version\npytorch$torch_version\nten docker_file_content=`cat docker/Dockerfile.ubuntu` if [ "$is_ci_test" != "True" ]; then echo "Building ModelScope lib, will install ModelScope lib to image" - docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/releases/build/modelscope-$modelscope_version-py3-none-any.whl && pip install --no-cache-dir -U transformers" + docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir numpy https://modelscope.oss-cn-beijing.aliyuncs.com/releases/build/modelscope-$modelscope_version-py3-none-any.whl && pip install --no-cache-dir -U transformers" fi echo "$is_dsw" if [ "$is_dsw" == "False" ]; then
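The llama cleanup in patch 35 drops a decorator line that registered the same (task, module_name) pair a second time. The toy registry below only illustrates why that duplicate was redundant; it is not ModelScope's Registry implementation, whose duplicate handling may differ:

class Registry:
    """Minimal (task, name) -> class registry; conflicting duplicates are rejected."""

    def __init__(self):
        self._modules = {}

    def register_module(self, task, module_name):
        def decorator(cls):
            key = (task, module_name)
            if key in self._modules and self._modules[key] is not cls:
                raise KeyError(f'{key} already registered to {self._modules[key]!r}')
            self._modules[key] = cls  # re-registering the same class is a no-op
            return cls
        return decorator

    def build(self, task, module_name):
        return self._modules[(task, module_name)]()


MODELS = Registry()


@MODELS.register_module('chat', 'llama2')
@MODELS.register_module('text-generation', 'llama2')
class LlamaForTextGeneration:
    pass


print(MODELS.build('chat', 'llama2'))  # one class serves both registered tasks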