From ea324b484121937cfdca1de4c82e97c3484a21c0 Mon Sep 17 00:00:00 2001
From: "suluyan.sly" 
Date: Wed, 8 Nov 2023 16:10:02 +0800
Subject: [PATCH 01/14] feat: deploy checker for swingdeploy

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14575909

* feat: deploy checker for swingdeploy

* fix: configuration.json mismatch the revision.
---
 modelscope/utils/deploy_checker.py | 90 ++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 modelscope/utils/deploy_checker.py

diff --git a/modelscope/utils/deploy_checker.py b/modelscope/utils/deploy_checker.py
new file mode 100644
index 00000000..c57f7d64
--- /dev/null
+++ b/modelscope/utils/deploy_checker.py
@@ -0,0 +1,90 @@
+import argparse
+import os
+import traceback
+from typing import List, Union
+
+import json
+
+from modelscope.hub.api import HubApi
+from modelscope.hub.file_download import model_file_download
+from modelscope.hub.utils.utils import get_cache_dir
+from modelscope.pipelines import pipeline
+from modelscope.utils.config import Config
+from modelscope.utils.constant import ModelFile
+from modelscope.utils.input_output import (
+    call_pipeline_with_json, get_pipeline_information_by_pipeline,
+    get_task_input_examples, pipeline_output_to_service_base64_output)
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+class DeployChecker:
+
+    def __init__(self):
+        self.api = HubApi()
+
+    def check_model(self, model_id: str, model_revision=None):
+        # get model_revision & task info
+        if not model_revision:
+            model_revisions = self.api.list_model_revisions(model_id)
+            logger.info(
+                f'All model_revisions of `{model_id}`: {model_revisions}')
+            if len(model_revisions):
+                model_revision = model_revisions[0]
+            else:
+                logger.error(f'{model_id} has no revision.')
+
+        configuration_file = model_file_download(
+            model_id=model_id,
+            file_path=ModelFile.CONFIGURATION,
+            revision=model_revision)
+        cfg = Config.from_file(configuration_file)
+        task = cfg.safe_get('task')
+
+        # init pipeline
+        ppl = pipeline(
+            task=task,
+            model=model_id,
+            model_revision=model_revision,
+            llm_first=True)
+        pipeline_info = get_pipeline_information_by_pipeline(ppl)
+
+        # call pipeline
+        data = get_task_input_examples(task)
+
+        infer_result = call_pipeline_with_json(pipeline_info, ppl, data)
+        result = pipeline_output_to_service_base64_output(task, infer_result)
+        return result
+
+
+def check_deploy(models: Union[str, List], revisions: Union[str, List] = None):
+    if not isinstance(models, list):
+        models = [models]
+    if not isinstance(revisions, list):
+        revisions = [revisions] * len(models)
+
+    if len(models) != len(revisions):
+        logger.error(
+            f'The numbers of models and revisions must be equal: the number of models'
+            f' is {len(models)} while the number of revisions is {len(revisions)}.'
+        )
+
+    checker = DeployChecker()
+    for model, revision in zip(models, revisions):
+        try:
+            res = checker.check_model(model, revision)
+            logger.info(f'{model} {revision}: Deploy pre-check pass. {res}\n')
+        except BaseException as e:
+            logger.error(
+                f'{model} {revision}: Deploy pre-check failed: {e}. {traceback.format_exc()}\n'
+            )
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model_id', type=str)
+    parser.add_argument('--revision', type=str, default=None)
+    args = parser.parse_args()
+
+    check_deploy(args.model_id, args.revision)
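
A quick sketch of how the checker above can be driven from Python rather than the CLI. The first model ID is one that appears later in this series; the batched IDs and revisions are purely illustrative placeholders:

```python
# Hedged usage sketch for modelscope/utils/deploy_checker.py (patch 01).
from modelscope.utils.deploy_checker import check_deploy

# Single model; the newest revision is resolved automatically.
check_deploy('damo/cv_gpen_image-portrait-enhancement')

# Several models with pinned revisions (these model IDs are hypothetical).
check_deploy(
    models=['damo/model-a', 'damo/model-b'],
    revisions=['v1.0.0', 'v1.0.2'])
```
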
From 00eb4219a06686816d2e97d43eb7407d3371677a Mon Sep 17 00:00:00 2001
From: myf272609 
Date: Wed, 8 Nov 2023 21:11:21 +0800
Subject: [PATCH 02/14] [to #42322933] fix issues for 3dhuman models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Character animation: add support for a custom Blender path; remove model position normalization
- Character rendering: add support for a custom render resolution; add model position normalization

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14459360

* fix some issues

* fix
---
 .../pipelines/cv/human3d_animation_pipeline.py | 10 ++++++----
 .../pipelines/cv/human3d_render_pipeline.py    | 18 ++++++++++++------
 tests/pipelines/test_human3d_animation.py      |  1 +
 tests/pipelines/test_human3d_render.py         |  1 +
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/modelscope/pipelines/cv/human3d_animation_pipeline.py b/modelscope/pipelines/cv/human3d_animation_pipeline.py
index d03cd8a3..4e5ab46d 100644
--- a/modelscope/pipelines/cv/human3d_animation_pipeline.py
+++ b/modelscope/pipelines/cv/human3d_animation_pipeline.py
@@ -72,7 +72,7 @@ class Human3DAnimationPipeline(Pipeline):
                                  (case_name, action_name))
 
         exec_path = os.path.join(self.model_dir, 'skinning.py')
-        cmd = f'blender -b -P {exec_path} -- --input {self.case_dir}' \
+        cmd = f'{self.blender} -b -P {exec_path} -- --input {self.case_dir}' \
              f' --gltf_path {gltf_path} --action {self.action}'
         os.system(cmd)
         return gltf_path
@@ -83,9 +83,6 @@ class Human3DAnimationPipeline(Pipeline):
         mesh = read_obj(mesh_path)
         tex = cv2.imread(tex_path)
         vertices = mesh['vertices']
-        cent = (vertices.max(axis=0) + vertices.min(axis=0)) / 2
-        new_cent = (0, 1.8 / 2, 0)
-        vertices -= (cent - new_cent)
         mesh['vertices'] = vertices
         mesh['texture_map'] = tex
         write_obj(mesh_path, mesh)
@@ -108,6 +105,11 @@ class Human3DAnimationPipeline(Pipeline):
         else:
             save_dir = None
 
+        if 'blender' in input:
+            self.blender = input['blender']
+        else:
+            self.blender = 'blender'
+
         if case_id.endswith('.obj'):
             mesh_path = case_id
         else:
diff --git a/modelscope/pipelines/cv/human3d_render_pipeline.py b/modelscope/pipelines/cv/human3d_render_pipeline.py
index 44d0bb21..cf506d19 100644
--- a/modelscope/pipelines/cv/human3d_render_pipeline.py
+++ b/modelscope/pipelines/cv/human3d_render_pipeline.py
@@ -68,6 +68,8 @@ class Human3DRenderPipeline(Pipeline):
 
     def format_nvdiffrast_format(self, mesh, tex):
         vert = mesh['vertices']
+        cent = (vert.max(axis=0) + vert.min(axis=0)) / 2
+        vert -= cent
         tri = mesh['faces']
         tri = tri - 1 if tri.min() == 1 else tri
         vert_uv = mesh['uvs']
@@ -81,7 +83,7 @@ class Human3DRenderPipeline(Pipeline):
         tex = torch.from_numpy(tex.astype(np.float32) / 255.0).cuda()
         return vtx_pos, pos_idx, vtx_uv, uv_idx, tex
 
-    def render_scene(self, mesh_path):
+    def render_scene(self, mesh_path, resolution=512):
         if not os.path.exists(mesh_path):
             logger.info('can not found %s, use default one' % mesh_path)
             mesh_path = os.path.join(self.model_dir, '3D-assets',
@@ -99,8 +101,8 @@ class Human3DRenderPipeline(Pipeline):
         frames_normals = []
         for i in tqdm.tqdm(range(frame_length)):
             proj = projection(x=0.4, n=1.0, f=200.0)
-            a_rot = np.matmul(rotate_x(-0.1), rotate_y(ang))
-            a_mv = np.matmul(translate(0, 0, -2.5), a_rot)
+            a_rot = np.matmul(rotate_x(0.0), rotate_y(ang))
+            a_mv = np.matmul(translate(0, 0, -2.7), a_rot)
             r_mvp = np.matmul(proj,
a_mv).astype(np.float32) pred_img, pred_mask, normal = render( glctx, @@ -110,7 +112,7 @@ class Human3DRenderPipeline(Pipeline): vtx_uv, uv_idx, tex, - resolution=512, + resolution=resolution, enable_mip=False, max_mip_level=9) color = np.clip( @@ -123,7 +125,7 @@ class Human3DRenderPipeline(Pipeline): frames_normals.append(normals) ang = ang + step - logger.info('load case %s done' + logger.info('render case %s done' % os.path.basename(os.path.dirname(mesh_path))) return mesh, frames_color, frames_normals @@ -131,6 +133,10 @@ class Human3DRenderPipeline(Pipeline): def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: dataset_id = input['dataset_id'] case_id = input['case_id'] + if 'resolution' in input: + resolution = input['resolution'] + else: + resolution = 512 if case_id.endswith('.obj'): mesh_path = case_id else: @@ -142,7 +148,7 @@ class Human3DRenderPipeline(Pipeline): case_dir = os.path.join(data_dir, case_id) mesh_path = os.path.join(case_dir, 'body.obj') - mesh, colors, normals = self.render_scene(mesh_path) + mesh, colors, normals = self.render_scene(mesh_path, resolution) results = { 'mesh': mesh, diff --git a/tests/pipelines/test_human3d_animation.py b/tests/pipelines/test_human3d_animation.py index 75fc4c9d..97ee12f4 100644 --- a/tests/pipelines/test_human3d_animation.py +++ b/tests/pipelines/test_human3d_animation.py @@ -21,6 +21,7 @@ class Human3DAnimationTest(unittest.TestCase): 'action_dataset': 'damo/3DHuman_action_dataset', 'action': 'SwingDancing', 'save_dir': 'outputs', + 'blender': 'blender', } output = human3d(input) print('saved animation file to %s' % output) diff --git a/tests/pipelines/test_human3d_render.py b/tests/pipelines/test_human3d_render.py index e1840af4..47bb6a83 100644 --- a/tests/pipelines/test_human3d_render.py +++ b/tests/pipelines/test_human3d_render.py @@ -45,6 +45,7 @@ class Human3DRenderTest(unittest.TestCase): input = { 'dataset_id': 'damo/3DHuman_synthetic_dataset', 'case_id': '3f2a7538253e42a8', + 'resolution': 1024, } output = human3d(input) self.save_results(output, './human3d_results') From 6833bdabfc03b1afa8e3b3c30e485a41b032f004 Mon Sep 17 00:00:00 2001 From: "xingjun.wxj" Date: Fri, 17 Nov 2023 10:46:58 +0800 Subject: [PATCH 03/14] set datasets==2.14.6 Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14593950 --- requirements/framework.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/framework.txt b/requirements/framework.txt index 83e69a00..4efce85d 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -1,6 +1,6 @@ addict attrs -datasets>=2.8.0,<=2.13.0 +datasets>=2.13.0,<=2.14.6 einops filelock>=3.3.0 gast>=0.2.2 From b8e86060f51b56b42f0944a07a1fabc6bbb3f613 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Mon, 27 Nov 2023 13:56:33 +0800 Subject: [PATCH 04/14] numpy version unrestrict Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/13398805 * numpy version unrestrict --- requirements/tensorflow1x.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/tensorflow1x.txt b/requirements/tensorflow1x.txt index 5d680652..c808f28f 100644 --- a/requirements/tensorflow1x.txt +++ b/requirements/tensorflow1x.txt @@ -1 +1 @@ -numpy<1.20.0 +numpy<=1.18.5 From 5ba9fd23079b87a14a8aa92ee297e744039bae22 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Mon, 27 Nov 2023 20:21:00 +0800 Subject: [PATCH 05/14] modify auto gptq and vllm env Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14790283 * upgrade to python3.10 * 
modify auto gptq and vllm env

* fix lint issue

* Merge remote-tracking branch 'origin/master' into python10_support

* python310 support

* build from repo

* add commit id force install modelscope every build

* add commit id force install modelscope every build

* fix cpu build issue

* fix datahub error message

* Merge branch 'python10_support' of gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib into python10_support

* add --no-cache-dir install auto_gptq
---
 .dev_scripts/build_base_image.sh | 42 +++++++--
 .dev_scripts/build_image.sh | 24 +++--
 docker/Dockerfile.ubuntu | 90 +++++++++----------
 docker/Dockerfile.ubuntu_base | 79 +++++++---------
 docker/rcfiles/conda.aliyun | 14 +++
 docker/rcfiles/conda.tuna | 15 ----
 docker/rcfiles/pip.conf.tsinghua | 2 -
 docker/rcfiles/ubuntu2204.aliyun | 10 +++
 docker/scripts/install_apex.sh | 2 +-
 docker/scripts/install_colmap.sh | 2 +-
 docker/scripts/install_flash_attension.sh | 4 +-
 .../scripts/install_pytorch3d_nvdiffrast.sh | 9 +-
 docker/scripts/install_tiny_cuda_nn.sh | 3 +-
 modelscope/hub/api.py | 6 +-
 modelscope/hub/errors.py | 5 +-
 15 files changed, 167 insertions(+), 140 deletions(-)
 create mode 100644 docker/rcfiles/conda.aliyun
 delete mode 100644 docker/rcfiles/conda.tuna
 delete mode 100644 docker/rcfiles/pip.conf.tsinghua
 create mode 100644 docker/rcfiles/ubuntu2204.aliyun

diff --git a/.dev_scripts/build_base_image.sh b/.dev_scripts/build_base_image.sh
index 8c8c9a0e..872798cd 100644
--- a/.dev_scripts/build_base_image.sh
+++ b/.dev_scripts/build_base_image.sh
@@ -1,19 +1,24 @@
 #!/bin/bash
 # default values.
-BASE_CPU_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04
+BASE_CPU_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu
 BASE_GPU_CUDA113_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.3.0-cudnn8-devel
 BASE_GPU_CUDA117_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.7.1-cudnn8-devel
 BASE_GPU_CUDA118_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.8.0-cudnn8-devel
+BASE_GPU_CUDA121_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:22.04-cuda12.1.0-cudnn8-devel
+BASE_GPU_CUDA122_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:22.04-cuda12.2.2-cudnn8-devel
 MODELSCOPE_REPO_ADDRESS=reg.docker.alibaba-inc.com/modelscope/modelscope
 python_version=3.7.13
 torch_version=1.11.0
 cuda_version=11.7.1
 cudatoolkit_version=11.3
 tensorflow_version=1.15.5
+os_version=20.04
 version=None
 is_cpu=False
+is_dryrun=False
 function usage(){
     echo "usage: build.sh "
+    echo "  --os=ubuntu_version set ubuntu os version, default: 20.04"
     echo "  --python=python_version set python version, default: $python_version"
     echo "  --cuda=cuda_version set cuda version,only[11.3.0, 11.7.1], fefault: $cuda_version"
     echo "  --torch=torch_version set pytorch version, fefault: $torch_version"
     echo "  --test option for run test before push image, only push on ci test pass"
     echo "  --cpu option for build cpu version"
     echo "  --push option for push image to remote repo"
+    echo "  --dryrun create the Dockerfile only, do not build"
 }
 for i in "$@"; do
     case $i in
+    --os=*)
+      os_version="${i#*=}"
+      shift
+      ;;
     --python=*)
       python_version="${i#*=}"
       shift
@@ -52,6 +62,10 @@ for i in "$@"; do
       is_push=True
       shift # option for push image to remote repo
       ;;
+    --dryrun)
+      is_dryrun=True
+      shift
+      ;;
     --help)
       usage
       exit 0
@@ -68,7 +82,7 @@ done

 if [ "$cuda_version" == 11.3.0 ]; then
     echo "Building base image cuda11.3.0"
-    BASE_GPU_IMAGE=$BASE_GPU_CUDA113_IMAGE
+    BASE_GPU_IMAGE=$MODELSCOPE_REPO_ADDRESS:$os_version-cuda$cuda_version-cudnn8-devel
cudatoolkit_version=cu113 elif [ "$cuda_version" == 11.7.1 ]; then echo "Building base image cuda11.7.1" @@ -77,43 +91,55 @@ elif [ "$cuda_version" == 11.7.1 ]; then elif [ "$cuda_version" == 11.8.0 ]; then echo "Building base image cuda11.8.0" cudatoolkit_version=cu118 - BASE_GPU_IMAGE=$BASE_GPU_CUDA118_IMAGE + BASE_GPU_IMAGE=$MODELSCOPE_REPO_ADDRESS:$os_version-cuda$cuda_version-cudnn8-devel +elif [ "$cuda_version" == 12.1.0 ]; then + cudatoolkit_version=cu121 + BASE_GPU_IMAGE=$BASE_GPU_CUDA121_IMAGE else echo "Unsupport cuda version: $cuda_version" exit 1 fi if [ "$is_cpu" == "True" ]; then - export BASE_IMAGE=$BASE_CPU_IMAGE - base_tag=ubuntu20.04 + export BASE_IMAGE=$BASE_CPU_IMAGE:$os_version + base_tag=ubuntu$os_version export USE_GPU=False else export BASE_IMAGE=$BASE_GPU_IMAGE - base_tag=ubuntu20.04-cuda$cuda_version + base_tag=ubuntu$os_version-cuda$cuda_version export USE_GPU=True fi + if [[ $python_version == 3.7* ]]; then base_tag=$base_tag-py37 elif [[ $python_version == 3.8* ]]; then base_tag=$base_tag-py38 +elif [[ $python_version == 3.10* ]]; then + base_tag=$base_tag-py310 else echo "Unsupport python version: $python_version" exit 1 fi - target_image_tag=$base_tag-torch$torch_version-tf$tensorflow_version-base export IMAGE_TO_BUILD=$MODELSCOPE_REPO_ADDRESS:$target_image_tag export PYTHON_VERSION=$python_version export TORCH_VERSION=$torch_version export CUDATOOLKIT_VERSION=$cudatoolkit_version export TENSORFLOW_VERSION=$tensorflow_version +echo "From: $BASE_IMAGE build: $target_image_tag" echo -e "Building image with:\npython$python_version\npytorch$torch_version\ntensorflow:$tensorflow_version\ncudatoolkit:$cudatoolkit_version\ncpu:$is_cpu\n" docker_file_content=`cat docker/Dockerfile.ubuntu_base` printf "$docker_file_content" > Dockerfile +if [ "$is_dryrun" == "True" ]; then + echo 'Dockerfile created' + exit 0 +fi + +# DOCKER_BUILDKIT=0 while true do - docker build -t $IMAGE_TO_BUILD \ + DOCKER_BUILDKIT=0 docker build -t $IMAGE_TO_BUILD \ --build-arg USE_GPU \ --build-arg BASE_IMAGE \ --build-arg PYTHON_VERSION \ diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh index dceaaa22..bb8c7e3d 100644 --- a/.dev_scripts/build_image.sh +++ b/.dev_scripts/build_image.sh @@ -44,6 +44,8 @@ for i in "$@"; do cudatoolkit_version=11.7 elif [ "$cuda_version" == "11.8.0" ]; then cudatoolkit_version=11.8 + elif [ "$cuda_version" == "12.1.0" ]; then + cudatoolkit_version=12.1 else echo "Unsupport cuda version $cuda_version" exit 1 @@ -130,6 +132,17 @@ elif [[ $python_version == 3.8* ]]; then export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu20.04-cuda$cuda_version-py38-torch$torch_version-tf$tensorflow_version-base fi base_tag=$base_tag-py38 +elif [[ $python_version == 3.10* ]]; then + if [ "$is_cpu" == "True" ]; then + echo "Building python3.10 cpu image" + base_tag=ubuntu22.04-py310 + export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-py310-torch$torch_version-tf$tensorflow_version-base + else + echo "Building python3.10 gpu image" + base_tag=ubuntu22.04-cuda$cuda_version-py310 + # reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-cuda12.1.0-py310-torch2.1.0-tf2.14.0-base + export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-cuda$cuda_version-py310-torch$torch_version-tf$tensorflow_version-base + fi else echo "Unsupport python version: $python_version" exit 1 @@ -150,7 +163,8 @@ echo -e "Building image with:\npython$python_version\npytorch$torch_version\nten 
docker_file_content=`cat docker/Dockerfile.ubuntu` if [ "$is_ci_test" != "True" ]; then echo "Building ModelScope lib, will install ModelScope lib to image" - docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir -U funasr transformers && pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/releases/build/modelscope-$modelscope_version-py3-none-any.whl " + docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir -U adaseq pai-easycv ms_swift funasr 'transformers<4.35.0'" + docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$CIS_ENV_COMMIT_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $CIS_ENV_BRANCH --single-branch $REPO_URL && cd MaaS-lib && python setup.py install && cd / && rm -fr /tmp/MaaS-lib" fi echo "$is_dsw" if [ "$is_dsw" == "False" ]; then @@ -160,12 +174,6 @@ else docker_file_content="${docker_file_content} \nENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope" # pre compile extension docker_file_content="${docker_file_content} \nRUN python -c 'from modelscope.utils.pre_compile import pre_compile_all;pre_compile_all()'" - if [ "$is_cpu" == "True" ]; then - echo 'build cpu image' - else - # fix easycv extension and tinycudann conflict. - docker_file_content="${docker_file_content} \nRUN bash /tmp/install_tiny_cuda_nn.sh" - fi fi if [ "$is_ci_test" == "True" ]; then echo "Building CI image, uninstall modelscope" @@ -175,7 +183,7 @@ printf "$docker_file_content" > Dockerfile while true do - docker build -t $IMAGE_TO_BUILD \ + DOCKER_BUILDKIT=0 docker build -t $IMAGE_TO_BUILD \ --build-arg USE_GPU \ --build-arg BASE_IMAGE \ --build-arg PYTHON_VERSION \ diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index 4ac4fd53..55965f83 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -1,10 +1,47 @@ ARG BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu20.04-cuda11.3.0-py37-torch1.11.0-tf1.15.5-base FROM $BASE_IMAGE - -RUN apt-get update && apt-get install -y iputils-ping net-tools iproute2 && \ +RUN apt-get update && \ + apt-get install -y libsox-dev unzip zip iputils-ping telnet && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# install modelscope + +# install jupyter plugin +RUN mkdir -p /root/.local/share/jupyter/labextensions/ && \ + cp -r /tmp/resources/jupyter_plugins/* /root/.local/share/jupyter/labextensions/ + +COPY docker/scripts/modelscope_env_init.sh /usr/local/bin/ms_env_init.sh +# python3.8 pip install git+https://github.com/jin-s13/xtcocoapi.git@v1.13 +# pip install git+https://github.com/gatagat/lap.git@v0.4.0 +RUN pip install --no-cache-dir numpy 'cython<=0.29.36' funtextprocessing kwsbp==0.0.6 safetensors typeguard==2.13.3 scikit-learn librosa==0.9.2 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html + +RUN pip install --no-cache-dir adaseq text2sql_lgesql==1.3.0 \ + git+https://github.com/jin-s13/xtcocoapi.git@v1.14 \ + git+https://github.com/gatagat/lap.git@v0.4.0 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps + +RUN mv /opt/conda/compiler_compat/ld /opt/conda/compiler_compat/ldbk && \ + pip install --no-cache-dir mpi4py paint_ldm \ + mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 \ + ipykernel fasttext fairseq deepspeed -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html + +ARG USE_GPU + + +RUN if [ "$USE_GPU" = "True" ] ; then \ + CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0" pip install --no-cache-dir 
'git+https://github.com/facebookresearch/detectron2.git'; \ + else \ + echo 'cpu unsupport detectron2'; \ + fi + +# torchmetrics==0.11.4 for ofa +RUN if [ "$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir torchsde jupyterlab torchmetrics==0.11.4 tiktoken transformers_stream_generator bitsandbytes basicsr optimum && \ + pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ && \ + pip install --no-cache-dir -U xformers --index-url https://download.pytorch.org/whl/cu118 && \ + pip install --no-cache-dir flash_attn==2.3.3+torch2.1cu118 tinycudann==1.7+cu118 vllm==0.2.1+cu118torch2.1 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ + else \ + echo 'cpu unsupport vllm auto-gptq'; \ + fi + COPY requirements /var/modelscope RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /var/modelscope/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ @@ -16,47 +53,10 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /var/modelscope/tests.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip cache purge -# install jupyter plugin -RUN mkdir -p /root/.local/share/jupyter/labextensions/ && \ - cp -r /tmp/resources/jupyter_plugins/* /root/.local/share/jupyter/labextensions/ - -COPY docker/scripts/modelscope_env_init.sh /usr/local/bin/ms_env_init.sh -# python3.8 pip install git+https://github.com/jin-s13/xtcocoapi.git@v1.13 -# pip install git+https://github.com/gatagat/lap.git@v0.4.0 -RUN pip install --no-cache-dir text2sql_lgesql==1.3.0 \ - git+https://github.com/jin-s13/xtcocoapi.git@v1.13 \ - git+https://github.com/gatagat/lap.git@v0.4.0 \ - detectron2==0.3 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps - -RUN pip install --no-cache-dir mpi4py paint_ldm \ - mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 pai-easycv ms_swift \ - ipykernel fasttext fairseq deepspeed -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html - -ARG USE_GPU -# for cpu install cpu version faiss, faiss depends on blas lib, we install libopenblas TODO rename gpu or cpu version faiss -RUN if [ "$USE_GPU" = "True" ] ; then \ - pip install --no-cache-dir funtextprocessing kwsbp==0.0.6 faiss==1.7.2 safetensors typeguard==2.13.3 scikit-learn librosa==0.9.2 funasr -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ - else \ - pip install --no-cache-dir funtextprocessing kwsbp==0.0.6 https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/faiss-1.7.2-py37-none-linux_x86_64.whl safetensors typeguard==2.13.3 scikit-learn librosa==0.9.2 funasr -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ - fi - -RUN pip install --no-cache-dir wenetruntime==1.11.0 adaseq --no-deps COPY examples /modelscope/examples - -# for pai-easycv setup compatiblity issue ENV SETUPTOOLS_USE_DISTUTILS=stdlib - -RUN if [ "$USE_GPU" = "True" ] ; then \ - CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6" pip install --no-cache-dir 'git+https://github.com/facebookresearch/detectron2.git'; \ - else \ - echo 'cpu unsupport detectron2'; \ - fi - -# torchmetrics==0.11.4 for ofa -RUN pip install --no-cache-dir jupyterlab torchmetrics==0.11.4 tiktoken transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr -COPY docker/scripts/install_flash_attension.sh /tmp/install_flash_attension.sh -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash 
/tmp/install_flash_attension.sh; \ - else \ - echo 'cpu unsupport flash attention'; \ - fi +ENV VLLM_USE_MODELSCOPE=True +RUN cp /tmp/resources/conda.aliyun ~/.condarc && \ + pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ + pip config set install.trusted-host mirrors.aliyun.com && \ + cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list diff --git a/docker/Dockerfile.ubuntu_base b/docker/Dockerfile.ubuntu_base index b848e1a1..7f8409fe 100644 --- a/docker/Dockerfile.ubuntu_base +++ b/docker/Dockerfile.ubuntu_base @@ -9,10 +9,11 @@ SHELL ["/bin/bash", "-c"] COPY docker/rcfiles /tmp/resources COPY docker/jupyter_plugins /tmp/resources/jupyter_plugins RUN apt-get update && apt-get install -y --reinstall ca-certificates && \ - apt-get clean && \ - cp /tmp/resources/sources.list.aliyun /etc/apt/sources.list && \ - apt-get update && \ - apt-get install -y locales wget git strace gdb sox libopenmpi-dev curl \ + apt-get install -y apt-utils openssh-server locales wget git strace gdb sox libopenmpi-dev curl \ + iputils-ping net-tools iproute2 autoconf automake gperf libre2-dev libssl-dev \ + libtool libcurl4-openssl-dev libb64-dev libgoogle-perftools-dev patchelf \ + rapidjson-dev scons software-properties-common pkg-config unzip zlib1g-dev \ + libarchive-dev libxml2-dev libnuma-dev \ libgeos-dev strace vim ffmpeg libsm6 tzdata language-pack-zh-hans \ ttf-wqy-microhei ttf-wqy-zenhei xfonts-wqy libxext6 build-essential ninja-build && \ wget https://packagecloud.io/github/git-lfs/packages/debian/bullseye/git-lfs_3.2.0_amd64.deb/download -O ./git-lfs_3.2.0_amd64.deb && \ @@ -27,33 +28,17 @@ RUN apt-get update && apt-get install -y --reinstall ca-certificates && \ rm -rf /var/lib/apt/lists/* ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN.UTF-8 LC_ALL=zh_CN.UTF-8 +RUN wget -O /tmp/boost.tar.gz https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.gz && (cd /tmp && tar xzf boost.tar.gz) && mv /tmp/boost_1_80_0/boost /usr/include/boost #install and config python -ARG PYTHON_VERSION=3.7.13 +ARG PYTHON_VERSION=3.10.13 # Miniconda3-py37_23.1.0-1-Linux-x86_64.sh is last python3.7 version -RUN if [ "$PYTHON_VERSION" = "3.7.13" ] ; then \ - wget --quiet https://mirrors.aliyun.com/anaconda/miniconda/Miniconda3-py37_23.1.0-1-Linux-x86_64.sh -O ./miniconda.sh && \ +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py310_23.9.0-0-Linux-x86_64.sh -O ./miniconda.sh && \ /bin/bash miniconda.sh -b -p /opt/conda && \ rm -f miniconda.sh && \ ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - cp /tmp/resources/conda.tuna ~/.condarc && \ - source /root/.bashrc && \ - conda install --yes python==${PYTHON_VERSION} && \ - pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ - pip config set install.trusted-host mirrors.aliyun.com;\ -else \ - wget --quiet https://mirrors.aliyun.com/anaconda/miniconda/Miniconda3-latest-Linux-${arch}.sh -O ./miniconda.sh && \ - /bin/bash miniconda.sh -b -p /opt/conda && \ - rm -f miniconda.sh && \ - ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ - echo ". 
/opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - cp /tmp/resources/conda.tuna ~/.condarc && \ - source /root/.bashrc && \ - conda install --yes python==${PYTHON_VERSION} && \ - pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ - pip config set install.trusted-host mirrors.aliyun.com;\ -fi + source /root/.bashrc ARG USE_GPU=True @@ -85,12 +70,6 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ fi \ fi -# mmcv-full<=1.7.0 for mmdet3d compatible -RUN if [ "$USE_GPU" = "True" ] ; then \ - CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="5.0 5.2 6.0 6.1 7.0 7.5 8.0 8.6" MMCV_WITH_OPS=1 MAX_JOBS=8 FORCE_CUDA=1 pip install --no-cache-dir 'mmcv-full<=1.7.0' && pip cache purge; \ - else \ - MMCV_WITH_OPS=1 MAX_JOBS=8 pip install --no-cache-dir 'mmcv-full<=1.7.0' && pip cache purge; \ - fi # default shell bash ENV SHELL=/bin/bash @@ -98,12 +77,25 @@ ENV SHELL=/bin/bash RUN if [ "$USE_GPU" = "True" ] ; then \ pip install dgl -f https://data.dgl.ai/wheels/$CUDATOOLKIT_VERSION/repo.html; \ else \ - pip install --no-cache-dir dgl==0.9.0 dglgo -f https://data.dgl.ai/wheels/repo.html; \ + pip install --no-cache-dir dgl dglgo -f https://data.dgl.ai/wheels/repo.html; \ fi # copy install scripts COPY docker/scripts/install_unifold.sh docker/scripts/install_colmap.sh docker/scripts/install_pytorch3d_nvdiffrast.sh docker/scripts/install_tiny_cuda_nn.sh docker/scripts/install_apex.sh /tmp/ +# 3d supports +RUN if [ "$USE_GPU" = "True" ] ; then \ + bash /tmp/install_colmap.sh; \ + else \ + echo 'cpu unsupport colmap'; \ + fi +# install pytorch3d +RUN if [ "$USE_GPU" = "True" ] ; then \ + bash /tmp/install_pytorch3d_nvdiffrast.sh; \ + else \ + echo 'cpu unsupport pytorch3d nvdiffrast'; \ + fi + # for uniford RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_unifold.sh; \ @@ -112,28 +104,11 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ fi RUN if [ "$USE_GPU" = "True" ] ; then \ - export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6+PTX" && pip install --no-cache-dir git+https://github.com/gxd1994/Pointnet2.PyTorch.git@master#subdirectory=pointnet2; \ + export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.9;9.0;8.6+PTX" && pip install --no-cache-dir git+https://github.com/gxd1994/Pointnet2.PyTorch.git@master#subdirectory=pointnet2; \ else \ echo 'cpu unsupport Pointnet2'; \ fi -# 3d supports -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_colmap.sh; \ - else \ - echo 'cpu unsupport colmap'; \ - fi -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_tiny_cuda_nn.sh \ - else \ - echo 'cpu unsupport tiny_cudann'; \ - fi -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_pytorch3d_nvdiffrast.sh; \ - else \ - echo 'cpu unsupport pytorch3d nvdiffrast'; \ - fi -# end of 3D # install apex after deepspeed RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_apex.sh; \ @@ -141,4 +116,10 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ echo 'cpu unsupport apex'; \ fi +RUN if [ "$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/packages/mmcv_full-1.7.0-cp310-cp310-linux_x86_64.whl; \ + else \ + pip install --no-cache-dir mmcv_full==1.7.0+torch2.1cpu -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ + fi +RUN conda install imageio-ffmpeg -c conda-forge -y ENTRYPOINT [] diff --git a/docker/rcfiles/conda.aliyun b/docker/rcfiles/conda.aliyun new file mode 100644 index 00000000..d0aa2014 --- /dev/null +++ b/docker/rcfiles/conda.aliyun @@ -0,0 +1,14 @@ +channels: + - defaults +show_channel_urls: true 
+default_channels: + - http://mirrors.aliyun.com/anaconda/pkgs/main + - http://mirrors.aliyun.com/anaconda/pkgs/r + - http://mirrors.aliyun.com/anaconda/pkgs/msys2 +custom_channels: + conda-forge: http://mirrors.aliyun.com/anaconda/cloud + msys2: http://mirrors.aliyun.com/anaconda/cloud + bioconda: http://mirrors.aliyun.com/anaconda/cloud + menpo: http://mirrors.aliyun.com/anaconda/cloud + pytorch: http://mirrors.aliyun.com/anaconda/cloud + simpleitk: http://mirrors.aliyun.com/anaconda/cloud diff --git a/docker/rcfiles/conda.tuna b/docker/rcfiles/conda.tuna deleted file mode 100644 index ce8a2908..00000000 --- a/docker/rcfiles/conda.tuna +++ /dev/null @@ -1,15 +0,0 @@ -channels: - - defaults -show_channel_urls: true -default_channels: - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2 -custom_channels: - conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - msys2: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - bioconda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - menpo: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - simpleitk: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud diff --git a/docker/rcfiles/pip.conf.tsinghua b/docker/rcfiles/pip.conf.tsinghua deleted file mode 100644 index 4242075a..00000000 --- a/docker/rcfiles/pip.conf.tsinghua +++ /dev/null @@ -1,2 +0,0 @@ -[global] -index-url=https://pypi.tuna.tsinghua.edu.cn/simple diff --git a/docker/rcfiles/ubuntu2204.aliyun b/docker/rcfiles/ubuntu2204.aliyun new file mode 100644 index 00000000..d5dce70c --- /dev/null +++ b/docker/rcfiles/ubuntu2204.aliyun @@ -0,0 +1,10 @@ +deb http://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse +#deb http://mirrors.aliyun.com/ubuntu/ jammy-proposed main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-proposed main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse diff --git a/docker/scripts/install_apex.sh b/docker/scripts/install_apex.sh index 40d9f268..7ecd288b 100644 --- a/docker/scripts/install_apex.sh +++ b/docker/scripts/install_apex.sh @@ -2,6 +2,6 @@ export MAX_JOBS=16 \ && git clone https://github.com/NVIDIA/apex \ && cd apex \ && git checkout 6bd01c4b99a84648ad5e5238a959735e6936c813 \ -&& TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5;8.0;8.6" pip install -v --disable-pip-version-check --no-cache --global-option="--cpp_ext" --global-option="--cuda_ext" ./ \ +&& TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5;8.0;8.9;9.0;8.6+PTX" pip install -v --disable-pip-version-check --no-cache --global-option="--cpp_ext" --global-option="--cuda_ext" ./ \ && cd .. 
\ && rm -rf apex diff --git a/docker/scripts/install_colmap.sh b/docker/scripts/install_colmap.sh index f21fca1d..ada7077a 100644 --- a/docker/scripts/install_colmap.sh +++ b/docker/scripts/install_colmap.sh @@ -8,7 +8,7 @@ wget -q https://cmake.org/files/v3.25/cmake-3.25.2-linux-x86_64.sh \ && export CMAKE_BUILD_PARALLEL_LEVEL=36 \ && export MAX_JOBS=16 \ && export CUDA_ARCHITECTURES="all" \ - && git clone --depth 1 --branch 3.8 https://github.com/colmap/colmap.git \ + && git clone https://github.com/colmap/colmap.git \ && cd colmap \ && mkdir build \ && cd build \ diff --git a/docker/scripts/install_flash_attension.sh b/docker/scripts/install_flash_attension.sh index f37e567d..6413cca9 100644 --- a/docker/scripts/install_flash_attension.sh +++ b/docker/scripts/install_flash_attension.sh @@ -1,4 +1,4 @@ - git clone -b v2.3.2 https://github.com/Dao-AILab/flash-attention && \ - cd flash-attention && python setup.py install && \ + git clone -b v2.3.3 https://github.com/Dao-AILab/flash-attention && \ + cd flash-attention && MAX_JOBS=46 python setup.py install && \ cd .. && \ rm -rf flash-attention diff --git a/docker/scripts/install_pytorch3d_nvdiffrast.sh b/docker/scripts/install_pytorch3d_nvdiffrast.sh index c7880f92..c64ea7fb 100644 --- a/docker/scripts/install_pytorch3d_nvdiffrast.sh +++ b/docker/scripts/install_pytorch3d_nvdiffrast.sh @@ -1,6 +1,7 @@ export CMAKE_BUILD_PARALLEL_LEVEL=36 \ && export MAX_JOBS=36 \ - && export CMAKE_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;86" \ + && export CMAKE_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;8.6+PTX;87;89;90" \ + && export TORCH_CUDA_ARCH_LIST="5.0;5.2;6.0;6.1;7.0;7.5;8.0;8.6+PTX;8.7;8.9;9.0" \ && git clone --branch 2.1.0 --recursive https://github.com/NVIDIA/thrust.git \ && cd thrust \ && mkdir build \ @@ -10,7 +11,11 @@ export CMAKE_BUILD_PARALLEL_LEVEL=36 \ && cd ../.. \ && rm -rf thrust \ && pip install --no-cache-dir fvcore iopath \ - && pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable" \ + && curl -LO https://github.com/NVIDIA/cub/archive/2.1.0.tar.gz \ + && tar xzf 2.1.0.tar.gz \ + && export CUB_HOME=$PWD/cub-2.1.0 \ + && FORCE_CUDA=1 pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable" \ + && rm -fr 2.1.0.tar.gz $PWD/cub-2.1.0 \ && apt-get update \ && apt-get install -y --no-install-recommends pkg-config libglvnd0 libgl1 libglx0 libegl1 libgles2 libglvnd-dev libgl1-mesa-dev libegl1-mesa-dev libgles2-mesa-dev -y \ && git clone https://github.com/NVlabs/nvdiffrast.git \ diff --git a/docker/scripts/install_tiny_cuda_nn.sh b/docker/scripts/install_tiny_cuda_nn.sh index 96ae5c72..1aaa2863 100644 --- a/docker/scripts/install_tiny_cuda_nn.sh +++ b/docker/scripts/install_tiny_cuda_nn.sh @@ -1,7 +1,6 @@ -export CMAKE_BUILD_PARALLEL_LEVEL=36 && export MAX_JOBS=36 && export TCNN_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;86" \ +export CMAKE_BUILD_PARALLEL_LEVEL=36 && export MAX_JOBS=36 && export TCNN_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;89;90;86" \ && git clone --recursive https://github.com/nvlabs/tiny-cuda-nn \ && cd tiny-cuda-nn \ - && git checkout v1.6 \ && cd bindings/torch \ && python setup.py install \ && cd ../../.. 
\ diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index f83defd0..45d1d442 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -600,7 +600,7 @@ class HubApi: cookies = ModelScopeConfig.get_cookies() r = self.session.get(datahub_url, cookies=cookies) resp = r.json() - datahub_raise_on_error(datahub_url, resp) + datahub_raise_on_error(datahub_url, resp, r) dataset_id = resp['Data']['Id'] dataset_type = resp['Data']['Type'] return dataset_id, dataset_type @@ -613,7 +613,7 @@ class HubApi: cookies=cookies, headers=self.builder_headers(self.headers)) resp = r.json() - datahub_raise_on_error(datahub_url, resp) + datahub_raise_on_error(datahub_url, resp, r) file_list = resp['Data'] if file_list is None: raise NotExistError( @@ -866,7 +866,7 @@ class HubApi: cookies=cookies, headers={'user-agent': ModelScopeConfig.get_user_agent()}) resp = r.json() - datahub_raise_on_error(url, resp) + datahub_raise_on_error(url, resp, r) return resp['Data'] def dataset_download_statistics(self, dataset_name: str, namespace: str, use_streaming: bool) -> None: diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py index 48bb5fe0..804cfe27 100644 --- a/modelscope/hub/errors.py +++ b/modelscope/hub/errors.py @@ -117,12 +117,13 @@ def raise_on_error(rsp): raise RequestError(rsp['Message']) -def datahub_raise_on_error(url, rsp): +def datahub_raise_on_error(url, rsp, http_response: requests.Response): """If response error, raise exception Args: url (str): The request url rsp (HTTPResponse): The server response. + http_response: the origin http response. Raises: RequestError: the http request error. @@ -133,7 +134,7 @@ def datahub_raise_on_error(url, rsp): if rsp.get('Code') == HTTPStatus.OK: return True else: - request_id = get_request_id(rsp) + request_id = get_request_id(http_response) raise RequestError( f"Url = {url}, Request id={request_id} Message = {rsp.get('Message')},\ Please specify correct dataset_name and namespace.") From a19fe73afb089ef4406e9fc7a68604459fff4373 Mon Sep 17 00:00:00 2001 From: "biwen.lbw" Date: Tue, 28 Nov 2023 17:17:29 +0800 Subject: [PATCH 06/14] fix numpy bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修复numpy版本导致的bug Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14816762 * fix numpy bug --- modelscope/models/cv/face_reconstruction/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelscope/models/cv/face_reconstruction/utils.py b/modelscope/models/cv/face_reconstruction/utils.py index 655d8b2a..f23b2f70 100644 --- a/modelscope/models/cv/face_reconstruction/utils.py +++ b/modelscope/models/cv/face_reconstruction/utils.py @@ -767,6 +767,7 @@ def align_img(img, lm, lm3D, mask=None, target_size=224., rescale_factor=102.): # calculate translation and scale factors using 5 facial landmarks and standard landmarks of a 3D face t, s = POS(lm5p.transpose(), lm3D.transpose()) + t = t.squeeze() s = rescale_factor / s # processing the image From ae425433895e349b977137e4a67441aa59009715 Mon Sep 17 00:00:00 2001 From: "chenyafeng.cyf" Date: Wed, 29 Nov 2023 10:03:52 +0800 Subject: [PATCH 07/14] fix_gpu_bug Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14822269 --- modelscope/models/audio/sv/ERes2Net.py | 5 ++++- modelscope/models/audio/sv/ERes2Net_aug.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/modelscope/models/audio/sv/ERes2Net.py b/modelscope/models/audio/sv/ERes2Net.py index 0119783c..3c07390b 100644 --- 
a/modelscope/models/audio/sv/ERes2Net.py +++ b/modelscope/models/audio/sv/ERes2Net.py @@ -19,6 +19,7 @@ from modelscope.metainfo import Models from modelscope.models import MODELS, TorchModel from modelscope.models.audio.sv.fusion import AFF from modelscope.utils.constant import Tasks +from modelscope.utils.device import create_device class ReLU(nn.Hardtanh): @@ -314,6 +315,7 @@ class SpeakerVerificationERes2Net(TorchModel): self.m_channels = self.model_config['channels'] self.other_config = kwargs self.feature_dim = 80 + self.device = create_device(self.other_config['device']) self.embedding_model = ERes2Net( embed_dim=self.embed_dim, m_channels=self.m_channels) @@ -321,6 +323,7 @@ class SpeakerVerificationERes2Net(TorchModel): pretrained_model_name = kwargs['pretrained_model'] self.__load_check_point(pretrained_model_name) + self.embedding_model.to(self.device) self.embedding_model.eval() def forward(self, audio): @@ -333,7 +336,7 @@ class SpeakerVerificationERes2Net(TorchModel): ) == 2, 'modelscope error: the shape of input audio to model needs to be [N, T]' # audio shape: [N, T] feature = self.__extract_feature(audio) - embedding = self.embedding_model(feature) + embedding = self.embedding_model(feature.to(self.device)) return embedding.detach().cpu() diff --git a/modelscope/models/audio/sv/ERes2Net_aug.py b/modelscope/models/audio/sv/ERes2Net_aug.py index d0739cad..5540ff3e 100644 --- a/modelscope/models/audio/sv/ERes2Net_aug.py +++ b/modelscope/models/audio/sv/ERes2Net_aug.py @@ -19,6 +19,7 @@ from modelscope.metainfo import Models from modelscope.models import MODELS, TorchModel from modelscope.models.audio.sv.fusion import AFF from modelscope.utils.constant import Tasks +from modelscope.utils.device import create_device class ReLU(nn.Hardtanh): @@ -308,12 +309,13 @@ class SpeakerVerificationERes2Net(TorchModel): self.model_config = model_config self.other_config = kwargs self.feature_dim = 80 - + self.device = create_device(self.other_config['device']) self.embedding_model = ERes2Net_aug() pretrained_model_name = kwargs['pretrained_model'] self.__load_check_point(pretrained_model_name) + self.embedding_model.to(self.device) self.embedding_model.eval() def forward(self, audio): @@ -326,7 +328,7 @@ class SpeakerVerificationERes2Net(TorchModel): ) == 2, 'modelscope error: the shape of input audio to model needs to be [N, T]' # audio shape: [N, T] feature = self.__extract_feature(audio) - embedding = self.embedding_model(feature) + embedding = self.embedding_model(feature.to(self.device)) return embedding.detach().cpu() From 6c7fca830732d7356cb46826f3169a147e7fad38 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Wed, 29 Nov 2023 17:37:56 +0800 Subject: [PATCH 08/14] =?UTF-8?q?=E6=94=AF=E6=8C=81modelscope=E7=9B=B4?= =?UTF-8?q?=E6=8E=A5=E6=8B=89=E8=B5=B7=E6=8E=A8=E7=90=86=E6=9C=8D=E5=8A=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14702876 * add inference server code * add server requirement * fix import issue * debug * add command line * add llmpipeline support * modify port to int * add serer usage * remove unused code * fix lint issue * add inference server code * upgrade env to VLLM_USE_MODELSCOPE --- docs/source/server.md | 41 ++++++++++++++++ modelscope/cli/cli.py | 2 + modelscope/cli/server.py | 40 ++++++++++++++++ modelscope/server/__init__.py | 0 modelscope/server/api/__init__.py | 0 modelscope/server/api/routers/__init__.py | 0 
 modelscope/server/api/routers/health.py | 14 ++++++
 modelscope/server/api/routers/model_router.py | 45 ++++++++++++++++++
 modelscope/server/api/routers/router.py | 8 ++++
 modelscope/server/api_server.py | 45 ++++++++++++++++++
 modelscope/server/core/__init__.py | 0
 modelscope/server/core/event_handlers.py | 47 +++++++++++++++++++
 modelscope/server/models/__init__.py | 0
 modelscope/server/models/input.py | 8 ++++
 modelscope/server/models/output.py | 34 ++++++++++++++
 modelscope/utils/input_output.py | 31 ++++++++----
 requirements/svr.txt | 4 ++
 17 files changed, 310 insertions(+), 9 deletions(-)
 create mode 100644 docs/source/server.md
 create mode 100644 modelscope/cli/server.py
 create mode 100644 modelscope/server/__init__.py
 create mode 100644 modelscope/server/api/__init__.py
 create mode 100644 modelscope/server/api/routers/__init__.py
 create mode 100644 modelscope/server/api/routers/health.py
 create mode 100644 modelscope/server/api/routers/model_router.py
 create mode 100644 modelscope/server/api/routers/router.py
 create mode 100644 modelscope/server/api_server.py
 create mode 100644 modelscope/server/core/__init__.py
 create mode 100644 modelscope/server/core/event_handlers.py
 create mode 100644 modelscope/server/models/__init__.py
 create mode 100644 modelscope/server/models/input.py
 create mode 100644 modelscope/server/models/output.py
 create mode 100644 requirements/svr.txt

diff --git a/docs/source/server.md b/docs/source/server.md
new file mode 100644
index 00000000..150f5686
--- /dev/null
+++ b/docs/source/server.md
@@ -0,0 +1,41 @@
+# Using the modelscope server
+## 1. Generic service
+The modelscope library ships a simple model service built on FastAPI; the vast majority of models can be brought up with a single command.
+Usage:
+
+```bash
+modelscope server --model_id=modelscope/Llama-2-7b-chat-ms --revision=v1.0.5
+```
+The official images we provide can also start it with one command (the image is not finished yet):
+```bash
+docker run --rm --name maas_dev --shm-size=50gb --gpus='"device=0"' -e MODELSCOPE_CACHE=/modelscope_cache -v /host_path_to_modelscope_cache:/modelscope_cache -p 8000:8000 reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-cuda11.8.0-py310-torch2.1.0-tf2.14.0-1.9.5-server modelscope server --model_id=modelscope/Llama-2-7b-chat-ms --revision=v1.0.5
+```
+The service listens on port 8000 by default; you can change this with --port. It exposes two endpoints, whose API documentation is available at
+http://ip:port/docs.
+The describe endpoint returns the service's input/output schema together with sample input data, as shown below:
+![describe](https://modelscope.oss-cn-beijing.aliyuncs.com/resource/describe.jpg)
+For the call endpoint, you can directly copy the example data from the describe response, as shown below:
+![call](https://modelscope.oss-cn-beijing.aliyuncs.com/resource/call.jpg)
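+
+A minimal Python client sketch for the two endpoints above (it assumes the default host/port; the exact request body is task-dependent, so copy the real one from the `/describe` response):
+
+```python
+# Hedged client sketch: assumes the generic server runs on localhost:8000.
+import requests
+
+base = 'http://127.0.0.1:8000'
+
+# Inspect the schema and a ready-made sample request body.
+info = requests.get(f'{base}/describe').json()
+print(info['sample'])
+
+# Call the model; binary fields (image/audio/video) must be base64-encoded.
+resp = requests.post(f'{base}/call', json=info['sample'])
+print(resp.json())
+```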
+## 2. vLLM inference for large language models
+For LLMs we provide vLLM-based inference support; currently only some models support vLLM.
+
+### 2.1 Using modelscope models directly with vLLM
+You can make vLLM download models from www.modelscope.cn by setting an environment variable.
+
+Start the plain server:
+```bash
+VLLM_USE_MODELSCOPE=True python -m vllm.entrypoints.api_server --model="damo/nlp_gpt2_text-generation_english-base" --revision="v1.0.0"
+```
+Start the OpenAI-compatible server:
+```bash
+VLLM_USE_MODELSCOPE=True python -m vllm.entrypoints.openai.api_server --model="damo/nlp_gpt2_text-generation_english-base" --revision="v1.0.0"
+```
+
+If the model already exists in the modelscope cache directory, the cached copy is used directly; otherwise the model is downloaded from www.modelscope.cn.
+
+Start vLLM from the official modelscope image, with the port set to 9090:
+
+```bash
+docker run --rm --name maas_dev --shm-size=50gb --gpus='"device=0"' -e MODELSCOPE_CACHE=/modelscope_cache -v /host_path_to_modelscope_cache:/modelscope_cache -p 9090:9090 reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-cuda11.8.0-py310-torch2.1.0-tf2.14.0-1.9.5-server python -m vllm.entrypoints.api_server --model "modelscope/Llama-2-7b-chat-ms" --revision "v1.0.5" --port 9090
+```
diff --git a/modelscope/cli/cli.py b/modelscope/cli/cli.py
index a25502fd..d67e8aa1 100644
--- a/modelscope/cli/cli.py
+++ b/modelscope/cli/cli.py
@@ -6,6 +6,7 @@ from modelscope.cli.download import DownloadCMD
 from modelscope.cli.modelcard import ModelCardCMD
 from modelscope.cli.pipeline import PipelineCMD
 from modelscope.cli.plugins import PluginsCMD
+from modelscope.cli.server import ServerCMD


 def run_cmd():
@@ -17,6 +18,7 @@ def run_cmd():
     PluginsCMD.define_args(subparsers)
     PipelineCMD.define_args(subparsers)
     ModelCardCMD.define_args(subparsers)
+    ServerCMD.define_args(subparsers)

     args = parser.parse_args()

diff --git a/modelscope/cli/server.py b/modelscope/cli/server.py
new file mode 100644
index 00000000..2925d68f
--- /dev/null
+++ b/modelscope/cli/server.py
@@ -0,0 +1,40 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+from argparse import ArgumentParser
+from string import Template
+
+import uvicorn
+
+from modelscope.cli.base import CLICommand
+from modelscope.server.api_server import add_server_args, get_app
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+current_path = os.path.dirname(os.path.abspath(__file__))
+template_path = os.path.join(current_path, 'template')
+
+
+def subparser_func(args):
+    """ Function which will be called for a specific sub parser.
+    """
+    return ServerCMD(args)
+
+
+class ServerCMD(CLICommand):
+    name = 'server'
+
+    def __init__(self, args):
+        self.args = args
+
+    @staticmethod
+    def define_args(parsers: ArgumentParser):
+        """ define args for the server command.
+ """ + parser = parsers.add_parser(ServerCMD.name) + add_server_args(parser) + parser.set_defaults(func=subparser_func) + + def execute(self): + app = get_app(self.args) + uvicorn.run(app, host=self.args.host, port=self.args.port) diff --git a/modelscope/server/__init__.py b/modelscope/server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/server/api/__init__.py b/modelscope/server/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/server/api/routers/__init__.py b/modelscope/server/api/routers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/server/api/routers/health.py b/modelscope/server/api/routers/health.py new file mode 100644 index 00000000..2d88c58c --- /dev/null +++ b/modelscope/server/api/routers/health.py @@ -0,0 +1,14 @@ +from faulthandler import disable +from http import HTTPStatus +from typing import Any, Dict + +from fastapi import APIRouter + +from modelscope.server.models.output import ApiResponse + +router = APIRouter() + + +@router.get('', response_model=ApiResponse[Dict], status_code=200) +def health() -> Any: + return ApiResponse[Dict](Data={}, Code=HTTPStatus.OK, Success=True) diff --git a/modelscope/server/api/routers/model_router.py b/modelscope/server/api/routers/model_router.py new file mode 100644 index 00000000..8d3a33f2 --- /dev/null +++ b/modelscope/server/api/routers/model_router.py @@ -0,0 +1,45 @@ +from fastapi import APIRouter, Body +from pydantic import BaseModel +from starlette.requests import Request + +from modelscope.utils.input_output import \ + pipeline_output_to_service_base64_output # noqa E125 +from modelscope.utils.input_output import call_pipeline_with_json + +router = APIRouter() + + +@router.post('/call') +async def inference( + request: Request, + body: BaseModel = Body(examples=[{ + 'usage': 'copy body from describe' + }])): # noqa E125 + """Inference general interface. + + For image, video, audio etc binary data, need encoded with base64. + + Args: + request (Request): The request object. + request_info (ModelScopeRequest): The post body. + + Returns: + ApiResponse: For binary field, encoded with base64 + """ + pipeline_service = request.app.state.pipeline + pipeline_info = request.app.state.pipeline_info + request_json = await request.json() + result = call_pipeline_with_json(pipeline_info, pipeline_service, + request_json) + # convert output to json, if binary field, we need encoded. 
+ output = pipeline_output_to_service_base64_output( + pipeline_info['task_name'], result) + return output + + +@router.get('/describe') +async def describe(request: Request): + info = {} + info['schema'] = request.app.state.pipeline_info + info['sample'] = request.app.state.pipeline_sample + return info diff --git a/modelscope/server/api/routers/router.py b/modelscope/server/api/routers/router.py new file mode 100644 index 00000000..df1a1868 --- /dev/null +++ b/modelscope/server/api/routers/router.py @@ -0,0 +1,8 @@ +from fastapi import APIRouter +from starlette.routing import Route, WebSocketRoute + +from modelscope.server.api.routers import health, model_router + +api_router = APIRouter() +api_router.include_router(model_router.router, tags=['prediction'], prefix='') +api_router.include_router(health.router, tags=['health'], prefix='/health') diff --git a/modelscope/server/api_server.py b/modelscope/server/api_server.py new file mode 100644 index 00000000..99d20275 --- /dev/null +++ b/modelscope/server/api_server.py @@ -0,0 +1,45 @@ +import argparse + +import uvicorn +from fastapi import FastAPI + +from modelscope.server.api.routers.router import api_router +from modelscope.server.core.event_handlers import (start_app_handler, + stop_app_handler) + + +def get_app(args) -> FastAPI: + app = FastAPI( + title='modelscope_server', + version='0.1', + debug=True, + swagger_ui_parameters={'tryItOutEnabled': True}) + app.state.args = args + app.include_router(api_router) + + app.add_event_handler('startup', start_app_handler(app)) + app.add_event_handler('shutdown', stop_app_handler(app)) + return app + + +def add_server_args(parser): + parser.add_argument( + '--model_id', required=True, type=str, help='The target model id') + parser.add_argument( + '--revision', required=True, type=str, help='Model revision') + parser.add_argument('--host', default='0.0.0.0', help='Host to listen') + parser.add_argument('--port', type=int, default=8000, help='Server port') + parser.add_argument('--debug', default='debug', help='Set debug level.') + parser.add_argument( + '--llm_first', + type=bool, + default=True, + help='Use LLMPipeline first for llm models.') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('modelscope_server') + add_server_args(parser) + args = parser.parse_args() + app = get_app(args) + uvicorn.run(app, host=args.host, port=args.port) diff --git a/modelscope/server/core/__init__.py b/modelscope/server/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/server/core/event_handlers.py b/modelscope/server/core/event_handlers.py new file mode 100644 index 00000000..a4f515a2 --- /dev/null +++ b/modelscope/server/core/event_handlers.py @@ -0,0 +1,47 @@ +from typing import Callable + +from fastapi import FastAPI + +from modelscope.utils.input_output import ( # yapf: disable + create_pipeline, get_pipeline_information_by_pipeline, + get_task_input_examples, get_task_schemas) +from modelscope.utils.logger import get_logger + +# control the model start stop + +logger = get_logger() + + +def _startup_model(app: FastAPI) -> None: + logger.info('download model and create pipeline') + app.state.pipeline = create_pipeline(app.state.args.model_id, + app.state.args.revision, + app.state.args.llm_first) + info = {} + info['task_name'] = app.state.pipeline.group_key + info['schema'] = get_task_schemas(app.state.pipeline.group_key) + app.state.pipeline_info = info + app.state.pipeline_sample = get_task_input_examples( + app.state.pipeline.group_key) + 
logger.info('pipeline created.') + + +def _shutdown_model(app: FastAPI) -> None: + app.state.pipeline = None + logger.info('shutdown model service') + + +def start_app_handler(app: FastAPI) -> Callable: + + def startup() -> None: + _startup_model(app) + + return startup + + +def stop_app_handler(app: FastAPI) -> Callable: + + def shutdown() -> None: + _shutdown_model(app) + + return shutdown diff --git a/modelscope/server/models/__init__.py b/modelscope/server/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/server/models/input.py b/modelscope/server/models/input.py new file mode 100644 index 00000000..08ff9851 --- /dev/null +++ b/modelscope/server/models/input.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + + +class ModelScopeRequest(BaseModel): + + def __init__(self, input: object, parameters: object): + self.input = input + self.parameters = parameters diff --git a/modelscope/server/models/output.py b/modelscope/server/models/output.py new file mode 100644 index 00000000..39abcac2 --- /dev/null +++ b/modelscope/server/models/output.py @@ -0,0 +1,34 @@ +import datetime +from http import HTTPStatus +from typing import Generic, Optional, Type, TypeVar + +import json +from pydantic.generics import GenericModel + +ResultType = TypeVar('ResultType') + + +class ApiResponse(GenericModel, Generic[ResultType]): + Code: Optional[int] = HTTPStatus.OK + Success: Optional[bool] = True + RequestId: Optional[str] = '' + Message: Optional[str] = 'success' + Data: Optional[ResultType] = {} + """ + ResultType (_type_): The response data type. + Failed: {'Code': 10010101004, 'Message': 'get model info failed, err: unauthorized permission', + 'RequestId': '', 'Success': False} + Success: {'Code': 200, 'Data': {}, 'Message': 'success', 'RequestId': '', 'Success': True} + + + + def set_data(self, data=Type[ResultType]): + self.Data = data + + def set_message(self, message): + self.Message = message + + def toJSON(self): + return json.dumps(self, default=lambda o: o.isoformat() if (isinstance(o, datetime.datetime)) + else o.__dict__, sort_keys=True, indent=4) + """ diff --git a/modelscope/utils/input_output.py b/modelscope/utils/input_output.py index 679069c1..5e3e1305 100644 --- a/modelscope/utils/input_output.py +++ b/modelscope/utils/input_output.py @@ -36,16 +36,18 @@ decodes relevant fields. Example: # create pipeine instance and pipeline information, save it to app pipeline_instance = create_pipeline('damo/cv_gpen_image-portrait-enhancement', 'v1.0.0') + # get pipeline information, input,output, request example. pipeline_info = get_pipeline_information_by_pipeline(pipeline_instance) + # save the pipeline and info to the app for use in subsequent request processing app.state.pipeline = pipeline_instance app.state.pipeline_info = pipeline_info - # for service schema request. - pipeline_info = request.app.state.pipeline_info - return pipeline_info.schema - - # for service call request. - def inference(request: Request): + # for inference request, use call_pipeline_with_json to decode input and + # call pipeline, call pipeline_output_to_service_base64_output + # to encode necessary fields, and return the result. + # request and response are json format. + @router.post('/call') + async def inference(request: Request): pipeline_service = request.app.state.pipeline pipeline_info = request.app.state.pipeline_info request_json = await request.json() @@ -55,19 +57,30 @@ Example: # convert output to json, if binary field, we need encoded. 
diff --git a/modelscope/utils/input_output.py b/modelscope/utils/input_output.py
index 679069c1..5e3e1305 100644
--- a/modelscope/utils/input_output.py
+++ b/modelscope/utils/input_output.py
@@ -36,16 +36,18 @@ decodes relevant fields.
 Example:
     # create pipeline instance and pipeline information, save it to app
    pipeline_instance = create_pipeline('damo/cv_gpen_image-portrait-enhancement', 'v1.0.0')
+    # get pipeline information: input, output and a request example.
    pipeline_info = get_pipeline_information_by_pipeline(pipeline_instance)
+    # save the pipeline and info to the app for use in subsequent request processing
    app.state.pipeline = pipeline_instance
    app.state.pipeline_info = pipeline_info
-    # for service schema request.
-    pipeline_info = request.app.state.pipeline_info
-    return pipeline_info.schema
-
-    # for service call request.
-    def inference(request: Request):
+    # for an inference request, use call_pipeline_with_json to decode the
+    # input and call the pipeline, then pipeline_output_to_service_base64_output
+    # to encode the necessary fields and return the result.
+    # request and response are in JSON format.
+    @router.post('/call')
+    async def inference(request: Request):
        pipeline_service = request.app.state.pipeline
        pipeline_info = request.app.state.pipeline_info
        request_json = await request.json()
@@ -55,19 +57,30 @@ Example:
        # convert output to json; binary fields need to be encoded.
        output = pipeline_output_to_service_base64_output(pipeline_info.task_name, result)
        return output
+
+    # Input, output and sample information for the inference service can be
+    # obtained through the describe interface.
+    @router.get('/describe')
+    async def index(request: Request):
+        pipeline_info = request.app.state.pipeline_info
+        return pipeline_info.schema
+
 Todo:
     * Support more service input types, such as form.

"""


-def create_pipeline(model_id: str, revision: str):
+def create_pipeline(model_id: str, revision: str, llm_first: bool = True):
     model_configuration_file = model_file_download(
         model_id=model_id,
         file_path=ModelFile.CONFIGURATION,
         revision=revision)
     cfg = Config.from_file(model_configuration_file)
-    return pipeline(task=cfg.task, model=model_id, model_revision=revision)
+    return pipeline(
+        task=cfg.task,
+        model=model_id,
+        model_revision=revision,
+        llm_first=llm_first)


 def get_class_user_attributes(cls):
@@ -632,7 +645,7 @@ def call_pipeline_with_json(pipeline_info: PipelineInfomation,
     #     result = pipeline(**pipeline_inputs)
     # else:
     pipeline_inputs, parameters = service_base64_input_to_pipeline_input(
-        pipeline_info.task_name, body)
+        pipeline_info['task_name'], body)
     result = pipeline(pipeline_inputs, **parameters)
     return result
diff --git a/requirements/svr.txt b/requirements/svr.txt
new file mode 100644
index 00000000..ea439c66
--- /dev/null
+++ b/requirements/svr.txt
@@ -0,0 +1,4 @@
+fastapi
+requests
+sse-starlette
+uvicorn
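To make the decode → infer → encode path above concrete, here is a sketch of one request cycle using the helpers this patch touches. The text payload is invented, and `ppl` stands for the pipeline instance returned by `create_pipeline`:

```python
# One /call request, end to end, with the input_output.py helpers.
body = {'input': {'text': 'hello'}, 'parameters': {}}  # invented sample

pipeline_inputs, parameters = service_base64_input_to_pipeline_input(
    pipeline_info['task_name'], body)        # decode JSON/base64 fields
result = ppl(pipeline_inputs, **parameters)  # run the pipeline
output = pipeline_output_to_service_base64_output(
    pipeline_info['task_name'], result)      # encode binary fields for JSON
```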
From fe8bfa921996bf4bb23a28902f1015b6d088145e Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Wed, 29 Nov 2023 17:40:09 +0800
Subject: [PATCH 09/14] force install funasr, pai-easycv etc. when building
 image

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14812168

* force install funasr, pai-easycv etc. when building image
---
 .dev_scripts/build_image.sh     | 12 +++++++++---
 docker/Dockerfile.ubuntu        |  4 ----
 modelscope/utils/pre_compile.py |  2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh
index bb8c7e3d..abe7a1d9 100644
--- a/.dev_scripts/build_image.sh
+++ b/.dev_scripts/build_image.sh
@@ -163,8 +163,9 @@ echo -e "Building image with:\npython$python_version\npytorch$torch_version\ntensorflow$tensorflow_version\n"
 docker_file_content=`cat docker/Dockerfile.ubuntu`
 if [ "$is_ci_test" != "True" ]; then
     echo "Building ModelScope lib, will install ModelScope lib to image"
-    docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir -U adaseq pai-easycv ms_swift funasr 'transformers<4.35.0'"
-    docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$CIS_ENV_COMMIT_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $CIS_ENV_BRANCH --single-branch $REPO_URL && cd MaaS-lib && python setup.py install && cd / && rm -fr /tmp/MaaS-lib"
+    docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$CIS_ENV_COMMIT_ID && pip install --no-cache-dir -U adaseq pai-easycv ms_swift funasr 'transformers<4.35.0'"
+    docker_file_content="${docker_file_content} \nRUN pip uninstall modelscope -y && export COMMIT_ID=$CIS_ENV_COMMIT_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $CIS_ENV_BRANCH --single-branch $REPO_URL && cd MaaS-lib && pip install . && cd / && rm -fr /tmp/MaaS-lib"
+    docker_file_content="${docker_file_content} \nRUN MMCV_WITH_OPS=1 MAX_JOBS=32 pip install --no-cache-dir 'mmcv-full<=1.7.0' && pip cache purge"
 fi
 echo "$is_dsw"
 if [ "$is_dsw" == "False" ]; then
@@ -173,12 +174,17 @@
 else
     echo "Building dsw image will need to set the ModelScope lib cache location."
     docker_file_content="${docker_file_content} \nENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope"
     # pre compile extension
-    docker_file_content="${docker_file_content} \nRUN python -c 'from modelscope.utils.pre_compile import pre_compile_all;pre_compile_all()'"
+    docker_file_content="${docker_file_content} \nRUN export TORCH_CUDA_ARCH_LIST='6.0;6.1;7.0;7.5;8.0;8.9;9.0;8.6+PTX' && python -c 'from modelscope.utils.pre_compile import pre_compile_all;pre_compile_all()'"
 fi
 if [ "$is_ci_test" == "True" ]; then
     echo "Building CI image, uninstall modelscope"
     docker_file_content="${docker_file_content} \nRUN pip uninstall modelscope -y"
 fi
+docker_file_content="${docker_file_content} \nRUN cp /tmp/resources/conda.aliyun ~/.condarc && \
+    pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
+    pip config set install.trusted-host mirrors.aliyun.com && \
+    cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list "
+
 printf "$docker_file_content" > Dockerfile

 while true
diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu
index 55965f83..93308e25 100644
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -56,7 +56,3 @@ RUN pip install --no-cache-dir --upgrade pip && \
 COPY examples /modelscope/examples
 ENV SETUPTOOLS_USE_DISTUTILS=stdlib
 ENV VLLM_USE_MODELSCOPE=True
-RUN cp /tmp/resources/conda.aliyun ~/.condarc && \
-    pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
-    pip config set install.trusted-host mirrors.aliyun.com && \
-    cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list
diff --git a/modelscope/utils/pre_compile.py b/modelscope/utils/pre_compile.py
index 2d9d3b0d..6415f677 100644
--- a/modelscope/utils/pre_compile.py
+++ b/modelscope/utils/pre_compile.py
@@ -18,10 +18,10 @@ def pre_compile_megatron_util():

 def pre_compile_all():
     if torch.cuda.is_available():
         # extension require cuda.
-        pre_compile_megatron_util()
         # pre compile pai-easycv
         from easycv.thirdparty.deformable_attention.functions import ms_deform_attn_func
     # extension for all platform.
+    pre_compile_megatron_util()


 if __name__ == '__main__':
From 51a1b76e91c53ae9278726c0eff228e8c9d179b1 Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Wed, 29 Nov 2023 17:41:44 +0800
Subject: [PATCH 10/14] fix python3.10 compatibility issue

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14678226

* modify librosa version
* fix python3.10 compatibility issue
* remove healpy from requirements for Windows compatibility
---
 .../utils/postprocessing.py         | 2 +-
 modelscope/utils/pre_compile.py     | 1 +
 requirements/audio/audio_signal.txt | 2 +-
 requirements/audio/audio_tts.txt    | 2 +-
 requirements/cv.txt                 | 3 ++-
 requirements/multi-modal.txt        | 2 +-
 6 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/modelscope/models/cv/referring_video_object_segmentation/utils/postprocessing.py b/modelscope/models/cv/referring_video_object_segmentation/utils/postprocessing.py
index 64582140..b9792688 100644
--- a/modelscope/models/cv/referring_video_object_segmentation/utils/postprocessing.py
+++ b/modelscope/models/cv/referring_video_object_segmentation/utils/postprocessing.py
@@ -109,7 +109,7 @@ class ReferYoutubeVOSPostProcess(nn.Module):
                 1)  # remove the padding
             # resize the masks back to their original frames dataset size for evaluation:
             original_frames_size = video_metadata['original_frame_size']
-            tuple_size = tuple(original_frames_size.cpu().numpy())
+            tuple_size = tuple(original_frames_size.cpu())
             video_pred_masks = F.interpolate(
                 video_pred_masks.float(), size=tuple_size, mode='nearest')
             video_pred_masks = video_pred_masks.to(torch.uint8).cpu()
diff --git a/modelscope/utils/pre_compile.py b/modelscope/utils/pre_compile.py
index 6415f677..cddf8704 100644
--- a/modelscope/utils/pre_compile.py
+++ b/modelscope/utils/pre_compile.py
@@ -20,6 +20,7 @@ def pre_compile_all():
     if torch.cuda.is_available():
         # extension require cuda.
         # pre compile pai-easycv
         from easycv.thirdparty.deformable_attention.functions import ms_deform_attn_func
+        pre_compile_megatron_util()
     # extension for all platform.
     pre_compile_megatron_util()
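Taken together, PATCH 09 and PATCH 10 leave `pre_compile_all` calling `pre_compile_megatron_util` twice on CUDA machines: once in the CUDA branch added here, and once in the unconditional tail added by PATCH 09. Reconstructed from the two hunks for clarity (not a further change):

```python
# Net state of pre_compile_all after PATCH 09 + PATCH 10 (reconstruction).
def pre_compile_all():
    if torch.cuda.is_available():
        # extensions that require CUDA; importing easycv's deformable
        # attention triggers its extension build
        from easycv.thirdparty.deformable_attention.functions import \
            ms_deform_attn_func
        pre_compile_megatron_util()
    # extensions for all platforms
    pre_compile_megatron_util()  # runs a second time when CUDA is available
```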
diff --git a/requirements/audio/audio_signal.txt b/requirements/audio/audio_signal.txt
index 023fbbdf..65f1ec61 100644
--- a/requirements/audio/audio_signal.txt
+++ b/requirements/audio/audio_signal.txt
@@ -1,6 +1,6 @@
 hdbscan
 hyperpyyaml
-librosa==0.9.2
+librosa==0.10.1
 MinDAEC
 mir_eval>=0.7
 rotary_embedding_torch>=0.1.5
diff --git a/requirements/audio/audio_tts.txt b/requirements/audio/audio_tts.txt
index 8b33f02f..5cff1b28 100644
--- a/requirements/audio/audio_tts.txt
+++ b/requirements/audio/audio_tts.txt
@@ -3,7 +3,7 @@ greenlet>=1.1.2
 inflect
 jedi>=0.18.1
 kantts
-librosa==0.9.2
+librosa==0.10.1
 lxml
 matplotlib
 msgpack>=1.0.4
diff --git a/requirements/cv.txt b/requirements/cv.txt
index ee9f5582..c8edb672 100644
--- a/requirements/cv.txt
+++ b/requirements/cv.txt
@@ -17,7 +17,8 @@ ffmpeg>=1.4
 ffmpeg-python>=0.2.0
 ftfy
 fvcore
-healpy
+# removed for Windows support
+# healpy
 imageio>=2.9.0
 imageio-ffmpeg>=0.4.2
 imgaug>=0.4.0
diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt
index 59415bb0..568ef76c 100644
--- a/requirements/multi-modal.txt
+++ b/requirements/multi-modal.txt
@@ -4,7 +4,7 @@ decord>=0.6.0
 diffusers>=0.19.0
 fairseq
 ftfy>=6.0.3
-librosa==0.9.2
+librosa==0.10.1
 opencv-python
 pycocoevalcap>=1.2
 pycocotools>=2.0.4
From a8e9e0a48f42207a6deee62b8b66e8e48726e6cc Mon Sep 17 00:00:00 2001
From: "xingjun.wxj"
Date: Fri, 1 Dec 2023 17:33:07 +0800
Subject: [PATCH 11/14] set datasets==2.14.6

---
 requirements/framework.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/framework.txt b/requirements/framework.txt
index 4efce85d..b77f6567 100644
--- a/requirements/framework.txt
+++ b/requirements/framework.txt
@@ -1,6 +1,6 @@
 addict
 attrs
-datasets>=2.13.0,<=2.14.6
+datasets==2.14.6
 einops
 filelock>=3.3.0
 gast>=0.2.2
From 2a991a5c6ba5a649f0135e85cfd1188de70cd374 Mon Sep 17 00:00:00 2001
From: "xingjun.wxj"
Date: Wed, 6 Dec 2023 16:25:20 +0800
Subject: [PATCH 12/14] update datasets version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update datasets version.
Compatibility checked against: 2.14.5, 2.14.6, 2.15.0

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14916111
---
 requirements/framework.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/framework.txt b/requirements/framework.txt
index b77f6567..8804fe8c 100644
--- a/requirements/framework.txt
+++ b/requirements/framework.txt
@@ -1,6 +1,6 @@
 addict
 attrs
-datasets==2.14.6
+datasets>=2.14.5
 einops
 filelock>=3.3.0
 gast>=0.2.2
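A quick local sanity check for the relaxed pin (this snippet assumes the `packaging` module is importable, which is an assumption of the sketch, not something this patch declares):

```python
# Verify the installed datasets release satisfies datasets>=2.14.5.
import datasets
from packaging import version

assert version.parse(datasets.__version__) >= version.parse('2.14.5'), \
    f'datasets {datasets.__version__} is older than 2.14.5'
```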
From 75ce66f824e6f6bb39e2d50dc92a5eecddc79cea Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Fri, 8 Dec 2023 14:16:37 +0800
Subject: [PATCH 13/14] fix exception when there is a version after sdk
 release

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14949463

* fix exception when there is a version after sdk release
---
 modelscope/hub/api.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index 45d1d442..e11f2de5 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -493,8 +493,9 @@ class HubApi:
                 if len(revisions) > 0:
                     revision = revisions[0]  # use latest revision before release time.
                 else:
+                    revision = MASTER_MODEL_BRANCH
                     vl = '[%s]' % ','.join(all_revisions)
-                    raise NoValidRevisionError('Model revision should be specified from revisions: %s' % (vl))
+                    logger.warning('Model revision should be specified from revisions: %s' % (vl))
                 logger.warning('Model revision not specified, use revision: %s' % revision)
             else:
                 # use user-specified revision
From b16e24440e35f6473ea1b36eb494b3b1d4a22fea Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Fri, 8 Dec 2023 22:25:28 +0800
Subject: [PATCH 14/14] build whl with py310

---
 .github/workflows/publish.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
index 7c2e180a..dacf6df7 100644
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -15,10 +15,10 @@ jobs:
     #if: startsWith(github.event.ref, 'refs/tags')
     steps:
       - uses: actions/checkout@v2
-      - name: Set up Python 3.7
+      - name: Set up Python 3.10
         uses: actions/setup-python@v2
         with:
-          python-version: '3.7'
+          python-version: '3.10'
       - name: Install wheel
         run: pip install wheel && pip install -r requirements/framework.txt
      - name: Build ModelScope