From ea324b484121937cfdca1de4c82e97c3484a21c0 Mon Sep 17 00:00:00 2001
From: "suluyan.sly" 
Date: Wed, 8 Nov 2023 16:10:02 +0800
Subject: [PATCH 01/14] feat: deploy checker for swingdeploy

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14575909

* feat: deploy checker for swingdeploy

* fix: configuration.json mismatch the revision.
---
 modelscope/utils/deploy_checker.py | 90 ++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 modelscope/utils/deploy_checker.py

diff --git a/modelscope/utils/deploy_checker.py b/modelscope/utils/deploy_checker.py
new file mode 100644
index 00000000..c57f7d64
--- /dev/null
+++ b/modelscope/utils/deploy_checker.py
@@ -0,0 +1,90 @@
+import argparse
+import os
+import traceback
+from typing import List, Union
+
+import json
+
+from modelscope.hub.api import HubApi
+from modelscope.hub.file_download import model_file_download
+from modelscope.hub.utils.utils import get_cache_dir
+from modelscope.pipelines import pipeline
+from modelscope.utils.config import Config
+from modelscope.utils.constant import ModelFile
+from modelscope.utils.input_output import (
+    call_pipeline_with_json, get_pipeline_information_by_pipeline,
+    get_task_input_examples, pipeline_output_to_service_base64_output)
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+class DeployChecker:
+
+    def __init__(self):
+        self.api = HubApi()
+
+    def check_model(self, model_id: str, model_revision=None):
+        # get model_revision & task info
+        if not model_revision:
+            model_revisions = self.api.list_model_revisions(model_id)
+            logger.info(
+                f'All model_revisions of `{model_id}`: {model_revisions}')
+            if len(model_revisions):
+                model_revision = model_revisions[0]
+            else:
+                logger.error(f'{model_id} has no revision.')
+
+        configuration_file = model_file_download(
+            model_id=model_id,
+            file_path=ModelFile.CONFIGURATION,
+            revision=model_revision)
+        cfg = Config.from_file(configuration_file)
+        task = cfg.safe_get('task')
+
+        # init pipeline
+        ppl = pipeline(
+            task=task,
+            model=model_id,
+            model_revision=model_revision,
+            llm_first=True)
+        pipeline_info = get_pipeline_information_by_pipeline(ppl)
+
+        # call pipeline
+        data = get_task_input_examples(task)
+
+        infer_result = call_pipeline_with_json(pipeline_info, ppl, data)
+        result = pipeline_output_to_service_base64_output(task, infer_result)
+        return result
+
+
+def check_deploy(models: Union[str, List], revisions: Union[str, List] = None):
+    if not isinstance(models, list):
+        models = [models]
+    if not isinstance(revisions, list):
+        revisions = [revisions] * len(models)
+
+    if len(models) != len(revisions):
+        logger.error(
+            f'The numbers of models and revisions must be equal: the number of models'
+            f' is {len(models)} while the number of revisions is {len(revisions)}.'
+        )
+
+    checker = DeployChecker()
+    for model, revision in zip(models, revisions):
+        try:
+            res = checker.check_model(model, revision)
+            logger.info(f'{model} {revision}: Deploy pre-check pass. {res}\n')
+        except BaseException as e:
+            logger.error(
+                f'{model} {revision}: Deploy pre-check failed: {e}. {traceback.format_exc()}\n'
+            )
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model_id', type=str)
+    parser.add_argument('--revision', type=str, default=None)
+    args = parser.parse_args()
+
+    check_deploy(args.model_id, args.revision)
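
A quick sketch of how the checker above can be driven from Python rather than the CLI. The first model ID is one that appears later in this series; the batched IDs and revisions are purely illustrative placeholders:

```python
# Hedged usage sketch for modelscope/utils/deploy_checker.py (patch 01).
from modelscope.utils.deploy_checker import check_deploy

# Single model; the newest revision is resolved automatically.
check_deploy('damo/cv_gpen_image-portrait-enhancement')

# Several models with pinned revisions (these model IDs are hypothetical).
check_deploy(
    models=['damo/model-a', 'damo/model-b'],
    revisions=['v1.0.0', 'v1.0.2'])
```
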
From 00eb4219a06686816d2e97d43eb7407d3371677a Mon Sep 17 00:00:00 2001
From: myf272609 
Date: Wed, 8 Nov 2023 21:11:21 +0800
Subject: [PATCH 02/14] [to #42322933] fix issues for 3dhuman models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Character animation: add support for a custom Blender path; remove model position normalization
- Character rendering: add support for a custom render resolution; add model position normalization

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14459360

* fix some issues

* fix
---
 .../pipelines/cv/human3d_animation_pipeline.py | 10 ++++++----
 .../pipelines/cv/human3d_render_pipeline.py    | 18 ++++++++++++------
 tests/pipelines/test_human3d_animation.py      |  1 +
 tests/pipelines/test_human3d_render.py         |  1 +
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/modelscope/pipelines/cv/human3d_animation_pipeline.py b/modelscope/pipelines/cv/human3d_animation_pipeline.py
index d03cd8a3..4e5ab46d 100644
--- a/modelscope/pipelines/cv/human3d_animation_pipeline.py
+++ b/modelscope/pipelines/cv/human3d_animation_pipeline.py
@@ -72,7 +72,7 @@ class Human3DAnimationPipeline(Pipeline):
                                  (case_name, action_name))
 
         exec_path = os.path.join(self.model_dir, 'skinning.py')
-        cmd = f'blender -b -P {exec_path} -- --input {self.case_dir}' \
+        cmd = f'{self.blender} -b -P {exec_path} -- --input {self.case_dir}' \
              f' --gltf_path {gltf_path} --action {self.action}'
         os.system(cmd)
         return gltf_path
@@ -83,9 +83,6 @@ class Human3DAnimationPipeline(Pipeline):
         mesh = read_obj(mesh_path)
         tex = cv2.imread(tex_path)
         vertices = mesh['vertices']
-        cent = (vertices.max(axis=0) + vertices.min(axis=0)) / 2
-        new_cent = (0, 1.8 / 2, 0)
-        vertices -= (cent - new_cent)
         mesh['vertices'] = vertices
         mesh['texture_map'] = tex
         write_obj(mesh_path, mesh)
@@ -108,6 +105,11 @@ class Human3DAnimationPipeline(Pipeline):
         else:
             save_dir = None
 
+        if 'blender' in input:
+            self.blender = input['blender']
+        else:
+            self.blender = 'blender'
+
         if case_id.endswith('.obj'):
             mesh_path = case_id
         else:
diff --git a/modelscope/pipelines/cv/human3d_render_pipeline.py b/modelscope/pipelines/cv/human3d_render_pipeline.py
index 44d0bb21..cf506d19 100644
--- a/modelscope/pipelines/cv/human3d_render_pipeline.py
+++ b/modelscope/pipelines/cv/human3d_render_pipeline.py
@@ -68,6 +68,8 @@ class Human3DRenderPipeline(Pipeline):
 
     def format_nvdiffrast_format(self, mesh, tex):
         vert = mesh['vertices']
+        cent = (vert.max(axis=0) + vert.min(axis=0)) / 2
+        vert -= cent
         tri = mesh['faces']
         tri = tri - 1 if tri.min() == 1 else tri
         vert_uv = mesh['uvs']
@@ -81,7 +83,7 @@ class Human3DRenderPipeline(Pipeline):
         tex = torch.from_numpy(tex.astype(np.float32) / 255.0).cuda()
         return vtx_pos, pos_idx, vtx_uv, uv_idx, tex
 
-    def render_scene(self, mesh_path):
+    def render_scene(self, mesh_path, resolution=512):
         if not os.path.exists(mesh_path):
             logger.info('can not found %s, use default one' % mesh_path)
             mesh_path = os.path.join(self.model_dir, '3D-assets',
@@ -99,8 +101,8 @@ class Human3DRenderPipeline(Pipeline):
         frames_normals = []
         for i in tqdm.tqdm(range(frame_length)):
             proj = projection(x=0.4, n=1.0, f=200.0)
-            a_rot = np.matmul(rotate_x(-0.1), rotate_y(ang))
-            a_mv = np.matmul(translate(0, 0, -2.5), a_rot)
+            a_rot = np.matmul(rotate_x(0.0), rotate_y(ang))
+            a_mv = np.matmul(translate(0, 0, -2.7), a_rot)
             r_mvp = np.matmul(proj,
a_mv).astype(np.float32) pred_img, pred_mask, normal = render( glctx, @@ -110,7 +112,7 @@ class Human3DRenderPipeline(Pipeline): vtx_uv, uv_idx, tex, - resolution=512, + resolution=resolution, enable_mip=False, max_mip_level=9) color = np.clip( @@ -123,7 +125,7 @@ class Human3DRenderPipeline(Pipeline): frames_normals.append(normals) ang = ang + step - logger.info('load case %s done' + logger.info('render case %s done' % os.path.basename(os.path.dirname(mesh_path))) return mesh, frames_color, frames_normals @@ -131,6 +133,10 @@ class Human3DRenderPipeline(Pipeline): def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: dataset_id = input['dataset_id'] case_id = input['case_id'] + if 'resolution' in input: + resolution = input['resolution'] + else: + resolution = 512 if case_id.endswith('.obj'): mesh_path = case_id else: @@ -142,7 +148,7 @@ class Human3DRenderPipeline(Pipeline): case_dir = os.path.join(data_dir, case_id) mesh_path = os.path.join(case_dir, 'body.obj') - mesh, colors, normals = self.render_scene(mesh_path) + mesh, colors, normals = self.render_scene(mesh_path, resolution) results = { 'mesh': mesh, diff --git a/tests/pipelines/test_human3d_animation.py b/tests/pipelines/test_human3d_animation.py index 75fc4c9d..97ee12f4 100644 --- a/tests/pipelines/test_human3d_animation.py +++ b/tests/pipelines/test_human3d_animation.py @@ -21,6 +21,7 @@ class Human3DAnimationTest(unittest.TestCase): 'action_dataset': 'damo/3DHuman_action_dataset', 'action': 'SwingDancing', 'save_dir': 'outputs', + 'blender': 'blender', } output = human3d(input) print('saved animation file to %s' % output) diff --git a/tests/pipelines/test_human3d_render.py b/tests/pipelines/test_human3d_render.py index e1840af4..47bb6a83 100644 --- a/tests/pipelines/test_human3d_render.py +++ b/tests/pipelines/test_human3d_render.py @@ -45,6 +45,7 @@ class Human3DRenderTest(unittest.TestCase): input = { 'dataset_id': 'damo/3DHuman_synthetic_dataset', 'case_id': '3f2a7538253e42a8', + 'resolution': 1024, } output = human3d(input) self.save_results(output, './human3d_results') From 6833bdabfc03b1afa8e3b3c30e485a41b032f004 Mon Sep 17 00:00:00 2001 From: "xingjun.wxj" Date: Fri, 17 Nov 2023 10:46:58 +0800 Subject: [PATCH 03/14] set datasets==2.14.6 Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14593950 --- requirements/framework.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/framework.txt b/requirements/framework.txt index 83e69a00..4efce85d 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -1,6 +1,6 @@ addict attrs -datasets>=2.8.0,<=2.13.0 +datasets>=2.13.0,<=2.14.6 einops filelock>=3.3.0 gast>=0.2.2 From b8e86060f51b56b42f0944a07a1fabc6bbb3f613 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Mon, 27 Nov 2023 13:56:33 +0800 Subject: [PATCH 04/14] numpy version unrestrict Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/13398805 * numpy version unrestrict --- requirements/tensorflow1x.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/tensorflow1x.txt b/requirements/tensorflow1x.txt index 5d680652..c808f28f 100644 --- a/requirements/tensorflow1x.txt +++ b/requirements/tensorflow1x.txt @@ -1 +1 @@ -numpy<1.20.0 +numpy<=1.18.5 From 5ba9fd23079b87a14a8aa92ee297e744039bae22 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Mon, 27 Nov 2023 20:21:00 +0800 Subject: [PATCH 05/14] modify auto gptq and vllm env Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14790283 * upgrade to python3.10 * 
modify auto gptq and vllm env

* fix lint issue

* Merge remote-tracking branch 'origin/master' into python10_support

* python310 support

* build from repo

* add commit id force install modelscope every build

* add commit id force install modelscope every build

* fix cpu build issue

* fix datahub error message

* Merge branch 'python10_support' of gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib into python10_support

* add --no-cache-dir install auto_gptq
---
 .dev_scripts/build_base_image.sh | 42 +++++++--
 .dev_scripts/build_image.sh | 24 +++--
 docker/Dockerfile.ubuntu | 90 +++++++++----------
 docker/Dockerfile.ubuntu_base | 79 +++++++---------
 docker/rcfiles/conda.aliyun | 14 +++
 docker/rcfiles/conda.tuna | 15 ----
 docker/rcfiles/pip.conf.tsinghua | 2 -
 docker/rcfiles/ubuntu2204.aliyun | 10 +++
 docker/scripts/install_apex.sh | 2 +-
 docker/scripts/install_colmap.sh | 2 +-
 docker/scripts/install_flash_attension.sh | 4 +-
 .../scripts/install_pytorch3d_nvdiffrast.sh | 9 +-
 docker/scripts/install_tiny_cuda_nn.sh | 3 +-
 modelscope/hub/api.py | 6 +-
 modelscope/hub/errors.py | 5 +-
 15 files changed, 167 insertions(+), 140 deletions(-)
 create mode 100644 docker/rcfiles/conda.aliyun
 delete mode 100644 docker/rcfiles/conda.tuna
 delete mode 100644 docker/rcfiles/pip.conf.tsinghua
 create mode 100644 docker/rcfiles/ubuntu2204.aliyun

diff --git a/.dev_scripts/build_base_image.sh b/.dev_scripts/build_base_image.sh
index 8c8c9a0e..872798cd 100644
--- a/.dev_scripts/build_base_image.sh
+++ b/.dev_scripts/build_base_image.sh
@@ -1,19 +1,24 @@
 #!/bin/bash
 # default values.
-BASE_CPU_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04
+BASE_CPU_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu
 BASE_GPU_CUDA113_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.3.0-cudnn8-devel
 BASE_GPU_CUDA117_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.7.1-cudnn8-devel
 BASE_GPU_CUDA118_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.8.0-cudnn8-devel
+BASE_GPU_CUDA121_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:22.04-cuda12.1.0-cudnn8-devel
+BASE_GPU_CUDA122_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:22.04-cuda12.2.2-cudnn8-devel
 MODELSCOPE_REPO_ADDRESS=reg.docker.alibaba-inc.com/modelscope/modelscope
 python_version=3.7.13
 torch_version=1.11.0
 cuda_version=11.7.1
 cudatoolkit_version=11.3
 tensorflow_version=1.15.5
+os_version=20.04
 version=None
 is_cpu=False
+is_dryrun=False
 function usage(){
     echo "usage: build.sh "
+    echo "  --os=ubuntu_version set ubuntu os version, default: 20.04"
     echo "  --python=python_version set python version, default: $python_version"
     echo "  --cuda=cuda_version set cuda version,only[11.3.0, 11.7.1], fefault: $cuda_version"
     echo "  --torch=torch_version set pytorch version, fefault: $torch_version"
     echo "  --test option for run test before push image, only push on ci test pass"
     echo "  --cpu option for build cpu version"
     echo "  --push option for push image to remote repo"
+    echo "  --dryrun create the Dockerfile only, do not build"
 }
 for i in "$@"; do
     case $i in
+    --os=*)
+      os_version="${i#*=}"
+      shift
+      ;;
     --python=*)
       python_version="${i#*=}"
       shift
@@ -52,6 +62,10 @@ for i in "$@"; do
       is_push=True
       shift # option for push image to remote repo
       ;;
+    --dryrun)
+      is_dryrun=True
+      shift
+      ;;
     --help)
       usage
       exit 0
@@ -68,7 +82,7 @@ done

 if [ "$cuda_version" == 11.3.0 ]; then
     echo "Building base image cuda11.3.0"
-    BASE_GPU_IMAGE=$BASE_GPU_CUDA113_IMAGE
+    BASE_GPU_IMAGE=$MODELSCOPE_REPO_ADDRESS:$os_version-cuda$cuda_version-cudnn8-devel
cudatoolkit_version=cu113 elif [ "$cuda_version" == 11.7.1 ]; then echo "Building base image cuda11.7.1" @@ -77,43 +91,55 @@ elif [ "$cuda_version" == 11.7.1 ]; then elif [ "$cuda_version" == 11.8.0 ]; then echo "Building base image cuda11.8.0" cudatoolkit_version=cu118 - BASE_GPU_IMAGE=$BASE_GPU_CUDA118_IMAGE + BASE_GPU_IMAGE=$MODELSCOPE_REPO_ADDRESS:$os_version-cuda$cuda_version-cudnn8-devel +elif [ "$cuda_version" == 12.1.0 ]; then + cudatoolkit_version=cu121 + BASE_GPU_IMAGE=$BASE_GPU_CUDA121_IMAGE else echo "Unsupport cuda version: $cuda_version" exit 1 fi if [ "$is_cpu" == "True" ]; then - export BASE_IMAGE=$BASE_CPU_IMAGE - base_tag=ubuntu20.04 + export BASE_IMAGE=$BASE_CPU_IMAGE:$os_version + base_tag=ubuntu$os_version export USE_GPU=False else export BASE_IMAGE=$BASE_GPU_IMAGE - base_tag=ubuntu20.04-cuda$cuda_version + base_tag=ubuntu$os_version-cuda$cuda_version export USE_GPU=True fi + if [[ $python_version == 3.7* ]]; then base_tag=$base_tag-py37 elif [[ $python_version == 3.8* ]]; then base_tag=$base_tag-py38 +elif [[ $python_version == 3.10* ]]; then + base_tag=$base_tag-py310 else echo "Unsupport python version: $python_version" exit 1 fi - target_image_tag=$base_tag-torch$torch_version-tf$tensorflow_version-base export IMAGE_TO_BUILD=$MODELSCOPE_REPO_ADDRESS:$target_image_tag export PYTHON_VERSION=$python_version export TORCH_VERSION=$torch_version export CUDATOOLKIT_VERSION=$cudatoolkit_version export TENSORFLOW_VERSION=$tensorflow_version +echo "From: $BASE_IMAGE build: $target_image_tag" echo -e "Building image with:\npython$python_version\npytorch$torch_version\ntensorflow:$tensorflow_version\ncudatoolkit:$cudatoolkit_version\ncpu:$is_cpu\n" docker_file_content=`cat docker/Dockerfile.ubuntu_base` printf "$docker_file_content" > Dockerfile +if [ "$is_dryrun" == "True" ]; then + echo 'Dockerfile created' + exit 0 +fi + +# DOCKER_BUILDKIT=0 while true do - docker build -t $IMAGE_TO_BUILD \ + DOCKER_BUILDKIT=0 docker build -t $IMAGE_TO_BUILD \ --build-arg USE_GPU \ --build-arg BASE_IMAGE \ --build-arg PYTHON_VERSION \ diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh index dceaaa22..bb8c7e3d 100644 --- a/.dev_scripts/build_image.sh +++ b/.dev_scripts/build_image.sh @@ -44,6 +44,8 @@ for i in "$@"; do cudatoolkit_version=11.7 elif [ "$cuda_version" == "11.8.0" ]; then cudatoolkit_version=11.8 + elif [ "$cuda_version" == "12.1.0" ]; then + cudatoolkit_version=12.1 else echo "Unsupport cuda version $cuda_version" exit 1 @@ -130,6 +132,17 @@ elif [[ $python_version == 3.8* ]]; then export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu20.04-cuda$cuda_version-py38-torch$torch_version-tf$tensorflow_version-base fi base_tag=$base_tag-py38 +elif [[ $python_version == 3.10* ]]; then + if [ "$is_cpu" == "True" ]; then + echo "Building python3.10 cpu image" + base_tag=ubuntu22.04-py310 + export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-py310-torch$torch_version-tf$tensorflow_version-base + else + echo "Building python3.10 gpu image" + base_tag=ubuntu22.04-cuda$cuda_version-py310 + # reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-cuda12.1.0-py310-torch2.1.0-tf2.14.0-base + export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-cuda$cuda_version-py310-torch$torch_version-tf$tensorflow_version-base + fi else echo "Unsupport python version: $python_version" exit 1 @@ -150,7 +163,8 @@ echo -e "Building image with:\npython$python_version\npytorch$torch_version\nten 
docker_file_content=`cat docker/Dockerfile.ubuntu` if [ "$is_ci_test" != "True" ]; then echo "Building ModelScope lib, will install ModelScope lib to image" - docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir -U funasr transformers && pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/releases/build/modelscope-$modelscope_version-py3-none-any.whl " + docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir -U adaseq pai-easycv ms_swift funasr 'transformers<4.35.0'" + docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$CIS_ENV_COMMIT_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $CIS_ENV_BRANCH --single-branch $REPO_URL && cd MaaS-lib && python setup.py install && cd / && rm -fr /tmp/MaaS-lib" fi echo "$is_dsw" if [ "$is_dsw" == "False" ]; then @@ -160,12 +174,6 @@ else docker_file_content="${docker_file_content} \nENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope" # pre compile extension docker_file_content="${docker_file_content} \nRUN python -c 'from modelscope.utils.pre_compile import pre_compile_all;pre_compile_all()'" - if [ "$is_cpu" == "True" ]; then - echo 'build cpu image' - else - # fix easycv extension and tinycudann conflict. - docker_file_content="${docker_file_content} \nRUN bash /tmp/install_tiny_cuda_nn.sh" - fi fi if [ "$is_ci_test" == "True" ]; then echo "Building CI image, uninstall modelscope" @@ -175,7 +183,7 @@ printf "$docker_file_content" > Dockerfile while true do - docker build -t $IMAGE_TO_BUILD \ + DOCKER_BUILDKIT=0 docker build -t $IMAGE_TO_BUILD \ --build-arg USE_GPU \ --build-arg BASE_IMAGE \ --build-arg PYTHON_VERSION \ diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index 4ac4fd53..55965f83 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -1,10 +1,47 @@ ARG BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu20.04-cuda11.3.0-py37-torch1.11.0-tf1.15.5-base FROM $BASE_IMAGE - -RUN apt-get update && apt-get install -y iputils-ping net-tools iproute2 && \ +RUN apt-get update && \ + apt-get install -y libsox-dev unzip zip iputils-ping telnet && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# install modelscope + +# install jupyter plugin +RUN mkdir -p /root/.local/share/jupyter/labextensions/ && \ + cp -r /tmp/resources/jupyter_plugins/* /root/.local/share/jupyter/labextensions/ + +COPY docker/scripts/modelscope_env_init.sh /usr/local/bin/ms_env_init.sh +# python3.8 pip install git+https://github.com/jin-s13/xtcocoapi.git@v1.13 +# pip install git+https://github.com/gatagat/lap.git@v0.4.0 +RUN pip install --no-cache-dir numpy 'cython<=0.29.36' funtextprocessing kwsbp==0.0.6 safetensors typeguard==2.13.3 scikit-learn librosa==0.9.2 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html + +RUN pip install --no-cache-dir adaseq text2sql_lgesql==1.3.0 \ + git+https://github.com/jin-s13/xtcocoapi.git@v1.14 \ + git+https://github.com/gatagat/lap.git@v0.4.0 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps + +RUN mv /opt/conda/compiler_compat/ld /opt/conda/compiler_compat/ldbk && \ + pip install --no-cache-dir mpi4py paint_ldm \ + mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 \ + ipykernel fasttext fairseq deepspeed -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html + +ARG USE_GPU + + +RUN if [ "$USE_GPU" = "True" ] ; then \ + CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0" pip install --no-cache-dir 
'git+https://github.com/facebookresearch/detectron2.git'; \ + else \ + echo 'cpu unsupport detectron2'; \ + fi + +# torchmetrics==0.11.4 for ofa +RUN if [ "$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir torchsde jupyterlab torchmetrics==0.11.4 tiktoken transformers_stream_generator bitsandbytes basicsr optimum && \ + pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ && \ + pip install --no-cache-dir -U xformers --index-url https://download.pytorch.org/whl/cu118 && \ + pip install --no-cache-dir flash_attn==2.3.3+torch2.1cu118 tinycudann==1.7+cu118 vllm==0.2.1+cu118torch2.1 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ + else \ + echo 'cpu unsupport vllm auto-gptq'; \ + fi + COPY requirements /var/modelscope RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /var/modelscope/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ @@ -16,47 +53,10 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /var/modelscope/tests.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip cache purge -# install jupyter plugin -RUN mkdir -p /root/.local/share/jupyter/labextensions/ && \ - cp -r /tmp/resources/jupyter_plugins/* /root/.local/share/jupyter/labextensions/ - -COPY docker/scripts/modelscope_env_init.sh /usr/local/bin/ms_env_init.sh -# python3.8 pip install git+https://github.com/jin-s13/xtcocoapi.git@v1.13 -# pip install git+https://github.com/gatagat/lap.git@v0.4.0 -RUN pip install --no-cache-dir text2sql_lgesql==1.3.0 \ - git+https://github.com/jin-s13/xtcocoapi.git@v1.13 \ - git+https://github.com/gatagat/lap.git@v0.4.0 \ - detectron2==0.3 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps - -RUN pip install --no-cache-dir mpi4py paint_ldm \ - mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 pai-easycv ms_swift \ - ipykernel fasttext fairseq deepspeed -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html - -ARG USE_GPU -# for cpu install cpu version faiss, faiss depends on blas lib, we install libopenblas TODO rename gpu or cpu version faiss -RUN if [ "$USE_GPU" = "True" ] ; then \ - pip install --no-cache-dir funtextprocessing kwsbp==0.0.6 faiss==1.7.2 safetensors typeguard==2.13.3 scikit-learn librosa==0.9.2 funasr -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ - else \ - pip install --no-cache-dir funtextprocessing kwsbp==0.0.6 https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/faiss-1.7.2-py37-none-linux_x86_64.whl safetensors typeguard==2.13.3 scikit-learn librosa==0.9.2 funasr -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ - fi - -RUN pip install --no-cache-dir wenetruntime==1.11.0 adaseq --no-deps COPY examples /modelscope/examples - -# for pai-easycv setup compatiblity issue ENV SETUPTOOLS_USE_DISTUTILS=stdlib - -RUN if [ "$USE_GPU" = "True" ] ; then \ - CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6" pip install --no-cache-dir 'git+https://github.com/facebookresearch/detectron2.git'; \ - else \ - echo 'cpu unsupport detectron2'; \ - fi - -# torchmetrics==0.11.4 for ofa -RUN pip install --no-cache-dir jupyterlab torchmetrics==0.11.4 tiktoken transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr -COPY docker/scripts/install_flash_attension.sh /tmp/install_flash_attension.sh -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash 
/tmp/install_flash_attension.sh; \ - else \ - echo 'cpu unsupport flash attention'; \ - fi +ENV VLLM_USE_MODELSCOPE=True +RUN cp /tmp/resources/conda.aliyun ~/.condarc && \ + pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ + pip config set install.trusted-host mirrors.aliyun.com && \ + cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list diff --git a/docker/Dockerfile.ubuntu_base b/docker/Dockerfile.ubuntu_base index b848e1a1..7f8409fe 100644 --- a/docker/Dockerfile.ubuntu_base +++ b/docker/Dockerfile.ubuntu_base @@ -9,10 +9,11 @@ SHELL ["/bin/bash", "-c"] COPY docker/rcfiles /tmp/resources COPY docker/jupyter_plugins /tmp/resources/jupyter_plugins RUN apt-get update && apt-get install -y --reinstall ca-certificates && \ - apt-get clean && \ - cp /tmp/resources/sources.list.aliyun /etc/apt/sources.list && \ - apt-get update && \ - apt-get install -y locales wget git strace gdb sox libopenmpi-dev curl \ + apt-get install -y apt-utils openssh-server locales wget git strace gdb sox libopenmpi-dev curl \ + iputils-ping net-tools iproute2 autoconf automake gperf libre2-dev libssl-dev \ + libtool libcurl4-openssl-dev libb64-dev libgoogle-perftools-dev patchelf \ + rapidjson-dev scons software-properties-common pkg-config unzip zlib1g-dev \ + libarchive-dev libxml2-dev libnuma-dev \ libgeos-dev strace vim ffmpeg libsm6 tzdata language-pack-zh-hans \ ttf-wqy-microhei ttf-wqy-zenhei xfonts-wqy libxext6 build-essential ninja-build && \ wget https://packagecloud.io/github/git-lfs/packages/debian/bullseye/git-lfs_3.2.0_amd64.deb/download -O ./git-lfs_3.2.0_amd64.deb && \ @@ -27,33 +28,17 @@ RUN apt-get update && apt-get install -y --reinstall ca-certificates && \ rm -rf /var/lib/apt/lists/* ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN.UTF-8 LC_ALL=zh_CN.UTF-8 +RUN wget -O /tmp/boost.tar.gz https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.gz && (cd /tmp && tar xzf boost.tar.gz) && mv /tmp/boost_1_80_0/boost /usr/include/boost #install and config python -ARG PYTHON_VERSION=3.7.13 +ARG PYTHON_VERSION=3.10.13 # Miniconda3-py37_23.1.0-1-Linux-x86_64.sh is last python3.7 version -RUN if [ "$PYTHON_VERSION" = "3.7.13" ] ; then \ - wget --quiet https://mirrors.aliyun.com/anaconda/miniconda/Miniconda3-py37_23.1.0-1-Linux-x86_64.sh -O ./miniconda.sh && \ +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py310_23.9.0-0-Linux-x86_64.sh -O ./miniconda.sh && \ /bin/bash miniconda.sh -b -p /opt/conda && \ rm -f miniconda.sh && \ ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - cp /tmp/resources/conda.tuna ~/.condarc && \ - source /root/.bashrc && \ - conda install --yes python==${PYTHON_VERSION} && \ - pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ - pip config set install.trusted-host mirrors.aliyun.com;\ -else \ - wget --quiet https://mirrors.aliyun.com/anaconda/miniconda/Miniconda3-latest-Linux-${arch}.sh -O ./miniconda.sh && \ - /bin/bash miniconda.sh -b -p /opt/conda && \ - rm -f miniconda.sh && \ - ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ - echo ". 
/opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - cp /tmp/resources/conda.tuna ~/.condarc && \ - source /root/.bashrc && \ - conda install --yes python==${PYTHON_VERSION} && \ - pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ - pip config set install.trusted-host mirrors.aliyun.com;\ -fi + source /root/.bashrc ARG USE_GPU=True @@ -85,12 +70,6 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ fi \ fi -# mmcv-full<=1.7.0 for mmdet3d compatible -RUN if [ "$USE_GPU" = "True" ] ; then \ - CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="5.0 5.2 6.0 6.1 7.0 7.5 8.0 8.6" MMCV_WITH_OPS=1 MAX_JOBS=8 FORCE_CUDA=1 pip install --no-cache-dir 'mmcv-full<=1.7.0' && pip cache purge; \ - else \ - MMCV_WITH_OPS=1 MAX_JOBS=8 pip install --no-cache-dir 'mmcv-full<=1.7.0' && pip cache purge; \ - fi # default shell bash ENV SHELL=/bin/bash @@ -98,12 +77,25 @@ ENV SHELL=/bin/bash RUN if [ "$USE_GPU" = "True" ] ; then \ pip install dgl -f https://data.dgl.ai/wheels/$CUDATOOLKIT_VERSION/repo.html; \ else \ - pip install --no-cache-dir dgl==0.9.0 dglgo -f https://data.dgl.ai/wheels/repo.html; \ + pip install --no-cache-dir dgl dglgo -f https://data.dgl.ai/wheels/repo.html; \ fi # copy install scripts COPY docker/scripts/install_unifold.sh docker/scripts/install_colmap.sh docker/scripts/install_pytorch3d_nvdiffrast.sh docker/scripts/install_tiny_cuda_nn.sh docker/scripts/install_apex.sh /tmp/ +# 3d supports +RUN if [ "$USE_GPU" = "True" ] ; then \ + bash /tmp/install_colmap.sh; \ + else \ + echo 'cpu unsupport colmap'; \ + fi +# install pytorch3d +RUN if [ "$USE_GPU" = "True" ] ; then \ + bash /tmp/install_pytorch3d_nvdiffrast.sh; \ + else \ + echo 'cpu unsupport pytorch3d nvdiffrast'; \ + fi + # for uniford RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_unifold.sh; \ @@ -112,28 +104,11 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ fi RUN if [ "$USE_GPU" = "True" ] ; then \ - export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6+PTX" && pip install --no-cache-dir git+https://github.com/gxd1994/Pointnet2.PyTorch.git@master#subdirectory=pointnet2; \ + export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.9;9.0;8.6+PTX" && pip install --no-cache-dir git+https://github.com/gxd1994/Pointnet2.PyTorch.git@master#subdirectory=pointnet2; \ else \ echo 'cpu unsupport Pointnet2'; \ fi -# 3d supports -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_colmap.sh; \ - else \ - echo 'cpu unsupport colmap'; \ - fi -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_tiny_cuda_nn.sh \ - else \ - echo 'cpu unsupport tiny_cudann'; \ - fi -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_pytorch3d_nvdiffrast.sh; \ - else \ - echo 'cpu unsupport pytorch3d nvdiffrast'; \ - fi -# end of 3D # install apex after deepspeed RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_apex.sh; \ @@ -141,4 +116,10 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ echo 'cpu unsupport apex'; \ fi +RUN if [ "$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/packages/mmcv_full-1.7.0-cp310-cp310-linux_x86_64.whl; \ + else \ + pip install --no-cache-dir mmcv_full==1.7.0+torch2.1cpu -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ + fi +RUN conda install imageio-ffmpeg -c conda-forge -y ENTRYPOINT [] diff --git a/docker/rcfiles/conda.aliyun b/docker/rcfiles/conda.aliyun new file mode 100644 index 00000000..d0aa2014 --- /dev/null +++ b/docker/rcfiles/conda.aliyun @@ -0,0 +1,14 @@ +channels: + - defaults +show_channel_urls: true 
+default_channels: + - http://mirrors.aliyun.com/anaconda/pkgs/main + - http://mirrors.aliyun.com/anaconda/pkgs/r + - http://mirrors.aliyun.com/anaconda/pkgs/msys2 +custom_channels: + conda-forge: http://mirrors.aliyun.com/anaconda/cloud + msys2: http://mirrors.aliyun.com/anaconda/cloud + bioconda: http://mirrors.aliyun.com/anaconda/cloud + menpo: http://mirrors.aliyun.com/anaconda/cloud + pytorch: http://mirrors.aliyun.com/anaconda/cloud + simpleitk: http://mirrors.aliyun.com/anaconda/cloud diff --git a/docker/rcfiles/conda.tuna b/docker/rcfiles/conda.tuna deleted file mode 100644 index ce8a2908..00000000 --- a/docker/rcfiles/conda.tuna +++ /dev/null @@ -1,15 +0,0 @@ -channels: - - defaults -show_channel_urls: true -default_channels: - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2 -custom_channels: - conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - msys2: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - bioconda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - menpo: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - simpleitk: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud diff --git a/docker/rcfiles/pip.conf.tsinghua b/docker/rcfiles/pip.conf.tsinghua deleted file mode 100644 index 4242075a..00000000 --- a/docker/rcfiles/pip.conf.tsinghua +++ /dev/null @@ -1,2 +0,0 @@ -[global] -index-url=https://pypi.tuna.tsinghua.edu.cn/simple diff --git a/docker/rcfiles/ubuntu2204.aliyun b/docker/rcfiles/ubuntu2204.aliyun new file mode 100644 index 00000000..d5dce70c --- /dev/null +++ b/docker/rcfiles/ubuntu2204.aliyun @@ -0,0 +1,10 @@ +deb http://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse +#deb http://mirrors.aliyun.com/ubuntu/ jammy-proposed main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-proposed main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse diff --git a/docker/scripts/install_apex.sh b/docker/scripts/install_apex.sh index 40d9f268..7ecd288b 100644 --- a/docker/scripts/install_apex.sh +++ b/docker/scripts/install_apex.sh @@ -2,6 +2,6 @@ export MAX_JOBS=16 \ && git clone https://github.com/NVIDIA/apex \ && cd apex \ && git checkout 6bd01c4b99a84648ad5e5238a959735e6936c813 \ -&& TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5;8.0;8.6" pip install -v --disable-pip-version-check --no-cache --global-option="--cpp_ext" --global-option="--cuda_ext" ./ \ +&& TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5;8.0;8.9;9.0;8.6+PTX" pip install -v --disable-pip-version-check --no-cache --global-option="--cpp_ext" --global-option="--cuda_ext" ./ \ && cd .. 
\ && rm -rf apex diff --git a/docker/scripts/install_colmap.sh b/docker/scripts/install_colmap.sh index f21fca1d..ada7077a 100644 --- a/docker/scripts/install_colmap.sh +++ b/docker/scripts/install_colmap.sh @@ -8,7 +8,7 @@ wget -q https://cmake.org/files/v3.25/cmake-3.25.2-linux-x86_64.sh \ && export CMAKE_BUILD_PARALLEL_LEVEL=36 \ && export MAX_JOBS=16 \ && export CUDA_ARCHITECTURES="all" \ - && git clone --depth 1 --branch 3.8 https://github.com/colmap/colmap.git \ + && git clone https://github.com/colmap/colmap.git \ && cd colmap \ && mkdir build \ && cd build \ diff --git a/docker/scripts/install_flash_attension.sh b/docker/scripts/install_flash_attension.sh index f37e567d..6413cca9 100644 --- a/docker/scripts/install_flash_attension.sh +++ b/docker/scripts/install_flash_attension.sh @@ -1,4 +1,4 @@ - git clone -b v2.3.2 https://github.com/Dao-AILab/flash-attention && \ - cd flash-attention && python setup.py install && \ + git clone -b v2.3.3 https://github.com/Dao-AILab/flash-attention && \ + cd flash-attention && MAX_JOBS=46 python setup.py install && \ cd .. && \ rm -rf flash-attention diff --git a/docker/scripts/install_pytorch3d_nvdiffrast.sh b/docker/scripts/install_pytorch3d_nvdiffrast.sh index c7880f92..c64ea7fb 100644 --- a/docker/scripts/install_pytorch3d_nvdiffrast.sh +++ b/docker/scripts/install_pytorch3d_nvdiffrast.sh @@ -1,6 +1,7 @@ export CMAKE_BUILD_PARALLEL_LEVEL=36 \ && export MAX_JOBS=36 \ - && export CMAKE_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;86" \ + && export CMAKE_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;8.6+PTX;87;89;90" \ + && export TORCH_CUDA_ARCH_LIST="5.0;5.2;6.0;6.1;7.0;7.5;8.0;8.6+PTX;8.7;8.9;9.0" \ && git clone --branch 2.1.0 --recursive https://github.com/NVIDIA/thrust.git \ && cd thrust \ && mkdir build \ @@ -10,7 +11,11 @@ export CMAKE_BUILD_PARALLEL_LEVEL=36 \ && cd ../.. \ && rm -rf thrust \ && pip install --no-cache-dir fvcore iopath \ - && pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable" \ + && curl -LO https://github.com/NVIDIA/cub/archive/2.1.0.tar.gz \ + && tar xzf 2.1.0.tar.gz \ + && export CUB_HOME=$PWD/cub-2.1.0 \ + && FORCE_CUDA=1 pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable" \ + && rm -fr 2.1.0.tar.gz $PWD/cub-2.1.0 \ && apt-get update \ && apt-get install -y --no-install-recommends pkg-config libglvnd0 libgl1 libglx0 libegl1 libgles2 libglvnd-dev libgl1-mesa-dev libegl1-mesa-dev libgles2-mesa-dev -y \ && git clone https://github.com/NVlabs/nvdiffrast.git \ diff --git a/docker/scripts/install_tiny_cuda_nn.sh b/docker/scripts/install_tiny_cuda_nn.sh index 96ae5c72..1aaa2863 100644 --- a/docker/scripts/install_tiny_cuda_nn.sh +++ b/docker/scripts/install_tiny_cuda_nn.sh @@ -1,7 +1,6 @@ -export CMAKE_BUILD_PARALLEL_LEVEL=36 && export MAX_JOBS=36 && export TCNN_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;86" \ +export CMAKE_BUILD_PARALLEL_LEVEL=36 && export MAX_JOBS=36 && export TCNN_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;89;90;86" \ && git clone --recursive https://github.com/nvlabs/tiny-cuda-nn \ && cd tiny-cuda-nn \ - && git checkout v1.6 \ && cd bindings/torch \ && python setup.py install \ && cd ../../.. 
\ diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index f83defd0..45d1d442 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -600,7 +600,7 @@ class HubApi: cookies = ModelScopeConfig.get_cookies() r = self.session.get(datahub_url, cookies=cookies) resp = r.json() - datahub_raise_on_error(datahub_url, resp) + datahub_raise_on_error(datahub_url, resp, r) dataset_id = resp['Data']['Id'] dataset_type = resp['Data']['Type'] return dataset_id, dataset_type @@ -613,7 +613,7 @@ class HubApi: cookies=cookies, headers=self.builder_headers(self.headers)) resp = r.json() - datahub_raise_on_error(datahub_url, resp) + datahub_raise_on_error(datahub_url, resp, r) file_list = resp['Data'] if file_list is None: raise NotExistError( @@ -866,7 +866,7 @@ class HubApi: cookies=cookies, headers={'user-agent': ModelScopeConfig.get_user_agent()}) resp = r.json() - datahub_raise_on_error(url, resp) + datahub_raise_on_error(url, resp, r) return resp['Data'] def dataset_download_statistics(self, dataset_name: str, namespace: str, use_streaming: bool) -> None: diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py index 48bb5fe0..804cfe27 100644 --- a/modelscope/hub/errors.py +++ b/modelscope/hub/errors.py @@ -117,12 +117,13 @@ def raise_on_error(rsp): raise RequestError(rsp['Message']) -def datahub_raise_on_error(url, rsp): +def datahub_raise_on_error(url, rsp, http_response: requests.Response): """If response error, raise exception Args: url (str): The request url rsp (HTTPResponse): The server response. + http_response: the origin http response. Raises: RequestError: the http request error. @@ -133,7 +134,7 @@ def datahub_raise_on_error(url, rsp): if rsp.get('Code') == HTTPStatus.OK: return True else: - request_id = get_request_id(rsp) + request_id = get_request_id(http_response) raise RequestError( f"Url = {url}, Request id={request_id} Message = {rsp.get('Message')},\ Please specify correct dataset_name and namespace.") From a19fe73afb089ef4406e9fc7a68604459fff4373 Mon Sep 17 00:00:00 2001 From: "biwen.lbw" Date: Tue, 28 Nov 2023 17:17:29 +0800 Subject: [PATCH 06/14] fix numpy bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修复numpy版本导致的bug Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14816762 * fix numpy bug --- modelscope/models/cv/face_reconstruction/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelscope/models/cv/face_reconstruction/utils.py b/modelscope/models/cv/face_reconstruction/utils.py index 655d8b2a..f23b2f70 100644 --- a/modelscope/models/cv/face_reconstruction/utils.py +++ b/modelscope/models/cv/face_reconstruction/utils.py @@ -767,6 +767,7 @@ def align_img(img, lm, lm3D, mask=None, target_size=224., rescale_factor=102.): # calculate translation and scale factors using 5 facial landmarks and standard landmarks of a 3D face t, s = POS(lm5p.transpose(), lm3D.transpose()) + t = t.squeeze() s = rescale_factor / s # processing the image From ae425433895e349b977137e4a67441aa59009715 Mon Sep 17 00:00:00 2001 From: "chenyafeng.cyf" Date: Wed, 29 Nov 2023 10:03:52 +0800 Subject: [PATCH 07/14] fix_gpu_bug Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14822269 --- modelscope/models/audio/sv/ERes2Net.py | 5 ++++- modelscope/models/audio/sv/ERes2Net_aug.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/modelscope/models/audio/sv/ERes2Net.py b/modelscope/models/audio/sv/ERes2Net.py index 0119783c..3c07390b 100644 --- 
a/modelscope/models/audio/sv/ERes2Net.py +++ b/modelscope/models/audio/sv/ERes2Net.py @@ -19,6 +19,7 @@ from modelscope.metainfo import Models from modelscope.models import MODELS, TorchModel from modelscope.models.audio.sv.fusion import AFF from modelscope.utils.constant import Tasks +from modelscope.utils.device import create_device class ReLU(nn.Hardtanh): @@ -314,6 +315,7 @@ class SpeakerVerificationERes2Net(TorchModel): self.m_channels = self.model_config['channels'] self.other_config = kwargs self.feature_dim = 80 + self.device = create_device(self.other_config['device']) self.embedding_model = ERes2Net( embed_dim=self.embed_dim, m_channels=self.m_channels) @@ -321,6 +323,7 @@ class SpeakerVerificationERes2Net(TorchModel): pretrained_model_name = kwargs['pretrained_model'] self.__load_check_point(pretrained_model_name) + self.embedding_model.to(self.device) self.embedding_model.eval() def forward(self, audio): @@ -333,7 +336,7 @@ class SpeakerVerificationERes2Net(TorchModel): ) == 2, 'modelscope error: the shape of input audio to model needs to be [N, T]' # audio shape: [N, T] feature = self.__extract_feature(audio) - embedding = self.embedding_model(feature) + embedding = self.embedding_model(feature.to(self.device)) return embedding.detach().cpu() diff --git a/modelscope/models/audio/sv/ERes2Net_aug.py b/modelscope/models/audio/sv/ERes2Net_aug.py index d0739cad..5540ff3e 100644 --- a/modelscope/models/audio/sv/ERes2Net_aug.py +++ b/modelscope/models/audio/sv/ERes2Net_aug.py @@ -19,6 +19,7 @@ from modelscope.metainfo import Models from modelscope.models import MODELS, TorchModel from modelscope.models.audio.sv.fusion import AFF from modelscope.utils.constant import Tasks +from modelscope.utils.device import create_device class ReLU(nn.Hardtanh): @@ -308,12 +309,13 @@ class SpeakerVerificationERes2Net(TorchModel): self.model_config = model_config self.other_config = kwargs self.feature_dim = 80 - + self.device = create_device(self.other_config['device']) self.embedding_model = ERes2Net_aug() pretrained_model_name = kwargs['pretrained_model'] self.__load_check_point(pretrained_model_name) + self.embedding_model.to(self.device) self.embedding_model.eval() def forward(self, audio): @@ -326,7 +328,7 @@ class SpeakerVerificationERes2Net(TorchModel): ) == 2, 'modelscope error: the shape of input audio to model needs to be [N, T]' # audio shape: [N, T] feature = self.__extract_feature(audio) - embedding = self.embedding_model(feature) + embedding = self.embedding_model(feature.to(self.device)) return embedding.detach().cpu() From 6c7fca830732d7356cb46826f3169a147e7fad38 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Wed, 29 Nov 2023 17:37:56 +0800 Subject: [PATCH 08/14] =?UTF-8?q?=E6=94=AF=E6=8C=81modelscope=E7=9B=B4?= =?UTF-8?q?=E6=8E=A5=E6=8B=89=E8=B5=B7=E6=8E=A8=E7=90=86=E6=9C=8D=E5=8A=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14702876 * add inference server code * add server requirement * fix import issue * debug * add command line * add llmpipeline support * modify port to int * add serer usage * remove unused code * fix lint issue * add inference server code * upgrade env to VLLM_USE_MODELSCOPE --- docs/source/server.md | 41 ++++++++++++++++ modelscope/cli/cli.py | 2 + modelscope/cli/server.py | 40 ++++++++++++++++ modelscope/server/__init__.py | 0 modelscope/server/api/__init__.py | 0 modelscope/server/api/routers/__init__.py | 0 
 modelscope/server/api/routers/health.py | 14 ++++++
 modelscope/server/api/routers/model_router.py | 45 ++++++++++++++++++
 modelscope/server/api/routers/router.py | 8 ++++
 modelscope/server/api_server.py | 45 ++++++++++++++++++
 modelscope/server/core/__init__.py | 0
 modelscope/server/core/event_handlers.py | 47 +++++++++++++++++++
 modelscope/server/models/__init__.py | 0
 modelscope/server/models/input.py | 8 ++++
 modelscope/server/models/output.py | 34 ++++++++++++++
 modelscope/utils/input_output.py | 31 ++++++++----
 requirements/svr.txt | 4 ++
 17 files changed, 310 insertions(+), 9 deletions(-)
 create mode 100644 docs/source/server.md
 create mode 100644 modelscope/cli/server.py
 create mode 100644 modelscope/server/__init__.py
 create mode 100644 modelscope/server/api/__init__.py
 create mode 100644 modelscope/server/api/routers/__init__.py
 create mode 100644 modelscope/server/api/routers/health.py
 create mode 100644 modelscope/server/api/routers/model_router.py
 create mode 100644 modelscope/server/api/routers/router.py
 create mode 100644 modelscope/server/api_server.py
 create mode 100644 modelscope/server/core/__init__.py
 create mode 100644 modelscope/server/core/event_handlers.py
 create mode 100644 modelscope/server/models/__init__.py
 create mode 100644 modelscope/server/models/input.py
 create mode 100644 modelscope/server/models/output.py
 create mode 100644 requirements/svr.txt

diff --git a/docs/source/server.md b/docs/source/server.md
new file mode 100644
index 00000000..150f5686
--- /dev/null
+++ b/docs/source/server.md
@@ -0,0 +1,41 @@
+# Using the modelscope server
+## 1. Generic service
+The modelscope library ships a simple model service built on FastAPI; the vast majority of models can be brought up with a single command.
+Usage:
+
+```bash
+modelscope server --model_id=modelscope/Llama-2-7b-chat-ms --revision=v1.0.5
+```
+The official images we provide can also start it with one command (the image is not finished yet):
+```bash
+docker run --rm --name maas_dev --shm-size=50gb --gpus='"device=0"' -e MODELSCOPE_CACHE=/modelscope_cache -v /host_path_to_modelscope_cache:/modelscope_cache -p 8000:8000 reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-cuda11.8.0-py310-torch2.1.0-tf2.14.0-1.9.5-server modelscope server --model_id=modelscope/Llama-2-7b-chat-ms --revision=v1.0.5
+```
+The service listens on port 8000 by default; you can change this with --port. It exposes two endpoints, whose API documentation is available at
+http://ip:port/docs.
+The describe endpoint returns the service's input/output schema together with sample input data, as shown below:
+![describe](https://modelscope.oss-cn-beijing.aliyuncs.com/resource/describe.jpg)
+For the call endpoint, you can directly copy the example data from the describe response, as shown below:
+![call](https://modelscope.oss-cn-beijing.aliyuncs.com/resource/call.jpg)
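+
+A minimal Python client sketch for the two endpoints above (it assumes the default host/port; the exact request body is task-dependent, so copy the real one from the `/describe` response):
+
+```python
+# Hedged client sketch: assumes the generic server runs on localhost:8000.
+import requests
+
+base = 'http://127.0.0.1:8000'
+
+# Inspect the schema and a ready-made sample request body.
+info = requests.get(f'{base}/describe').json()
+print(info['sample'])
+
+# Call the model; binary fields (image/audio/video) must be base64-encoded.
+resp = requests.post(f'{base}/call', json=info['sample'])
+print(resp.json())
+```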
+## 2. vLLM inference for large language models
+For LLMs we provide vLLM-based inference support; currently only some models support vLLM.
+
+### 2.1 Using modelscope models directly with vLLM
+You can make vLLM download models from www.modelscope.cn by setting an environment variable.
+
+Start the plain server:
+```bash
+VLLM_USE_MODELSCOPE=True python -m vllm.entrypoints.api_server --model="damo/nlp_gpt2_text-generation_english-base" --revision="v1.0.0"
+```
+Start the OpenAI-compatible server:
+```bash
+VLLM_USE_MODELSCOPE=True python -m vllm.entrypoints.openai.api_server --model="damo/nlp_gpt2_text-generation_english-base" --revision="v1.0.0"
+```
+
+If the model already exists in the modelscope cache directory, the cached copy is used directly; otherwise the model is downloaded from www.modelscope.cn.
+
+Start vLLM from the official modelscope image, with the port set to 9090:
+
+```bash
+docker run --rm --name maas_dev --shm-size=50gb --gpus='"device=0"' -e MODELSCOPE_CACHE=/modelscope_cache -v /host_path_to_modelscope_cache:/modelscope_cache -p 9090:9090 reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-cuda11.8.0-py310-torch2.1.0-tf2.14.0-1.9.5-server python -m vllm.entrypoints.api_server --model "modelscope/Llama-2-7b-chat-ms" --revision "v1.0.5" --port 9090
+```
diff --git a/modelscope/cli/cli.py b/modelscope/cli/cli.py
index a25502fd..d67e8aa1 100644
--- a/modelscope/cli/cli.py
+++ b/modelscope/cli/cli.py
@@ -6,6 +6,7 @@ from modelscope.cli.download import DownloadCMD
 from modelscope.cli.modelcard import ModelCardCMD
 from modelscope.cli.pipeline import PipelineCMD
 from modelscope.cli.plugins import PluginsCMD
+from modelscope.cli.server import ServerCMD


 def run_cmd():
@@ -17,6 +18,7 @@ def run_cmd():
     PluginsCMD.define_args(subparsers)
     PipelineCMD.define_args(subparsers)
     ModelCardCMD.define_args(subparsers)
+    ServerCMD.define_args(subparsers)

     args = parser.parse_args()

diff --git a/modelscope/cli/server.py b/modelscope/cli/server.py
new file mode 100644
index 00000000..2925d68f
--- /dev/null
+++ b/modelscope/cli/server.py
@@ -0,0 +1,40 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+from argparse import ArgumentParser
+from string import Template
+
+import uvicorn
+
+from modelscope.cli.base import CLICommand
+from modelscope.server.api_server import add_server_args, get_app
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+current_path = os.path.dirname(os.path.abspath(__file__))
+template_path = os.path.join(current_path, 'template')
+
+
+def subparser_func(args):
+    """ Function which will be called for a specific sub parser.
+    """
+    return ServerCMD(args)
+
+
+class ServerCMD(CLICommand):
+    name = 'server'
+
+    def __init__(self, args):
+        self.args = args
+
+    @staticmethod
+    def define_args(parsers: ArgumentParser):
+        """ define args for the server command.
+ """ + parser = parsers.add_parser(ServerCMD.name) + add_server_args(parser) + parser.set_defaults(func=subparser_func) + + def execute(self): + app = get_app(self.args) + uvicorn.run(app, host=self.args.host, port=self.args.port) diff --git a/modelscope/server/__init__.py b/modelscope/server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/server/api/__init__.py b/modelscope/server/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/server/api/routers/__init__.py b/modelscope/server/api/routers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/server/api/routers/health.py b/modelscope/server/api/routers/health.py new file mode 100644 index 00000000..2d88c58c --- /dev/null +++ b/modelscope/server/api/routers/health.py @@ -0,0 +1,14 @@ +from faulthandler import disable +from http import HTTPStatus +from typing import Any, Dict + +from fastapi import APIRouter + +from modelscope.server.models.output import ApiResponse + +router = APIRouter() + + +@router.get('', response_model=ApiResponse[Dict], status_code=200) +def health() -> Any: + return ApiResponse[Dict](Data={}, Code=HTTPStatus.OK, Success=True) diff --git a/modelscope/server/api/routers/model_router.py b/modelscope/server/api/routers/model_router.py new file mode 100644 index 00000000..8d3a33f2 --- /dev/null +++ b/modelscope/server/api/routers/model_router.py @@ -0,0 +1,45 @@ +from fastapi import APIRouter, Body +from pydantic import BaseModel +from starlette.requests import Request + +from modelscope.utils.input_output import \ + pipeline_output_to_service_base64_output # noqa E125 +from modelscope.utils.input_output import call_pipeline_with_json + +router = APIRouter() + + +@router.post('/call') +async def inference( + request: Request, + body: BaseModel = Body(examples=[{ + 'usage': 'copy body from describe' + }])): # noqa E125 + """Inference general interface. + + For image, video, audio etc binary data, need encoded with base64. + + Args: + request (Request): The request object. + request_info (ModelScopeRequest): The post body. + + Returns: + ApiResponse: For binary field, encoded with base64 + """ + pipeline_service = request.app.state.pipeline + pipeline_info = request.app.state.pipeline_info + request_json = await request.json() + result = call_pipeline_with_json(pipeline_info, pipeline_service, + request_json) + # convert output to json, if binary field, we need encoded. 
+ output = pipeline_output_to_service_base64_output( + pipeline_info['task_name'], result) + return output + + +@router.get('/describe') +async def describe(request: Request): + info = {} + info['schema'] = request.app.state.pipeline_info + info['sample'] = request.app.state.pipeline_sample + return info diff --git a/modelscope/server/api/routers/router.py b/modelscope/server/api/routers/router.py new file mode 100644 index 00000000..df1a1868 --- /dev/null +++ b/modelscope/server/api/routers/router.py @@ -0,0 +1,8 @@ +from fastapi import APIRouter +from starlette.routing import Route, WebSocketRoute + +from modelscope.server.api.routers import health, model_router + +api_router = APIRouter() +api_router.include_router(model_router.router, tags=['prediction'], prefix='') +api_router.include_router(health.router, tags=['health'], prefix='/health') diff --git a/modelscope/server/api_server.py b/modelscope/server/api_server.py new file mode 100644 index 00000000..99d20275 --- /dev/null +++ b/modelscope/server/api_server.py @@ -0,0 +1,45 @@ +import argparse + +import uvicorn +from fastapi import FastAPI + +from modelscope.server.api.routers.router import api_router +from modelscope.server.core.event_handlers import (start_app_handler, + stop_app_handler) + + +def get_app(args) -> FastAPI: + app = FastAPI( + title='modelscope_server', + version='0.1', + debug=True, + swagger_ui_parameters={'tryItOutEnabled': True}) + app.state.args = args + app.include_router(api_router) + + app.add_event_handler('startup', start_app_handler(app)) + app.add_event_handler('shutdown', stop_app_handler(app)) + return app + + +def add_server_args(parser): + parser.add_argument( + '--model_id', required=True, type=str, help='The target model id') + parser.add_argument( + '--revision', required=True, type=str, help='Model revision') + parser.add_argument('--host', default='0.0.0.0', help='Host to listen') + parser.add_argument('--port', type=int, default=8000, help='Server port') + parser.add_argument('--debug', default='debug', help='Set debug level.') + parser.add_argument( + '--llm_first', + type=bool, + default=True, + help='Use LLMPipeline first for llm models.') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('modelscope_server') + add_server_args(parser) + args = parser.parse_args() + app = get_app(args) + uvicorn.run(app, host=args.host, port=args.port) diff --git a/modelscope/server/core/__init__.py b/modelscope/server/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/server/core/event_handlers.py b/modelscope/server/core/event_handlers.py new file mode 100644 index 00000000..a4f515a2 --- /dev/null +++ b/modelscope/server/core/event_handlers.py @@ -0,0 +1,47 @@ +from typing import Callable + +from fastapi import FastAPI + +from modelscope.utils.input_output import ( # yapf: disable + create_pipeline, get_pipeline_information_by_pipeline, + get_task_input_examples, get_task_schemas) +from modelscope.utils.logger import get_logger + +# control the model start stop + +logger = get_logger() + + +def _startup_model(app: FastAPI) -> None: + logger.info('download model and create pipeline') + app.state.pipeline = create_pipeline(app.state.args.model_id, + app.state.args.revision, + app.state.args.llm_first) + info = {} + info['task_name'] = app.state.pipeline.group_key + info['schema'] = get_task_schemas(app.state.pipeline.group_key) + app.state.pipeline_info = info + app.state.pipeline_sample = get_task_input_examples( + app.state.pipeline.group_key) + 
logger.info('pipeline created.') + + +def _shutdown_model(app: FastAPI) -> None: + app.state.pipeline = None + logger.info('shutdown model service') + + +def start_app_handler(app: FastAPI) -> Callable: + + def startup() -> None: + _startup_model(app) + + return startup + + +def stop_app_handler(app: FastAPI) -> Callable: + + def shutdown() -> None: + _shutdown_model(app) + + return shutdown diff --git a/modelscope/server/models/__init__.py b/modelscope/server/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/server/models/input.py b/modelscope/server/models/input.py new file mode 100644 index 00000000..08ff9851 --- /dev/null +++ b/modelscope/server/models/input.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + + +class ModelScopeRequest(BaseModel): + + def __init__(self, input: object, parameters: object): + self.input = input + self.parameters = parameters diff --git a/modelscope/server/models/output.py b/modelscope/server/models/output.py new file mode 100644 index 00000000..39abcac2 --- /dev/null +++ b/modelscope/server/models/output.py @@ -0,0 +1,34 @@ +import datetime +from http import HTTPStatus +from typing import Generic, Optional, Type, TypeVar + +import json +from pydantic.generics import GenericModel + +ResultType = TypeVar('ResultType') + + +class ApiResponse(GenericModel, Generic[ResultType]): + Code: Optional[int] = HTTPStatus.OK + Success: Optional[bool] = True + RequestId: Optional[str] = '' + Message: Optional[str] = 'success' + Data: Optional[ResultType] = {} + """ + ResultType (_type_): The response data type. + Failed: {'Code': 10010101004, 'Message': 'get model info failed, err: unauthorized permission', + 'RequestId': '', 'Success': False} + Success: {'Code': 200, 'Data': {}, 'Message': 'success', 'RequestId': '', 'Success': True} + + + + def set_data(self, data=Type[ResultType]): + self.Data = data + + def set_message(self, message): + self.Message = message + + def toJSON(self): + return json.dumps(self, default=lambda o: o.isoformat() if (isinstance(o, datetime.datetime)) + else o.__dict__, sort_keys=True, indent=4) + """ diff --git a/modelscope/utils/input_output.py b/modelscope/utils/input_output.py index 679069c1..5e3e1305 100644 --- a/modelscope/utils/input_output.py +++ b/modelscope/utils/input_output.py @@ -36,16 +36,18 @@ decodes relevant fields. Example: # create pipeine instance and pipeline information, save it to app pipeline_instance = create_pipeline('damo/cv_gpen_image-portrait-enhancement', 'v1.0.0') + # get pipeline information, input,output, request example. pipeline_info = get_pipeline_information_by_pipeline(pipeline_instance) + # save the pipeline and info to the app for use in subsequent request processing app.state.pipeline = pipeline_instance app.state.pipeline_info = pipeline_info - # for service schema request. - pipeline_info = request.app.state.pipeline_info - return pipeline_info.schema - - # for service call request. - def inference(request: Request): + # for inference request, use call_pipeline_with_json to decode input and + # call pipeline, call pipeline_output_to_service_base64_output + # to encode necessary fields, and return the result. + # request and response are json format. + @router.post('/call') + async def inference(request: Request): pipeline_service = request.app.state.pipeline pipeline_info = request.app.state.pipeline_info request_json = await request.json() @@ -55,19 +57,30 @@ Example: # convert output to json, if binary field, we need encoded. 
diff --git a/modelscope/utils/input_output.py b/modelscope/utils/input_output.py
index 679069c1..5e3e1305 100644
--- a/modelscope/utils/input_output.py
+++ b/modelscope/utils/input_output.py
@@ -36,16 +36,18 @@ decodes relevant fields.
 Example:
     # create pipeline instance and pipeline information, save it to app
    pipeline_instance = create_pipeline('damo/cv_gpen_image-portrait-enhancement', 'v1.0.0')
+    # get pipeline information: input, output and a request example.
    pipeline_info = get_pipeline_information_by_pipeline(pipeline_instance)
+    # save the pipeline and info to the app for use in subsequent request processing
    app.state.pipeline = pipeline_instance
    app.state.pipeline_info = pipeline_info
-    # for service schema request.
-    pipeline_info = request.app.state.pipeline_info
-    return pipeline_info.schema
-
-    # for service call request.
-    def inference(request: Request):
+    # for an inference request, use call_pipeline_with_json to decode the
+    # input and call the pipeline, then pipeline_output_to_service_base64_output
+    # to encode the necessary fields and return the result.
+    # request and response are in JSON format.
+    @router.post('/call')
+    async def inference(request: Request):
        pipeline_service = request.app.state.pipeline
        pipeline_info = request.app.state.pipeline_info
        request_json = await request.json()
@@ -55,19 +57,30 @@ Example:
        # convert output to json; binary fields need to be encoded.
        output = pipeline_output_to_service_base64_output(pipeline_info.task_name, result)
        return output
+
+    # Input, output and sample information for the inference service can be
+    # obtained through the describe interface.
+    @router.get('/describe')
+    async def index(request: Request):
+        pipeline_info = request.app.state.pipeline_info
+        return pipeline_info.schema
+
 Todo:
     * Support more service input types, such as form.

"""


-def create_pipeline(model_id: str, revision: str):
+def create_pipeline(model_id: str, revision: str, llm_first: bool = True):
     model_configuration_file = model_file_download(
         model_id=model_id,
         file_path=ModelFile.CONFIGURATION,
         revision=revision)
     cfg = Config.from_file(model_configuration_file)
-    return pipeline(task=cfg.task, model=model_id, model_revision=revision)
+    return pipeline(
+        task=cfg.task,
+        model=model_id,
+        model_revision=revision,
+        llm_first=llm_first)


 def get_class_user_attributes(cls):
@@ -632,7 +645,7 @@ def call_pipeline_with_json(pipeline_info: PipelineInfomation,
     #     result = pipeline(**pipeline_inputs)
     # else:
     pipeline_inputs, parameters = service_base64_input_to_pipeline_input(
-        pipeline_info.task_name, body)
+        pipeline_info['task_name'], body)
     result = pipeline(pipeline_inputs, **parameters)
     return result
diff --git a/requirements/svr.txt b/requirements/svr.txt
new file mode 100644
index 00000000..ea439c66
--- /dev/null
+++ b/requirements/svr.txt
@@ -0,0 +1,4 @@
+fastapi
+requests
+sse-starlette
+uvicorn
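To make the decode → infer → encode path above concrete, here is a sketch of one request cycle using the helpers this patch touches. The text payload is invented, and `ppl` stands for the pipeline instance returned by `create_pipeline`:

```python
# One /call request, end to end, with the input_output.py helpers.
body = {'input': {'text': 'hello'}, 'parameters': {}}  # invented sample

pipeline_inputs, parameters = service_base64_input_to_pipeline_input(
    pipeline_info['task_name'], body)        # decode JSON/base64 fields
result = ppl(pipeline_inputs, **parameters)  # run the pipeline
output = pipeline_output_to_service_base64_output(
    pipeline_info['task_name'], result)      # encode binary fields for JSON
```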
From fe8bfa921996bf4bb23a28902f1015b6d088145e Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Wed, 29 Nov 2023 17:40:09 +0800
Subject: [PATCH 09/14] force install funasr, pai-easycv etc. when building
 image

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14812168

* force install funasr, pai-easycv etc. when building image
---
 .dev_scripts/build_image.sh     | 12 +++++++++---
 docker/Dockerfile.ubuntu        |  4 ----
 modelscope/utils/pre_compile.py |  2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh
index bb8c7e3d..abe7a1d9 100644
--- a/.dev_scripts/build_image.sh
+++ b/.dev_scripts/build_image.sh
@@ -163,8 +163,9 @@ echo -e "Building image with:\npython$python_version\npytorch$torch_version\ntensorflow$tensorflow_version\n"
 docker_file_content=`cat docker/Dockerfile.ubuntu`
 if [ "$is_ci_test" != "True" ]; then
     echo "Building ModelScope lib, will install ModelScope lib to image"
-    docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir -U adaseq pai-easycv ms_swift funasr 'transformers<4.35.0'"
-    docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$CIS_ENV_COMMIT_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $CIS_ENV_BRANCH --single-branch $REPO_URL && cd MaaS-lib && python setup.py install && cd / && rm -fr /tmp/MaaS-lib"
+    docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$CIS_ENV_COMMIT_ID && pip install --no-cache-dir -U adaseq pai-easycv ms_swift funasr 'transformers<4.35.0'"
+    docker_file_content="${docker_file_content} \nRUN pip uninstall modelscope -y && export COMMIT_ID=$CIS_ENV_COMMIT_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $CIS_ENV_BRANCH --single-branch $REPO_URL && cd MaaS-lib && pip install . && cd / && rm -fr /tmp/MaaS-lib"
+    docker_file_content="${docker_file_content} \nRUN MMCV_WITH_OPS=1 MAX_JOBS=32 pip install --no-cache-dir 'mmcv-full<=1.7.0' && pip cache purge"
 fi
 echo "$is_dsw"
 if [ "$is_dsw" == "False" ]; then
@@ -173,12 +174,17 @@
 else
     echo "Building dsw image will need to set the ModelScope lib cache location."
     docker_file_content="${docker_file_content} \nENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope"
     # pre compile extension
-    docker_file_content="${docker_file_content} \nRUN python -c 'from modelscope.utils.pre_compile import pre_compile_all;pre_compile_all()'"
+    docker_file_content="${docker_file_content} \nRUN export TORCH_CUDA_ARCH_LIST='6.0;6.1;7.0;7.5;8.0;8.9;9.0;8.6+PTX' && python -c 'from modelscope.utils.pre_compile import pre_compile_all;pre_compile_all()'"
 fi
 if [ "$is_ci_test" == "True" ]; then
     echo "Building CI image, uninstall modelscope"
     docker_file_content="${docker_file_content} \nRUN pip uninstall modelscope -y"
 fi
+docker_file_content="${docker_file_content} \nRUN cp /tmp/resources/conda.aliyun ~/.condarc && \
+    pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
+    pip config set install.trusted-host mirrors.aliyun.com && \
+    cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list "
+
 printf "$docker_file_content" > Dockerfile

 while true
diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu
index 55965f83..93308e25 100644
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -56,7 +56,3 @@ RUN pip install --no-cache-dir --upgrade pip && \
 COPY examples /modelscope/examples
 ENV SETUPTOOLS_USE_DISTUTILS=stdlib
 ENV VLLM_USE_MODELSCOPE=True
-RUN cp /tmp/resources/conda.aliyun ~/.condarc && \
-    pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
-    pip config set install.trusted-host mirrors.aliyun.com && \
-    cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list
diff --git a/modelscope/utils/pre_compile.py b/modelscope/utils/pre_compile.py
index 2d9d3b0d..6415f677 100644
--- a/modelscope/utils/pre_compile.py
+++ b/modelscope/utils/pre_compile.py
@@ -18,10 +18,10 @@ def pre_compile_megatron_util():

 def pre_compile_all():
     if torch.cuda.is_available():
         # extension require cuda.
-        pre_compile_megatron_util()
         # pre compile pai-easycv
         from easycv.thirdparty.deformable_attention.functions import ms_deform_attn_func
     # extension for all platform.
+    pre_compile_megatron_util()


 if __name__ == '__main__':
From 51a1b76e91c53ae9278726c0eff228e8c9d179b1 Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Wed, 29 Nov 2023 17:41:44 +0800
Subject: [PATCH 10/14] fix python3.10 compatibility issue

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14678226

* modify librosa version
* fix python3.10 compatibility issue
* remove healpy from requirements for Windows compatibility
---
 .../utils/postprocessing.py         | 2 +-
 modelscope/utils/pre_compile.py     | 1 +
 requirements/audio/audio_signal.txt | 2 +-
 requirements/audio/audio_tts.txt    | 2 +-
 requirements/cv.txt                 | 3 ++-
 requirements/multi-modal.txt        | 2 +-
 6 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/modelscope/models/cv/referring_video_object_segmentation/utils/postprocessing.py b/modelscope/models/cv/referring_video_object_segmentation/utils/postprocessing.py
index 64582140..b9792688 100644
--- a/modelscope/models/cv/referring_video_object_segmentation/utils/postprocessing.py
+++ b/modelscope/models/cv/referring_video_object_segmentation/utils/postprocessing.py
@@ -109,7 +109,7 @@ class ReferYoutubeVOSPostProcess(nn.Module):
                 1)  # remove the padding
             # resize the masks back to their original frames dataset size for evaluation:
             original_frames_size = video_metadata['original_frame_size']
-            tuple_size = tuple(original_frames_size.cpu().numpy())
+            tuple_size = tuple(original_frames_size.cpu())
             video_pred_masks = F.interpolate(
                 video_pred_masks.float(), size=tuple_size, mode='nearest')
             video_pred_masks = video_pred_masks.to(torch.uint8).cpu()
diff --git a/modelscope/utils/pre_compile.py b/modelscope/utils/pre_compile.py
index 6415f677..cddf8704 100644
--- a/modelscope/utils/pre_compile.py
+++ b/modelscope/utils/pre_compile.py
@@ -20,6 +20,7 @@ def pre_compile_all():
     if torch.cuda.is_available():
         # extension require cuda.
         # pre compile pai-easycv
         from easycv.thirdparty.deformable_attention.functions import ms_deform_attn_func
+        pre_compile_megatron_util()
     # extension for all platform.
     pre_compile_megatron_util()
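Taken together, PATCH 09 and PATCH 10 leave `pre_compile_all` calling `pre_compile_megatron_util` twice on CUDA machines: once in the CUDA branch added here, and once in the unconditional tail added by PATCH 09. Reconstructed from the two hunks for clarity (not a further change):

```python
# Net state of pre_compile_all after PATCH 09 + PATCH 10 (reconstruction).
def pre_compile_all():
    if torch.cuda.is_available():
        # extensions that require CUDA; importing easycv's deformable
        # attention triggers its extension build
        from easycv.thirdparty.deformable_attention.functions import \
            ms_deform_attn_func
        pre_compile_megatron_util()
    # extensions for all platforms
    pre_compile_megatron_util()  # runs a second time when CUDA is available
```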
diff --git a/requirements/audio/audio_signal.txt b/requirements/audio/audio_signal.txt
index 023fbbdf..65f1ec61 100644
--- a/requirements/audio/audio_signal.txt
+++ b/requirements/audio/audio_signal.txt
@@ -1,6 +1,6 @@
 hdbscan
 hyperpyyaml
-librosa==0.9.2
+librosa==0.10.1
 MinDAEC
 mir_eval>=0.7
 rotary_embedding_torch>=0.1.5
diff --git a/requirements/audio/audio_tts.txt b/requirements/audio/audio_tts.txt
index 8b33f02f..5cff1b28 100644
--- a/requirements/audio/audio_tts.txt
+++ b/requirements/audio/audio_tts.txt
@@ -3,7 +3,7 @@ greenlet>=1.1.2
 inflect
 jedi>=0.18.1
 kantts
-librosa==0.9.2
+librosa==0.10.1
 lxml
 matplotlib
 msgpack>=1.0.4
diff --git a/requirements/cv.txt b/requirements/cv.txt
index ee9f5582..c8edb672 100644
--- a/requirements/cv.txt
+++ b/requirements/cv.txt
@@ -17,7 +17,8 @@ ffmpeg>=1.4
 ffmpeg-python>=0.2.0
 ftfy
 fvcore
-healpy
+# removed for Windows support
+# healpy
 imageio>=2.9.0
 imageio-ffmpeg>=0.4.2
 imgaug>=0.4.0
diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt
index 59415bb0..568ef76c 100644
--- a/requirements/multi-modal.txt
+++ b/requirements/multi-modal.txt
@@ -4,7 +4,7 @@ decord>=0.6.0
 diffusers>=0.19.0
 fairseq
 ftfy>=6.0.3
-librosa==0.9.2
+librosa==0.10.1
 opencv-python
 pycocoevalcap>=1.2
 pycocotools>=2.0.4
From a8e9e0a48f42207a6deee62b8b66e8e48726e6cc Mon Sep 17 00:00:00 2001
From: "xingjun.wxj"
Date: Fri, 1 Dec 2023 17:33:07 +0800
Subject: [PATCH 11/14] set datasets==2.14.6

---
 requirements/framework.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/framework.txt b/requirements/framework.txt
index 4efce85d..b77f6567 100644
--- a/requirements/framework.txt
+++ b/requirements/framework.txt
@@ -1,6 +1,6 @@
 addict
 attrs
-datasets>=2.13.0,<=2.14.6
+datasets==2.14.6
 einops
 filelock>=3.3.0
 gast>=0.2.2
From 2a991a5c6ba5a649f0135e85cfd1188de70cd374 Mon Sep 17 00:00:00 2001
From: "xingjun.wxj"
Date: Wed, 6 Dec 2023 16:25:20 +0800
Subject: [PATCH 12/14] update datasets version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update datasets version.
Compatibility checked against: 2.14.5, 2.14.6, 2.15.0

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14916111
---
 requirements/framework.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/framework.txt b/requirements/framework.txt
index b77f6567..8804fe8c 100644
--- a/requirements/framework.txt
+++ b/requirements/framework.txt
@@ -1,6 +1,6 @@
 addict
 attrs
-datasets==2.14.6
+datasets>=2.14.5
 einops
 filelock>=3.3.0
 gast>=0.2.2
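A quick local sanity check for the relaxed pin (this snippet assumes the `packaging` module is importable, which is an assumption of the sketch, not something this patch declares):

```python
# Verify the installed datasets release satisfies datasets>=2.14.5.
import datasets
from packaging import version

assert version.parse(datasets.__version__) >= version.parse('2.14.5'), \
    f'datasets {datasets.__version__} is older than 2.14.5'
```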
From 75ce66f824e6f6bb39e2d50dc92a5eecddc79cea Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Fri, 8 Dec 2023 14:16:37 +0800
Subject: [PATCH 13/14] fix exception when there is a version after sdk
 release

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14949463

* fix exception when there is a version after sdk release
---
 modelscope/hub/api.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py
index 45d1d442..e11f2de5 100644
--- a/modelscope/hub/api.py
+++ b/modelscope/hub/api.py
@@ -493,8 +493,9 @@ class HubApi:
                 if len(revisions) > 0:
                     revision = revisions[0]  # use latest revision before release time.
                 else:
+                    revision = MASTER_MODEL_BRANCH
                     vl = '[%s]' % ','.join(all_revisions)
-                    raise NoValidRevisionError('Model revision should be specified from revisions: %s' % (vl))
+                    logger.warning('Model revision should be specified from revisions: %s' % (vl))
                 logger.warning('Model revision not specified, use revision: %s' % revision)
             else:
                 # use user-specified revision
From b16e24440e35f6473ea1b36eb494b3b1d4a22fea Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Fri, 8 Dec 2023 22:25:28 +0800
Subject: [PATCH 14/14] build whl with py310

---
 .github/workflows/publish.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
index 7c2e180a..dacf6df7 100644
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -15,10 +15,10 @@ jobs:
     #if: startsWith(github.event.ref, 'refs/tags')
     steps:
       - uses: actions/checkout@v2
-      - name: Set up Python 3.7
+      - name: Set up Python 3.10
         uses: actions/setup-python@v2
         with:
-          python-version: '3.7'
+          python-version: '3.10'
       - name: Install wheel
         run: pip install wheel && pip install -r requirements/framework.txt
      - name: Build ModelScope