From 5ba9fd23079b87a14a8aa92ee297e744039bae22 Mon Sep 17 00:00:00 2001 From: "mulin.lyh" Date: Mon, 27 Nov 2023 20:21:00 +0800 Subject: [PATCH] modify auto gptq and vllm env Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14790283 * upgrade to python3.10 * modify auto gptq and vllm env * fix lint issue * Merge remote-tracking branch 'origin/master' into python10_support * python310 support * build from repo * add commit id force install modelscope every build * add commit id force install modelscope every build * fix cpu build issue * fix datahub error message * Merge branch 'python10_support' of gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib into python10_support * add --no-cache-dir install auto_gptq --- .dev_scripts/build_base_image.sh | 42 +++++++-- .dev_scripts/build_image.sh | 24 +++-- docker/Dockerfile.ubuntu | 90 +++++++++---------- docker/Dockerfile.ubuntu_base | 79 +++++++--------- docker/rcfiles/conda.aliyun | 14 +++ docker/rcfiles/conda.tuna | 15 ---- docker/rcfiles/pip.conf.tsinghua | 2 - docker/rcfiles/ubuntu2204.aliyun | 10 +++ docker/scripts/install_apex.sh | 2 +- docker/scripts/install_colmap.sh | 2 +- docker/scripts/install_flash_attension.sh | 4 +- .../scripts/install_pytorch3d_nvdiffrast.sh | 9 +- docker/scripts/install_tiny_cuda_nn.sh | 3 +- modelscope/hub/api.py | 6 +- modelscope/hub/errors.py | 5 +- 15 files changed, 167 insertions(+), 140 deletions(-) create mode 100644 docker/rcfiles/conda.aliyun delete mode 100644 docker/rcfiles/conda.tuna delete mode 100644 docker/rcfiles/pip.conf.tsinghua create mode 100644 docker/rcfiles/ubuntu2204.aliyun diff --git a/.dev_scripts/build_base_image.sh b/.dev_scripts/build_base_image.sh index 8c8c9a0e..872798cd 100644 --- a/.dev_scripts/build_base_image.sh +++ b/.dev_scripts/build_base_image.sh @@ -1,19 +1,24 @@ #!/bin/bash # default values. -BASE_CPU_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04 +BASE_CPU_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu BASE_GPU_CUDA113_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.3.0-cudnn8-devel BASE_GPU_CUDA117_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.7.1-cudnn8-devel BASE_GPU_CUDA118_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.8.0-cudnn8-devel +BASE_GPU_CUDA121_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:22.04-cuda11.8.0-cudnn8-devel +BASE_GPU_CUDA122_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:22.04-cuda11.2.2-cudnn8-devel MODELSCOPE_REPO_ADDRESS=reg.docker.alibaba-inc.com/modelscope/modelscope python_version=3.7.13 torch_version=1.11.0 cuda_version=11.7.1 cudatoolkit_version=11.3 tensorflow_version=1.15.5 +os_version=20.04 version=None is_cpu=False +is_dryrun=False function usage(){ echo "usage: build.sh " + echo " --os=ubuntu_version set ubuntu os version, default: 20.04" echo " --python=python_version set python version, default: $python_version" echo " --cuda=cuda_version set cuda version,only[11.3.0, 11.7.1], fefault: $cuda_version" echo " --torch=torch_version set pytorch version, fefault: $torch_version" @@ -21,9 +26,14 @@ function usage(){ echo " --test option for run test before push image, only push on ci test pass" echo " --cpu option for build cpu version" echo " --push option for push image to remote repo" + echo " --dryrun create Dockerfile not build" } for i in "$@"; do case $i in + --os=*) + os_version="${i#*=}" + shift + ;; --python=*) python_version="${i#*=}" shift @@ -52,6 +62,10 @@ for i in "$@"; do is_push=True shift # option for push image to remote repo ;; + --dryrun) + is_dryrun=True + shift + ;; --help) usage exit 0 @@ -68,7 +82,7 @@ done if [ "$cuda_version" == 11.3.0 ]; then echo "Building base image cuda11.3.0" - BASE_GPU_IMAGE=$BASE_GPU_CUDA113_IMAGE + BASE_GPU_IMAGE=$os_version-$cudatoolkit_version-cudnn8-devel cudatoolkit_version=cu113 elif [ "$cuda_version" == 11.7.1 ]; then echo "Building base image cuda11.7.1" @@ -77,43 +91,55 @@ elif [ "$cuda_version" == 11.7.1 ]; then elif [ "$cuda_version" == 11.8.0 ]; then echo "Building base image cuda11.8.0" cudatoolkit_version=cu118 - BASE_GPU_IMAGE=$BASE_GPU_CUDA118_IMAGE + BASE_GPU_IMAGE=$MODELSCOPE_REPO_ADDRESS:$os_version-cuda$cuda_version-cudnn8-devel +elif [ "$cuda_version" == 12.1.0 ]; then + cudatoolkit_version=cu121 + BASE_GPU_IMAGE=$BASE_GPU_CUDA121_IMAGE else echo "Unsupport cuda version: $cuda_version" exit 1 fi if [ "$is_cpu" == "True" ]; then - export BASE_IMAGE=$BASE_CPU_IMAGE - base_tag=ubuntu20.04 + export BASE_IMAGE=$BASE_CPU_IMAGE:$os_version + base_tag=ubuntu$os_version export USE_GPU=False else export BASE_IMAGE=$BASE_GPU_IMAGE - base_tag=ubuntu20.04-cuda$cuda_version + base_tag=ubuntu$os_version-cuda$cuda_version export USE_GPU=True fi + if [[ $python_version == 3.7* ]]; then base_tag=$base_tag-py37 elif [[ $python_version == 3.8* ]]; then base_tag=$base_tag-py38 +elif [[ $python_version == 3.10* ]]; then + base_tag=$base_tag-py310 else echo "Unsupport python version: $python_version" exit 1 fi - target_image_tag=$base_tag-torch$torch_version-tf$tensorflow_version-base export IMAGE_TO_BUILD=$MODELSCOPE_REPO_ADDRESS:$target_image_tag export PYTHON_VERSION=$python_version export TORCH_VERSION=$torch_version export CUDATOOLKIT_VERSION=$cudatoolkit_version export TENSORFLOW_VERSION=$tensorflow_version +echo "From: $BASE_IMAGE build: $target_image_tag" echo -e "Building image with:\npython$python_version\npytorch$torch_version\ntensorflow:$tensorflow_version\ncudatoolkit:$cudatoolkit_version\ncpu:$is_cpu\n" docker_file_content=`cat docker/Dockerfile.ubuntu_base` printf "$docker_file_content" > Dockerfile +if [ "$is_dryrun" == "True" ]; then + echo 'Dockerfile created' + exit 0 +fi + +# DOCKER_BUILDKIT=0 while true do - docker build -t $IMAGE_TO_BUILD \ + DOCKER_BUILDKIT=0 docker build -t $IMAGE_TO_BUILD \ --build-arg USE_GPU \ --build-arg BASE_IMAGE \ --build-arg PYTHON_VERSION \ diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh index dceaaa22..bb8c7e3d 100644 --- a/.dev_scripts/build_image.sh +++ b/.dev_scripts/build_image.sh @@ -44,6 +44,8 @@ for i in "$@"; do cudatoolkit_version=11.7 elif [ "$cuda_version" == "11.8.0" ]; then cudatoolkit_version=11.8 + elif [ "$cuda_version" == "12.1.0" ]; then + cudatoolkit_version=12.1 else echo "Unsupport cuda version $cuda_version" exit 1 @@ -130,6 +132,17 @@ elif [[ $python_version == 3.8* ]]; then export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu20.04-cuda$cuda_version-py38-torch$torch_version-tf$tensorflow_version-base fi base_tag=$base_tag-py38 +elif [[ $python_version == 3.10* ]]; then + if [ "$is_cpu" == "True" ]; then + echo "Building python3.10 cpu image" + base_tag=ubuntu22.04-py310 + export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-py310-torch$torch_version-tf$tensorflow_version-base + else + echo "Building python3.10 gpu image" + base_tag=ubuntu22.04-cuda$cuda_version-py310 + # reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-cuda12.1.0-py310-torch2.1.0-tf2.14.0-base + export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-cuda$cuda_version-py310-torch$torch_version-tf$tensorflow_version-base + fi else echo "Unsupport python version: $python_version" exit 1 @@ -150,7 +163,8 @@ echo -e "Building image with:\npython$python_version\npytorch$torch_version\nten docker_file_content=`cat docker/Dockerfile.ubuntu` if [ "$is_ci_test" != "True" ]; then echo "Building ModelScope lib, will install ModelScope lib to image" - docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir -U funasr transformers && pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/releases/build/modelscope-$modelscope_version-py3-none-any.whl " + docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir -U adaseq pai-easycv ms_swift funasr 'transformers<4.35.0'" + docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$CIS_ENV_COMMIT_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $CIS_ENV_BRANCH --single-branch $REPO_URL && cd MaaS-lib && python setup.py install && cd / && rm -fr /tmp/MaaS-lib" fi echo "$is_dsw" if [ "$is_dsw" == "False" ]; then @@ -160,12 +174,6 @@ else docker_file_content="${docker_file_content} \nENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope" # pre compile extension docker_file_content="${docker_file_content} \nRUN python -c 'from modelscope.utils.pre_compile import pre_compile_all;pre_compile_all()'" - if [ "$is_cpu" == "True" ]; then - echo 'build cpu image' - else - # fix easycv extension and tinycudann conflict. - docker_file_content="${docker_file_content} \nRUN bash /tmp/install_tiny_cuda_nn.sh" - fi fi if [ "$is_ci_test" == "True" ]; then echo "Building CI image, uninstall modelscope" @@ -175,7 +183,7 @@ printf "$docker_file_content" > Dockerfile while true do - docker build -t $IMAGE_TO_BUILD \ + DOCKER_BUILDKIT=0 docker build -t $IMAGE_TO_BUILD \ --build-arg USE_GPU \ --build-arg BASE_IMAGE \ --build-arg PYTHON_VERSION \ diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index 4ac4fd53..55965f83 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -1,10 +1,47 @@ ARG BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu20.04-cuda11.3.0-py37-torch1.11.0-tf1.15.5-base FROM $BASE_IMAGE - -RUN apt-get update && apt-get install -y iputils-ping net-tools iproute2 && \ +RUN apt-get update && \ + apt-get install -y libsox-dev unzip zip iputils-ping telnet && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# install modelscope + +# install jupyter plugin +RUN mkdir -p /root/.local/share/jupyter/labextensions/ && \ + cp -r /tmp/resources/jupyter_plugins/* /root/.local/share/jupyter/labextensions/ + +COPY docker/scripts/modelscope_env_init.sh /usr/local/bin/ms_env_init.sh +# python3.8 pip install git+https://github.com/jin-s13/xtcocoapi.git@v1.13 +# pip install git+https://github.com/gatagat/lap.git@v0.4.0 +RUN pip install --no-cache-dir numpy 'cython<=0.29.36' funtextprocessing kwsbp==0.0.6 safetensors typeguard==2.13.3 scikit-learn librosa==0.9.2 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html + +RUN pip install --no-cache-dir adaseq text2sql_lgesql==1.3.0 \ + git+https://github.com/jin-s13/xtcocoapi.git@v1.14 \ + git+https://github.com/gatagat/lap.git@v0.4.0 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps + +RUN mv /opt/conda/compiler_compat/ld /opt/conda/compiler_compat/ldbk && \ + pip install --no-cache-dir mpi4py paint_ldm \ + mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 \ + ipykernel fasttext fairseq deepspeed -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html + +ARG USE_GPU + + +RUN if [ "$USE_GPU" = "True" ] ; then \ + CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0" pip install --no-cache-dir 'git+https://github.com/facebookresearch/detectron2.git'; \ + else \ + echo 'cpu unsupport detectron2'; \ + fi + +# torchmetrics==0.11.4 for ofa +RUN if [ "$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir torchsde jupyterlab torchmetrics==0.11.4 tiktoken transformers_stream_generator bitsandbytes basicsr optimum && \ + pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ && \ + pip install --no-cache-dir -U xformers --index-url https://download.pytorch.org/whl/cu118 && \ + pip install --no-cache-dir flash_attn==2.3.3+torch2.1cu118 tinycudann==1.7+cu118 vllm==0.2.1+cu118torch2.1 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ + else \ + echo 'cpu unsupport vllm auto-gptq'; \ + fi + COPY requirements /var/modelscope RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /var/modelscope/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ @@ -16,47 +53,10 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r /var/modelscope/tests.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ pip cache purge -# install jupyter plugin -RUN mkdir -p /root/.local/share/jupyter/labextensions/ && \ - cp -r /tmp/resources/jupyter_plugins/* /root/.local/share/jupyter/labextensions/ - -COPY docker/scripts/modelscope_env_init.sh /usr/local/bin/ms_env_init.sh -# python3.8 pip install git+https://github.com/jin-s13/xtcocoapi.git@v1.13 -# pip install git+https://github.com/gatagat/lap.git@v0.4.0 -RUN pip install --no-cache-dir text2sql_lgesql==1.3.0 \ - git+https://github.com/jin-s13/xtcocoapi.git@v1.13 \ - git+https://github.com/gatagat/lap.git@v0.4.0 \ - detectron2==0.3 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps - -RUN pip install --no-cache-dir mpi4py paint_ldm \ - mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 pai-easycv ms_swift \ - ipykernel fasttext fairseq deepspeed -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html - -ARG USE_GPU -# for cpu install cpu version faiss, faiss depends on blas lib, we install libopenblas TODO rename gpu or cpu version faiss -RUN if [ "$USE_GPU" = "True" ] ; then \ - pip install --no-cache-dir funtextprocessing kwsbp==0.0.6 faiss==1.7.2 safetensors typeguard==2.13.3 scikit-learn librosa==0.9.2 funasr -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ - else \ - pip install --no-cache-dir funtextprocessing kwsbp==0.0.6 https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/faiss-1.7.2-py37-none-linux_x86_64.whl safetensors typeguard==2.13.3 scikit-learn librosa==0.9.2 funasr -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ - fi - -RUN pip install --no-cache-dir wenetruntime==1.11.0 adaseq --no-deps COPY examples /modelscope/examples - -# for pai-easycv setup compatiblity issue ENV SETUPTOOLS_USE_DISTUTILS=stdlib - -RUN if [ "$USE_GPU" = "True" ] ; then \ - CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6" pip install --no-cache-dir 'git+https://github.com/facebookresearch/detectron2.git'; \ - else \ - echo 'cpu unsupport detectron2'; \ - fi - -# torchmetrics==0.11.4 for ofa -RUN pip install --no-cache-dir jupyterlab torchmetrics==0.11.4 tiktoken transformers_stream_generator 'protobuf<=3.20.0' bitsandbytes basicsr -COPY docker/scripts/install_flash_attension.sh /tmp/install_flash_attension.sh -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_flash_attension.sh; \ - else \ - echo 'cpu unsupport flash attention'; \ - fi +ENV VLLM_USE_MODELSCOPE=True +RUN cp /tmp/resources/conda.aliyun ~/.condarc && \ + pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ + pip config set install.trusted-host mirrors.aliyun.com && \ + cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list diff --git a/docker/Dockerfile.ubuntu_base b/docker/Dockerfile.ubuntu_base index b848e1a1..7f8409fe 100644 --- a/docker/Dockerfile.ubuntu_base +++ b/docker/Dockerfile.ubuntu_base @@ -9,10 +9,11 @@ SHELL ["/bin/bash", "-c"] COPY docker/rcfiles /tmp/resources COPY docker/jupyter_plugins /tmp/resources/jupyter_plugins RUN apt-get update && apt-get install -y --reinstall ca-certificates && \ - apt-get clean && \ - cp /tmp/resources/sources.list.aliyun /etc/apt/sources.list && \ - apt-get update && \ - apt-get install -y locales wget git strace gdb sox libopenmpi-dev curl \ + apt-get install -y apt-utils openssh-server locales wget git strace gdb sox libopenmpi-dev curl \ + iputils-ping net-tools iproute2 autoconf automake gperf libre2-dev libssl-dev \ + libtool libcurl4-openssl-dev libb64-dev libgoogle-perftools-dev patchelf \ + rapidjson-dev scons software-properties-common pkg-config unzip zlib1g-dev \ + libarchive-dev libxml2-dev libnuma-dev \ libgeos-dev strace vim ffmpeg libsm6 tzdata language-pack-zh-hans \ ttf-wqy-microhei ttf-wqy-zenhei xfonts-wqy libxext6 build-essential ninja-build && \ wget https://packagecloud.io/github/git-lfs/packages/debian/bullseye/git-lfs_3.2.0_amd64.deb/download -O ./git-lfs_3.2.0_amd64.deb && \ @@ -27,33 +28,17 @@ RUN apt-get update && apt-get install -y --reinstall ca-certificates && \ rm -rf /var/lib/apt/lists/* ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN.UTF-8 LC_ALL=zh_CN.UTF-8 +RUN wget -O /tmp/boost.tar.gz https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.gz && (cd /tmp && tar xzf boost.tar.gz) && mv /tmp/boost_1_80_0/boost /usr/include/boost #install and config python -ARG PYTHON_VERSION=3.7.13 +ARG PYTHON_VERSION=3.10.13 # Miniconda3-py37_23.1.0-1-Linux-x86_64.sh is last python3.7 version -RUN if [ "$PYTHON_VERSION" = "3.7.13" ] ; then \ - wget --quiet https://mirrors.aliyun.com/anaconda/miniconda/Miniconda3-py37_23.1.0-1-Linux-x86_64.sh -O ./miniconda.sh && \ +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py310_23.9.0-0-Linux-x86_64.sh -O ./miniconda.sh && \ /bin/bash miniconda.sh -b -p /opt/conda && \ rm -f miniconda.sh && \ ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - cp /tmp/resources/conda.tuna ~/.condarc && \ - source /root/.bashrc && \ - conda install --yes python==${PYTHON_VERSION} && \ - pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ - pip config set install.trusted-host mirrors.aliyun.com;\ -else \ - wget --quiet https://mirrors.aliyun.com/anaconda/miniconda/Miniconda3-latest-Linux-${arch}.sh -O ./miniconda.sh && \ - /bin/bash miniconda.sh -b -p /opt/conda && \ - rm -f miniconda.sh && \ - ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ - echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - cp /tmp/resources/conda.tuna ~/.condarc && \ - source /root/.bashrc && \ - conda install --yes python==${PYTHON_VERSION} && \ - pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \ - pip config set install.trusted-host mirrors.aliyun.com;\ -fi + source /root/.bashrc ARG USE_GPU=True @@ -85,12 +70,6 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ fi \ fi -# mmcv-full<=1.7.0 for mmdet3d compatible -RUN if [ "$USE_GPU" = "True" ] ; then \ - CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="5.0 5.2 6.0 6.1 7.0 7.5 8.0 8.6" MMCV_WITH_OPS=1 MAX_JOBS=8 FORCE_CUDA=1 pip install --no-cache-dir 'mmcv-full<=1.7.0' && pip cache purge; \ - else \ - MMCV_WITH_OPS=1 MAX_JOBS=8 pip install --no-cache-dir 'mmcv-full<=1.7.0' && pip cache purge; \ - fi # default shell bash ENV SHELL=/bin/bash @@ -98,12 +77,25 @@ ENV SHELL=/bin/bash RUN if [ "$USE_GPU" = "True" ] ; then \ pip install dgl -f https://data.dgl.ai/wheels/$CUDATOOLKIT_VERSION/repo.html; \ else \ - pip install --no-cache-dir dgl==0.9.0 dglgo -f https://data.dgl.ai/wheels/repo.html; \ + pip install --no-cache-dir dgl dglgo -f https://data.dgl.ai/wheels/repo.html; \ fi # copy install scripts COPY docker/scripts/install_unifold.sh docker/scripts/install_colmap.sh docker/scripts/install_pytorch3d_nvdiffrast.sh docker/scripts/install_tiny_cuda_nn.sh docker/scripts/install_apex.sh /tmp/ +# 3d supports +RUN if [ "$USE_GPU" = "True" ] ; then \ + bash /tmp/install_colmap.sh; \ + else \ + echo 'cpu unsupport colmap'; \ + fi +# install pytorch3d +RUN if [ "$USE_GPU" = "True" ] ; then \ + bash /tmp/install_pytorch3d_nvdiffrast.sh; \ + else \ + echo 'cpu unsupport pytorch3d nvdiffrast'; \ + fi + # for uniford RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_unifold.sh; \ @@ -112,28 +104,11 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ fi RUN if [ "$USE_GPU" = "True" ] ; then \ - export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6+PTX" && pip install --no-cache-dir git+https://github.com/gxd1994/Pointnet2.PyTorch.git@master#subdirectory=pointnet2; \ + export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.9;9.0;8.6+PTX" && pip install --no-cache-dir git+https://github.com/gxd1994/Pointnet2.PyTorch.git@master#subdirectory=pointnet2; \ else \ echo 'cpu unsupport Pointnet2'; \ fi -# 3d supports -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_colmap.sh; \ - else \ - echo 'cpu unsupport colmap'; \ - fi -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_tiny_cuda_nn.sh \ - else \ - echo 'cpu unsupport tiny_cudann'; \ - fi -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_pytorch3d_nvdiffrast.sh; \ - else \ - echo 'cpu unsupport pytorch3d nvdiffrast'; \ - fi -# end of 3D # install apex after deepspeed RUN if [ "$USE_GPU" = "True" ] ; then \ bash /tmp/install_apex.sh; \ @@ -141,4 +116,10 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ echo 'cpu unsupport apex'; \ fi +RUN if [ "$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/packages/mmcv_full-1.7.0-cp310-cp310-linux_x86_64.whl; \ + else \ + pip install --no-cache-dir mmcv_full==1.7.0+torch2.1cpu -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ + fi +RUN conda install imageio-ffmpeg -c conda-forge -y ENTRYPOINT [] diff --git a/docker/rcfiles/conda.aliyun b/docker/rcfiles/conda.aliyun new file mode 100644 index 00000000..d0aa2014 --- /dev/null +++ b/docker/rcfiles/conda.aliyun @@ -0,0 +1,14 @@ +channels: + - defaults +show_channel_urls: true +default_channels: + - http://mirrors.aliyun.com/anaconda/pkgs/main + - http://mirrors.aliyun.com/anaconda/pkgs/r + - http://mirrors.aliyun.com/anaconda/pkgs/msys2 +custom_channels: + conda-forge: http://mirrors.aliyun.com/anaconda/cloud + msys2: http://mirrors.aliyun.com/anaconda/cloud + bioconda: http://mirrors.aliyun.com/anaconda/cloud + menpo: http://mirrors.aliyun.com/anaconda/cloud + pytorch: http://mirrors.aliyun.com/anaconda/cloud + simpleitk: http://mirrors.aliyun.com/anaconda/cloud diff --git a/docker/rcfiles/conda.tuna b/docker/rcfiles/conda.tuna deleted file mode 100644 index ce8a2908..00000000 --- a/docker/rcfiles/conda.tuna +++ /dev/null @@ -1,15 +0,0 @@ -channels: - - defaults -show_channel_urls: true -default_channels: - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2 -custom_channels: - conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - msys2: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - bioconda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - menpo: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud - simpleitk: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud diff --git a/docker/rcfiles/pip.conf.tsinghua b/docker/rcfiles/pip.conf.tsinghua deleted file mode 100644 index 4242075a..00000000 --- a/docker/rcfiles/pip.conf.tsinghua +++ /dev/null @@ -1,2 +0,0 @@ -[global] -index-url=https://pypi.tuna.tsinghua.edu.cn/simple diff --git a/docker/rcfiles/ubuntu2204.aliyun b/docker/rcfiles/ubuntu2204.aliyun new file mode 100644 index 00000000..d5dce70c --- /dev/null +++ b/docker/rcfiles/ubuntu2204.aliyun @@ -0,0 +1,10 @@ +deb http://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse +#deb http://mirrors.aliyun.com/ubuntu/ jammy-proposed main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-proposed main restricted universe multiverse +deb http://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse +#deb-src http://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse diff --git a/docker/scripts/install_apex.sh b/docker/scripts/install_apex.sh index 40d9f268..7ecd288b 100644 --- a/docker/scripts/install_apex.sh +++ b/docker/scripts/install_apex.sh @@ -2,6 +2,6 @@ export MAX_JOBS=16 \ && git clone https://github.com/NVIDIA/apex \ && cd apex \ && git checkout 6bd01c4b99a84648ad5e5238a959735e6936c813 \ -&& TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5;8.0;8.6" pip install -v --disable-pip-version-check --no-cache --global-option="--cpp_ext" --global-option="--cuda_ext" ./ \ +&& TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5;8.0;8.9;9.0;8.6+PTX" pip install -v --disable-pip-version-check --no-cache --global-option="--cpp_ext" --global-option="--cuda_ext" ./ \ && cd .. \ && rm -rf apex diff --git a/docker/scripts/install_colmap.sh b/docker/scripts/install_colmap.sh index f21fca1d..ada7077a 100644 --- a/docker/scripts/install_colmap.sh +++ b/docker/scripts/install_colmap.sh @@ -8,7 +8,7 @@ wget -q https://cmake.org/files/v3.25/cmake-3.25.2-linux-x86_64.sh \ && export CMAKE_BUILD_PARALLEL_LEVEL=36 \ && export MAX_JOBS=16 \ && export CUDA_ARCHITECTURES="all" \ - && git clone --depth 1 --branch 3.8 https://github.com/colmap/colmap.git \ + && git clone https://github.com/colmap/colmap.git \ && cd colmap \ && mkdir build \ && cd build \ diff --git a/docker/scripts/install_flash_attension.sh b/docker/scripts/install_flash_attension.sh index f37e567d..6413cca9 100644 --- a/docker/scripts/install_flash_attension.sh +++ b/docker/scripts/install_flash_attension.sh @@ -1,4 +1,4 @@ - git clone -b v2.3.2 https://github.com/Dao-AILab/flash-attention && \ - cd flash-attention && python setup.py install && \ + git clone -b v2.3.3 https://github.com/Dao-AILab/flash-attention && \ + cd flash-attention && MAX_JOBS=46 python setup.py install && \ cd .. && \ rm -rf flash-attention diff --git a/docker/scripts/install_pytorch3d_nvdiffrast.sh b/docker/scripts/install_pytorch3d_nvdiffrast.sh index c7880f92..c64ea7fb 100644 --- a/docker/scripts/install_pytorch3d_nvdiffrast.sh +++ b/docker/scripts/install_pytorch3d_nvdiffrast.sh @@ -1,6 +1,7 @@ export CMAKE_BUILD_PARALLEL_LEVEL=36 \ && export MAX_JOBS=36 \ - && export CMAKE_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;86" \ + && export CMAKE_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;8.6+PTX;87;89;90" \ + && export TORCH_CUDA_ARCH_LIST="5.0;5.2;6.0;6.1;7.0;7.5;8.0;8.6+PTX;8.7;8.9;9.0" \ && git clone --branch 2.1.0 --recursive https://github.com/NVIDIA/thrust.git \ && cd thrust \ && mkdir build \ @@ -10,7 +11,11 @@ export CMAKE_BUILD_PARALLEL_LEVEL=36 \ && cd ../.. \ && rm -rf thrust \ && pip install --no-cache-dir fvcore iopath \ - && pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable" \ + && curl -LO https://github.com/NVIDIA/cub/archive/2.1.0.tar.gz \ + && tar xzf 2.1.0.tar.gz \ + && export CUB_HOME=$PWD/cub-2.1.0 \ + && FORCE_CUDA=1 pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable" \ + && rm -fr 2.1.0.tar.gz $PWD/cub-2.1.0 \ && apt-get update \ && apt-get install -y --no-install-recommends pkg-config libglvnd0 libgl1 libglx0 libegl1 libgles2 libglvnd-dev libgl1-mesa-dev libegl1-mesa-dev libgles2-mesa-dev -y \ && git clone https://github.com/NVlabs/nvdiffrast.git \ diff --git a/docker/scripts/install_tiny_cuda_nn.sh b/docker/scripts/install_tiny_cuda_nn.sh index 96ae5c72..1aaa2863 100644 --- a/docker/scripts/install_tiny_cuda_nn.sh +++ b/docker/scripts/install_tiny_cuda_nn.sh @@ -1,7 +1,6 @@ -export CMAKE_BUILD_PARALLEL_LEVEL=36 && export MAX_JOBS=36 && export TCNN_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;86" \ +export CMAKE_BUILD_PARALLEL_LEVEL=36 && export MAX_JOBS=36 && export TCNN_CUDA_ARCHITECTURES="50;52;60;61;70;75;80;89;90;86" \ && git clone --recursive https://github.com/nvlabs/tiny-cuda-nn \ && cd tiny-cuda-nn \ - && git checkout v1.6 \ && cd bindings/torch \ && python setup.py install \ && cd ../../.. \ diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index f83defd0..45d1d442 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -600,7 +600,7 @@ class HubApi: cookies = ModelScopeConfig.get_cookies() r = self.session.get(datahub_url, cookies=cookies) resp = r.json() - datahub_raise_on_error(datahub_url, resp) + datahub_raise_on_error(datahub_url, resp, r) dataset_id = resp['Data']['Id'] dataset_type = resp['Data']['Type'] return dataset_id, dataset_type @@ -613,7 +613,7 @@ class HubApi: cookies=cookies, headers=self.builder_headers(self.headers)) resp = r.json() - datahub_raise_on_error(datahub_url, resp) + datahub_raise_on_error(datahub_url, resp, r) file_list = resp['Data'] if file_list is None: raise NotExistError( @@ -866,7 +866,7 @@ class HubApi: cookies=cookies, headers={'user-agent': ModelScopeConfig.get_user_agent()}) resp = r.json() - datahub_raise_on_error(url, resp) + datahub_raise_on_error(url, resp, r) return resp['Data'] def dataset_download_statistics(self, dataset_name: str, namespace: str, use_streaming: bool) -> None: diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py index 48bb5fe0..804cfe27 100644 --- a/modelscope/hub/errors.py +++ b/modelscope/hub/errors.py @@ -117,12 +117,13 @@ def raise_on_error(rsp): raise RequestError(rsp['Message']) -def datahub_raise_on_error(url, rsp): +def datahub_raise_on_error(url, rsp, http_response: requests.Response): """If response error, raise exception Args: url (str): The request url rsp (HTTPResponse): The server response. + http_response: the origin http response. Raises: RequestError: the http request error. @@ -133,7 +134,7 @@ def datahub_raise_on_error(url, rsp): if rsp.get('Code') == HTTPStatus.OK: return True else: - request_id = get_request_id(rsp) + request_id = get_request_id(http_response) raise RequestError( f"Url = {url}, Request id={request_id} Message = {rsp.get('Message')},\ Please specify correct dataset_name and namespace.")