From 909e54fa336d649480714fa2a0b7254983702930 Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Wed, 5 Jun 2024 19:08:46 +0800
Subject: [PATCH] modify build script

Skip TensorFlow in CPU images, build Python from source instead of
Miniconda in the base image, move to torch 2.3.0 / cu121, and drop the
now-unused conda.aliyun mirror config.
---
 .dev_scripts/build_base_image.sh |   9 +-
 .dev_scripts/build_image.sh      |  18 ++--
 docker/Dockerfile.ubuntu         |  19 ++--
 docker/Dockerfile.ubuntu_base    | 179 ++++++++++++++++++++++---------
 docker/rcfiles/conda.aliyun      |  14 ---
 5 files changed, 161 insertions(+), 78 deletions(-)
 delete mode 100644 docker/rcfiles/conda.aliyun

diff --git a/.dev_scripts/build_base_image.sh b/.dev_scripts/build_base_image.sh
index d2f636a8..c338d6a6 100644
--- a/.dev_scripts/build_base_image.sh
+++ b/.dev_scripts/build_base_image.sh
@@ -120,7 +120,14 @@ else
     echo "Unsupport python version: $python_version"
     exit 1
 fi
-target_image_tag=$base_tag-torch$torch_version-tf$tensorflow_version-base
+# target_image_tag=$base_tag-torch$torch_version-tf$tensorflow_version-base
+# cpu image: no tensorflow
+if [ "$is_cpu" == "True" ]; then
+    target_image_tag=$base_tag-torch$torch_version-base
+else
+    target_image_tag=$base_tag-torch$torch_version-tf$tensorflow_version-base
+fi
+
 export IMAGE_TO_BUILD=$MODELSCOPE_REPO_ADDRESS:$target_image_tag
 export PYTHON_VERSION=$python_version
 export TORCH_VERSION=$torch_version
diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh
index 50fbd57f..73f972da 100644
--- a/.dev_scripts/build_image.sh
+++ b/.dev_scripts/build_image.sh
@@ -130,7 +130,7 @@ elif [[ $python_version == 3.10* ]]; then
     if [ "$is_cpu" == "True" ]; then
         echo "Building python3.10 cpu image"
         base_tag=ubuntu22.04-py310
-        export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-py310-torch$torch_version-tf$tensorflow_version-base
+        export BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu22.04-py310-torch$torch_version-base
     else
         echo "Building python3.10 gpu image"
         base_tag=ubuntu22.04-cuda$cuda_version-py310
@@ -141,9 +141,13 @@ else
     echo "Unsupport python version: $python_version"
     exit 1
 fi
-
-target_image_tag=$base_tag-torch$torch_version-tf$tensorflow_version-$modelscope_version-test
-
+# the cpu image does not install tensorflow
+# target_image_tag=$base_tag-torch$torch_version-tf$tensorflow_version-$modelscope_version-test
+if [ "$is_cpu" == "True" ]; then
+    target_image_tag=$base_tag-torch$torch_version-$modelscope_version-test
+else
+    target_image_tag=$base_tag-torch$torch_version-tf$tensorflow_version-$modelscope_version-test
+fi
 export IMAGE_TO_BUILD=$MODELSCOPE_REPO_ADDRESS:$target_image_tag
 export PYTHON_VERSION=$python_version
 export TORCH_VERSION=$torch_version
@@ -155,7 +159,7 @@
 docker_file_content=`cat docker/Dockerfile.ubuntu`
 
 BUILD_HASH_ID=$(git rev-parse HEAD)
 # install thrid part library
-docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$BUILD_HASH_ID && pip install --no-cache-dir -U adaseq pai-easycv && pip install --no-cache-dir -U 'ms-swift==2.0.2' 'funasr==1.0.14' autoawq 'timm>0.9.5' 'transformers==4.38.2'"
+docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$BUILD_HASH_ID && pip install --no-cache-dir -U adaseq pai-easycv && pip install --no-cache-dir -U 'ms-swift' 'funasr' autoawq 'timm>0.9.5' 'transformers'"
 docker_file_content="${docker_file_content} \nRUN pip uninstall modelscope -y && export COMMIT_ID=$BUILD_HASH_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $build_branch --single-branch $REPO_URL && cd modelscope && pip install . && cd / && rm -fr /tmp/modelscope && pip cache purge;"
 
@@ -169,8 +173,8 @@ else
     docker_file_content="${docker_file_content} \nRUN pip uninstall -y tb-nightly tensorboard && pip install --no-cache-dir -U tensorboard && TORCH_CUDA_ARCH_LIST='6.0 6.1 7.0 7.5 8.0 8.9 9.0 8.6+PTX' python -c 'from modelscope.utils.pre_compile import pre_compile_all;pre_compile_all()'"
 fi
 
-docker_file_content="${docker_file_content} \n RUN cp /tmp/resources/conda.aliyun ~/.condarc && \
-    pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
+
+docker_file_content="${docker_file_content} \n RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
     pip config set install.trusted-host mirrors.aliyun.com && \
     cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list "
 
diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu
index e6120d6e..6d4b4c0f 100644
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -1,7 +1,7 @@
 ARG BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/modelscope:ubuntu20.04-cuda11.3.0-py37-torch1.11.0-tf1.15.5-base
 FROM $BASE_IMAGE
 RUN apt-get update && \
-    apt-get install -y libsox-dev unzip zip iputils-ping telnet sudo && \
+    apt-get install -y libsox-dev unzip libaio-dev zip iputils-ping telnet sudo && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
@@ -19,10 +19,9 @@ RUN pip install --no-cache-dir adaseq text2sql_lgesql==1.3.0 \
     git+https://github.com/jin-s13/xtcocoapi.git@v1.14 \
     git+https://github.com/gatagat/lap.git@v0.4.0 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps
 
-RUN mv /opt/conda/compiler_compat/ld /opt/conda/compiler_compat/ldbk && \
-    pip install --no-cache-dir mpi4py paint_ldm \
+RUN pip install --no-cache-dir mpi4py paint_ldm \
     mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 \
-    ipykernel fasttext fairseq deepspeed -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+    ipykernel fasttext fairseq deepspeed apex -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
 
 ARG USE_GPU
 
@@ -36,12 +35,14 @@ RUN if [ "$USE_GPU" = "True" ] ; then \
 # torchmetrics==0.11.4 for ofa
 # tinycudann for cuda12.1.0 pytorch 2.1.2
 RUN if [ "$USE_GPU" = "True" ] ; then \
+    pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir torchsde jupyterlab torchmetrics==0.11.4 tiktoken transformers_stream_generator bitsandbytes basicsr optimum && \
+    pip install --no-cache-dir flash_attn==2.5.9.post1 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
     pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu121/ && \
-    pip install --no-cache-dir -U 'xformers<0.0.24' --index-url https://download.pytorch.org/whl/cu121 && \
-    pip install --no-cache-dir --force https://modelscope.oss-cn-beijing.aliyuncs.com/packages/tinycudann-1.7-cp310-cp310-linux_x86_64.whl && \
+    pip install --no-cache-dir -U 'xformers' --index-url https://download.pytorch.org/whl/cu121 && \
+    pip install --no-cache-dir --force tinycudann==1.7 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
     pip uninstall -y torch-scatter && TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5;8.0;8.6;8.9;9.0" pip install --no-cache-dir -U torch-scatter && \
-    pip install --no-cache-dir -U flash_attn vllm; \
+    pip install --no-cache-dir -U vllm; \
     else \
     echo 'cpu unsupport vllm auto-gptq'; \
     fi
@@ -56,8 +57,10 @@ RUN pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir -r /var/modelscope/science.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
     pip install --no-cache-dir -r /var/modelscope/tests.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
     pip install --no-cache-dir -r /var/modelscope/svr.txt && \
+    pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/packages/imageio_ffmpeg-0.4.9-py3-none-any.whl --force && \
+    pip install --no-cache-dir 'scipy<1.13.0' && \
     pip cache purge
-
+# 'scipy<1.13.0': newer scipy removed 'kaiser' from 'scipy.signal'
 COPY examples /modelscope/examples
 ENV SETUPTOOLS_USE_DISTUTILS=stdlib
 ENV VLLM_USE_MODELSCOPE=True
diff --git a/docker/Dockerfile.ubuntu_base b/docker/Dockerfile.ubuntu_base
index 24a63f3c..360f216f 100644
--- a/docker/Dockerfile.ubuntu_base
+++ b/docker/Dockerfile.ubuntu_base
@@ -2,20 +2,20 @@ ARG BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.3.0-cud
 FROM $BASE_IMAGE
 ARG DEBIAN_FRONTEND=noninteractive
 ENV TZ=Asia/Shanghai
-ENV CONDA_DIR /opt/conda
-ENV PATH="${CONDA_DIR}/bin:${PATH}"
 ENV arch=x86_64
 SHELL ["/bin/bash", "-c"]
 COPY docker/rcfiles /tmp/resources
 COPY docker/jupyter_plugins /tmp/resources/jupyter_plugins
-RUN apt-get update && apt-get install -y --reinstall ca-certificates && \
-    apt-get install -y apt-utils openssh-server locales wget git strace gdb sox libopenmpi-dev curl \
+RUN apt-get update && apt-get upgrade -y && apt-get install -y --reinstall ca-certificates && \
+    apt-get install -y make apt-utils openssh-server locales wget git strace gdb sox libopenmpi-dev curl \
     iputils-ping net-tools iproute2 autoconf automake gperf libre2-dev libssl-dev \
     libtool libcurl4-openssl-dev libb64-dev libgoogle-perftools-dev patchelf \
     rapidjson-dev scons software-properties-common pkg-config unzip zlib1g-dev \
-    libarchive-dev libxml2-dev libnuma-dev \
+    libbz2-dev libreadline-dev libsqlite3-dev llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev liblzma-dev \
+    libarchive-dev libxml2-dev libnuma-dev cmake \
     libgeos-dev strace vim ffmpeg libsm6 tzdata language-pack-zh-hans \
-    ttf-wqy-microhei ttf-wqy-zenhei xfonts-wqy libxext6 build-essential ninja-build && \
+    ttf-wqy-microhei ttf-wqy-zenhei xfonts-wqy libxext6 build-essential ninja-build \
+    libjpeg-dev libpng-dev && \
     wget https://packagecloud.io/github/git-lfs/packages/debian/bullseye/git-lfs_3.2.0_amd64.deb/download -O ./git-lfs_3.2.0_amd64.deb && \
     dpkg -i ./git-lfs_3.2.0_amd64.deb && \
     rm -f ./git-lfs_3.2.0_amd64.deb && \
@@ -28,48 +28,130 @@ RUN apt-get update && apt-get install -y --reinstall ca-certificates && \
     rm -rf /var/lib/apt/lists/*
 
 ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN.UTF-8 LC_ALL=zh_CN.UTF-8
 
-RUN wget -O /tmp/boost.tar.gz https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.gz && (cd /tmp && tar xzf boost.tar.gz) && mv /tmp/boost_1_80_0/boost /usr/include/boost
+RUN wget -O /tmp/boost.tar.gz https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.gz && \
+    cd /tmp && tar xzf boost.tar.gz && \
+    mv /tmp/boost_1_80_0/boost /usr/include/boost && \
+    rm -rf /tmp/boost_1_80_0 && rm -rf boost.tar.gz
 
-#install and config python
-ARG PYTHON_VERSION=3.10.13
-# Miniconda3-py37_23.1.0-1-Linux-x86_64.sh is last python3.7 version
-RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py310_23.9.0-0-Linux-x86_64.sh -O ./miniconda.sh && \
-    /bin/bash miniconda.sh -b -p /opt/conda && \
-    rm -f miniconda.sh && \
-    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
-    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
/opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - source /root/.bashrc +#install and config python copy from https://github.com/docker-library/python/blob/1b7a1106674a21e699b155cbd53bf39387284cca/3.10/bookworm/Dockerfile +ARG PYTHON_VERSION=3.10.14 +ENV PATH /usr/local/bin:$PATH +ENV GPG_KEY A035C8C19219BA821ECEA86B64E628F8D684696D +ENV PYTHON_VERSION 3.10.14 + +RUN set -eux; \ + \ + wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz"; \ + wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc"; \ + GNUPGHOME="$(mktemp -d)"; export GNUPGHOME; \ + gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys "$GPG_KEY"; \ + gpg --batch --verify python.tar.xz.asc python.tar.xz; \ + gpgconf --kill all; \ + rm -rf "$GNUPGHOME" python.tar.xz.asc; \ + mkdir -p /usr/src/python; \ + tar --extract --directory /usr/src/python --strip-components=1 --file python.tar.xz; \ + rm python.tar.xz; \ + \ + cd /usr/src/python; \ + gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)"; \ + ./configure \ + --build="$gnuArch" \ + --enable-loadable-sqlite-extensions \ + --enable-optimizations \ + --enable-option-checking=fatal \ + --enable-shared \ + --with-lto \ + --with-system-expat \ + --without-ensurepip \ + ; \ + nproc="$(nproc)"; \ + EXTRA_CFLAGS="$(dpkg-buildflags --get CFLAGS)"; \ + LDFLAGS="$(dpkg-buildflags --get LDFLAGS)"; \ + make -j "$nproc" \ + "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \ + "LDFLAGS=${LDFLAGS:-}" \ + "PROFILE_TASK=${PROFILE_TASK:-}" \ + ; \ +# https://github.com/docker-library/python/issues/784 +# prevent accidental usage of a system installed libpython of the same version + rm python; \ + make -j "$nproc" \ + "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \ + "LDFLAGS=${LDFLAGS:--Wl},-rpath='\$\$ORIGIN/../lib'" \ + "PROFILE_TASK=${PROFILE_TASK:-}" \ + python \ + ; \ + make install; \ + \ +# enable GDB to load debugging data: https://github.com/docker-library/python/pull/701 + bin="$(readlink -ve /usr/local/bin/python3)"; \ + dir="$(dirname "$bin")"; \ + mkdir -p "/usr/share/gdb/auto-load/$dir"; \ + cp -vL Tools/gdb/libpython.py "/usr/share/gdb/auto-load/$bin-gdb.py"; \ + \ + cd /; \ + rm -rf /usr/src/python; \ + \ + find /usr/local -depth \ + \( \ + \( -type d -a \( -name test -o -name tests -o -name idle_test \) \) \ + -o \( -type f -a \( -name '*.pyc' -o -name '*.pyo' -o -name 'libpython*.a' \) \) \ + \) -exec rm -rf '{}' + \ + ; \ + \ + ldconfig; \ + \ + python3 --version + +# make some useful symlinks that are expected to exist ("/usr/local/bin/python" and friends) +RUN set -eux; \ + for src in idle3 pydoc3 python3 python3-config; do \ + dst="$(echo "$src" | tr -d 3)"; \ + [ -s "/usr/local/bin/$src" ]; \ + [ ! 
-e "/usr/local/bin/$dst" ]; \ + ln -svT "$src" "/usr/local/bin/$dst"; \ + done + +# if this is called "PIP_VERSION", pip explodes with "ValueError: invalid truth value ''" +ENV PYTHON_PIP_VERSION 23.0.1 +# https://github.com/docker-library/python/issues/365 +ENV PYTHON_SETUPTOOLS_VERSION 65.5.1 +# https://github.com/pypa/get-pip +ENV PYTHON_GET_PIP_URL https://github.com/pypa/get-pip/raw/dbf0c85f76fb6e1ab42aa672ffca6f0a675d9ee4/public/get-pip.py +ENV PYTHON_GET_PIP_SHA256 dfe9fd5c28dc98b5ac17979a953ea550cec37ae1b47a5116007395bfacff2ab9 + +RUN set -eux; \ + \ + wget -O get-pip.py "$PYTHON_GET_PIP_URL"; \ + echo "$PYTHON_GET_PIP_SHA256 *get-pip.py" | sha256sum -c -; \ + \ + export PYTHONDONTWRITEBYTECODE=1; \ + \ + python get-pip.py \ + --disable-pip-version-check \ + --no-cache-dir \ + --no-compile \ + "pip==$PYTHON_PIP_VERSION" \ + "setuptools==$PYTHON_SETUPTOOLS_VERSION" \ + ; \ + rm -f get-pip.py; \ + \ + pip --version +# end of install python ARG USE_GPU=True # install pytorch -ARG TORCH_VERSION=1.12.0 -ARG CUDATOOLKIT_VERSION=cu117 +ARG TORCH_VERSION=2.3.0 +ARG CUDATOOLKIT_VERSION=cu121 + RUN if [ "$USE_GPU" = "True" ] ; then \ - pip install --no-cache-dir torch==$TORCH_VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDATOOLKIT_VERSION; \ + pip install --no-cache-dir "torch==2.3.0" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ + pip install --no-cache-dir torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121; \ else \ pip install --no-cache-dir torch==$TORCH_VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu; \ fi -# install tensorflow -ARG TENSORFLOW_VERSION=1.15.5 -RUN if [ "$USE_GPU" = "True" ] ; then \ - if [ "$TENSORFLOW_VERSION" = "1.15.5" ] ; then \ - pip install --no-cache-dir tensorflow==$TENSORFLOW_VERSION -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ - else \ - pip install --no-cache-dir tensorflow==$TENSORFLOW_VERSION; \ - fi \ - else \ - # only python 3.7 has tensorflow 1.15.5 - if [ "$PYTHON_VERSION" = "3.7.13" ] ; then \ - pip install --no-cache-dir tensorflow==$TENSORFLOW_VERSION; \ - elif [ "$TENSORFLOW_VERSION" = "1.15.5" ] ; then \ - pip install --no-cache-dir numpy==1.18.5 https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/tensorflow-1.15.5-cp38-cp38-linux_x86_64.whl; \ - else \ - pip install --no-cache-dir tensorflow==$TENSORFLOW_VERSION; \ - fi \ - fi - # default shell bash ENV SHELL=/bin/bash @@ -109,17 +191,18 @@ RUN if [ "$USE_GPU" = "True" ] ; then \ echo 'cpu unsupport Pointnet2'; \ fi -# install apex after deepspeed -RUN if [ "$USE_GPU" = "True" ] ; then \ - bash /tmp/install_apex.sh; \ - else \ - echo 'cpu unsupport apex'; \ + +ARG TENSORFLOW_VERSION=1.15.5 + RUN if [ "$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir tensorflow==$TENSORFLOW_VERSION; \ + else \ + echo 'cpu not install tensorflow'; \ fi -RUN if [ "$USE_GPU" = "True" ] ; then \ - pip install --no-cache-dir mmcv-full==1.7.0+torch2.1.1cu121 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ - else \ - pip install --no-cache-dir mmcv_full==1.7.0+torch2.1cpu -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ + RUN if [ "$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir "https://modelscope.oss-cn-beijing.aliyuncs.com/packages/mmcv/mmcv_full-1.7.0-cp310-cp310-linux_x86_64.whl"; \ + else \ + pip install --no-cache-dir mmcv_full==1.7.0+cputorch230 -f 
     fi
-RUN conda install imageio-ffmpeg -c conda-forge -y
+
 ENTRYPOINT []
diff --git a/docker/rcfiles/conda.aliyun b/docker/rcfiles/conda.aliyun
deleted file mode 100644
index d0aa2014..00000000
--- a/docker/rcfiles/conda.aliyun
+++ /dev/null
@@ -1,14 +0,0 @@
-channels:
-  - defaults
-show_channel_urls: true
-default_channels:
-  - http://mirrors.aliyun.com/anaconda/pkgs/main
-  - http://mirrors.aliyun.com/anaconda/pkgs/r
-  - http://mirrors.aliyun.com/anaconda/pkgs/msys2
-custom_channels:
-  conda-forge: http://mirrors.aliyun.com/anaconda/cloud
-  msys2: http://mirrors.aliyun.com/anaconda/cloud
-  bioconda: http://mirrors.aliyun.com/anaconda/cloud
-  menpo: http://mirrors.aliyun.com/anaconda/cloud
-  pytorch: http://mirrors.aliyun.com/anaconda/cloud
-  simpleitk: http://mirrors.aliyun.com/anaconda/cloud