Refine dockerfile (#1042)

2026-05-18 05:05:00 +02:00 · 2024-10-23 09:59:51 +08:00
parent eac004d7f2
commit 7a57ee418c
5 changed files with 47 additions and 38 deletions
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -16,12 +16,18 @@ RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
    pip config set install.trusted-host mirrors.aliyun.com && \
    cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list

-RUN echo "cache bust $(date +%Y%m%d%H%M%S)"
-
 COPY {meta_file} /tmp/install.sh

 RUN sh /tmp/install.sh {version_args}

+RUN pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/packages/imageio_ffmpeg-0.4.9-py3-none-any.whl --force
+
+RUN pip uninstall ms-swift modelscope -y
+
+RUN cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b  {modelscope_branch}  --single-branch https://github.com/modelscope/modelscope.git && cd modelscope && pip install .[all] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && cd / && rm -fr /tmp/modelscope && pip cache purge;
+
+RUN cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b {swift_branch}  --single-branch https://github.com/modelscope/ms-swift.git && cd ms-swift && pip install .[all] && cd / && rm -fr /tmp/ms-swift && pip cache purge;
+
 ENV SETUPTOOLS_USE_DISTUTILS=stdlib
 ENV VLLM_USE_MODELSCOPE=True
 ENV LMDEPLOY_USE_MODELSCOPE=True
--- a/docker/Dockerfile.ubuntu_base
+++ b/docker/Dockerfile.ubuntu_base
@@ -155,7 +155,7 @@ RUN if [ "$USE_GPU" = "True" ] ; then \
 ENV SHELL=/bin/bash
 # install special package
 RUN if [ "$USE_GPU" = "True" ] ; then \
-        pip install  dgl -f https://data.dgl.ai/wheels/$CUDATOOLKIT_VERSION/repo.html; \
+        pip install --no-cache-dir dgl -f https://data.dgl.ai/wheels/$CUDATOOLKIT_VERSION/repo.html; \
    else \
        pip install --no-cache-dir dgl dglgo -f https://data.dgl.ai/wheels/repo.html; \
    fi
@@ -198,9 +198,18 @@ ARG TENSORFLOW_VERSION={tf_version}
    fi

 RUN if [ "$USE_GPU" = "True" ] ; then \
-            cd /tmp && git clone -b ms_build --single-branch https://github.com/tastelikefeet/mmcv.git && cd mmcv && TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.9;9.0;8.6+PTX" MMCV_WITH_OPS=1 MAX_JOBS=32 FORCE_CUDA=1 python setup.py bdist_wheel && cd / && rm -fr /tmp/mmcv && pip cache purge; \
+            cd /tmp && git clone -b ms_build --single-branch https://github.com/tastelikefeet/mmcv.git && cd mmcv && TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.9;9.0;8.6+PTX" MMCV_WITH_OPS=1 MAX_JOBS=32 FORCE_CUDA=1 pip install . && cd / && rm -fr /tmp/mmcv && pip cache purge; \
        else \
-            cd /tmp && git clone -b ms_build --single-branch https://github.com/tastelikefeet/mmcv.git && cd mmcv && MMCV_WITH_OPS=1 MAX_JOBS=32 python setup.py bdist_wheel && cd / && rm -fr /tmp/mmcv && pip cache purge; \
+            cd /tmp && git clone -b ms_build --single-branch https://github.com/tastelikefeet/mmcv.git && cd mmcv && MMCV_WITH_OPS=1 MAX_JOBS=32 pip install . && cd / && rm -fr /tmp/mmcv && pip cache purge; \
    fi

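+ # NOTE(review): the prebuilt tinycudann==1.7 wheel presumably targets CUDA 12.1 only, tying this image to cuda121 - confirm before changing the CUDA version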
+ RUN if [ "$USE_GPU" = "True" ] ; then \
+            pip install --no-cache-dir --force tinycudann==1.7  -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \
+        else \
+            echo 'cpu not install tinycudann'; \
+    fi
+
+ RUN pip install --no-cache-dir fairseq
+
 ENTRYPOINT []
--- a/docker/build_image.py
+++ b/docker/build_image.py
@@ -18,9 +18,7 @@ class Builder:
    def init_args(self, args: Any) -> Any:
        if not args.base_image:
            # A mirrored image of nvidia/cuda:12.4.0-devel-ubuntu22.04
-            args.base_image = (
-                'modelscope-image-registry.cn-wulanchabu.cr.aliyuncs.com/'
-                'modelscope/mirror:12.4.0-devel-ubuntu22.04')
+            args.base_image = 'nvidia/cuda:12.1.0-devel-ubuntu22.04'
        if not args.torch_version:
            args.torch_version = '2.3.0'
            args.torchaudio_version = '2.3.0'
@@ -130,8 +128,7 @@ class CPUImageBuilder(Builder):
        meta_file = './docker/install_cpu.sh'
        version_args = (
            f'{self.args.torch_version} {self.args.torchvision_version} '
-            f'{self.args.torchaudio_version} {self.args.modelscope_branch} {self.args.swift_branch}'
-        )
+            f'{self.args.torchaudio_version}')
        base_image = f'{docker_registry}:ubuntu{self.args.ubuntu_version}-torch{self.args.torch_version}-base'
        extra_content = """\nRUN pip install adaseq\nRUN pip install pai-easycv"""

@@ -141,6 +138,9 @@ class CPUImageBuilder(Builder):
            content = content.replace('{extra_content}', extra_content)
            content = content.replace('{meta_file}', meta_file)
            content = content.replace('{version_args}', version_args)
+            content = content.replace('{modelscope_branch}',
+                                      self.args.modelscope_branch)
+            content = content.replace('{swift_branch}', self.args.swift_branch)
        return content

    def build(self) -> int:
@@ -165,8 +165,8 @@ class GPUImageBuilder(Builder):
        extra_content = """\nRUN pip install adaseq\nRUN pip install pai-easycv"""
        version_args = (
            f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} '
-            f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version} '
-            f'{self.args.modelscope_branch} {self.args.swift_branch}')
+            f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version}'
+        )
        base_image = (
            f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-'
            f'torch{self.args.torch_version}-tf{self.args.tf_version}-base')
@@ -176,6 +176,9 @@ class GPUImageBuilder(Builder):
            content = content.replace('{extra_content}', extra_content)
            content = content.replace('{meta_file}', meta_file)
            content = content.replace('{version_args}', version_args)
+            content = content.replace('{modelscope_branch}',
+                                      self.args.modelscope_branch)
+            content = content.replace('{swift_branch}', self.args.swift_branch)
        return content

    def build(self) -> int:
@@ -198,9 +201,7 @@ class LLMImageBuilder(Builder):
    def init_args(self, args) -> Any:
        if not args.base_image:
            # A mirrored image of nvidia/cuda:12.4.0-devel-ubuntu22.04
-            args.base_image = (
-                'modelscope-image-registry.cn-wulanchabu.cr.aliyuncs.com/modelscope/'
-                'mirror:12.4.0-devel-ubuntu22.04')
+            args.base_image = 'nvidia/cuda:12.4.0-devel-ubuntu22.04'
        if not args.torch_version:
            args.torch_version = '2.4.0'
            args.torchaudio_version = '2.4.0'
@@ -223,14 +224,17 @@ class LLMImageBuilder(Builder):
                                                  self.args.python_version)
        version_args = (
            f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} '
-            f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version} '
-            f'{self.args.modelscope_branch} {self.args.swift_branch}')
+            f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version}'
+        )
        with open('docker/Dockerfile.ubuntu', 'r') as f:
            content = f.read()
            content = content.replace('{base_image}', self.args.base_image)
            content = content.replace('{extra_content}', extra_content)
            content = content.replace('{meta_file}', meta_file)
            content = content.replace('{version_args}', version_args)
+            content = content.replace('{modelscope_branch}',
+                                      self.args.modelscope_branch)
+            content = content.replace('{swift_branch}', self.args.swift_branch)
        return content

    def build(self) -> int:
--- a/docker/install.sh
+++ b/docker/install.sh
@@ -6,12 +6,10 @@ torchaudio_version=${3:-2.4.0}
 vllm_version=${4:-0.6.0}
 lmdeploy_version=${5:-0.6.1}
 autogptq_version=${6:-0.7.1}
-modelscope_branch=${7:-master}
-swift_branch=${8:-main}

 pip uninstall -y torch torchvision torchaudio

-pip install --no-cache-dir -U torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version
+pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version

 pip install --no-cache-dir funtextprocessing typeguard==2.13.3 scikit-learn -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html

@@ -25,7 +23,11 @@ pip install --no-cache-dir torchsde jupyterlab torchmetrics==0.11.4 tiktoken tra

 pip install --no-cache-dir text2sql_lgesql==1.3.0 git+https://github.com/jin-s13/xtcocoapi.git@v1.14 git+https://github.com/gatagat/lap.git@v0.4.0 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps

-pip install --no-cache-dir mpi4py paint_ldm mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 ipykernel fasttext fairseq deepspeed apex -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+pip install --no-cache-dir mpi4py paint_ldm -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+
+pip install --no-cache-dir 'mmcls>=0.21.0' 'mmdet>=2.25.0' 'decord>=0.6.0' -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+
+pip install --no-cache-dir ipykernel fasttext deepspeed apex -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html

 CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0" pip install --no-cache-dir  'git+https://github.com/facebookresearch/detectron2.git';

@@ -35,20 +37,14 @@ cd /tmp && git clone https://github.com/Dao-AILab/flash-attention.git && cd flas

 pip install --no-cache-dir auto-gptq==$autogptq_version

-pip install --no-cache-dir --force tinycudann==1.7  -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
-
 # pip uninstall -y torch-scatter && TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5;8.0;8.6;8.9;9.0" pip install --no-cache-dir -U torch-scatter

 pip install --no-cache-dir -U triton

-pip install vllm==$vllm_version -U
+pip install --no-cache-dir vllm==$vllm_version -U

 pip install --no-cache-dir -U lmdeploy==$lmdeploy_version --no-deps

-pip install pynvml shortuuid
+pip install --no-cache-dir pynvml shortuuid

-pip uninstall ms-swift modelscope -y
-
-cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $modelscope_branch  --single-branch https://github.com/modelscope/modelscope.git && cd modelscope && pip install .[all] && cd / && rm -fr /tmp/modelscope && pip cache purge;
-
-cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $swift_branch  --single-branch https://github.com/modelscope/ms-swift.git && cd ms-swift && pip install .[all] && cd / && rm -fr /tmp/ms-swift && pip cache purge;
+pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version
--- a/docker/install_cpu.sh
+++ b/docker/install_cpu.sh
@@ -3,8 +3,6 @@
 torch_version=${1:-2.4.0}
 torchvision_version=${2:-0.19.0}
 torchaudio_version=${3:-2.4.0}
-modelscope_branch=${4:-master}
-swift_branch=${5:-main}

 pip uninstall -y torch torchvision torchaudio

@@ -20,10 +18,6 @@ pip install --no-cache-dir -U qwen_vl_utils pyav librosa timm transformers accel

 pip install --no-cache-dir text2sql_lgesql==1.3.0 git+https://github.com/jin-s13/xtcocoapi.git@v1.14 git+https://github.com/gatagat/lap.git@v0.4.0 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html --force --no-deps

-pip install --no-cache-dir mpi4py paint_ldm mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 ipykernel fasttext fairseq -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+pip install --no-cache-dir mpi4py paint_ldm 'mmcls>=0.21.0' 'mmdet>=2.25.0' 'decord>=0.6.0' ipykernel fasttext -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html

-pip uninstall ms-swift modelscope -y
-
-cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $modelscope_branch  --single-branch https://github.com/modelscope/modelscope.git && cd modelscope && pip install .[all] && cd / && rm -fr /tmp/modelscope && pip cache purge;
-
-cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $swift_branch  --single-branch https://github.com/modelscope/ms-swift.git && cd ms-swift && pip install .[all] && cd / && rm -fr /tmp/ms-swift && pip cache purge;
+pip install --no-cache-dir -U torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version --index-url https://download.pytorch.org/whl/cpu