From fd98e3d82c50ebf08ff49573d876fefc7af36fb3 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sat, 11 Oct 2025 01:55:01 +0800 Subject: [PATCH] fix llm image --- docker/Dockerfile.ubuntu | 11 ++++++----- docker/build_image.py | 9 ++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu index c68adf93..d804eea5 100644 --- a/docker/Dockerfile.ubuntu +++ b/docker/Dockerfile.ubuntu @@ -16,7 +16,7 @@ COPY {meta_file} /tmp/install.sh ARG INSTALL_MS_DEPS={install_ms_deps} -ARG INSTALL_MEGATRON_DEPS={install_megatron_deps} +ARG IMAGE_TYPE={image_type} # install dependencies COPY requirements /var/modelscope @@ -59,8 +59,7 @@ RUN echo $CUR_TIME RUN bash /tmp/install.sh {version_args} && \ curl -fsSL https://ollama.com/install.sh | sh && \ pip install --no-cache-dir -U funasr scikit-learn && \ - pip install transformers trl && \ - pip install --no-cache-dir -U qwen_vl_utils qwen_omni_utils librosa timm accelerate peft safetensors && \ + pip install --no-cache-dir -U qwen_vl_utils qwen_omni_utils librosa timm transformers accelerate peft trl safetensors && \ cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b {swift_branch} --single-branch https://github.com/modelscope/ms-swift.git && \ cd ms-swift && pip install .[llm] && \ pip install .[eval] && pip install evalscope -U --no-dependencies && pip install ms-agent -U --no-dependencies && \ @@ -77,9 +76,9 @@ RUN bash /tmp/install.sh {version_args} && \ RUN if [ "$INSTALL_MS_DEPS" = "True" ]; then \ - pip install --no-cache-dir huggingface-hub transformers peft; \ + pip install --no-cache-dir huggingface-hub transformers peft -U; \ fi; \ -if [ "$INSTALL_MEGATRON_DEPS" = "True" ]; then \ +if [ "$IMAGE_TYPE" = "swift" ]; then \ pip install "sglang[all]<0.5" "math_verify==0.5.2" "gradio<5.33" -U && \ pip install liger_kernel wandb swanlab nvitop pre-commit "transformers<4.57" "trl<0.21" huggingface-hub -U && \ SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && echo $SITE_PACKAGES && \ @@ -89,6 +88,8 @@ if [ "$INSTALL_MEGATRON_DEPS" = "True" ]; then \ cd apex && git checkout e13873debc4699d39c6861074b9a3b2a02327f92 && pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./ && \ cd / && rm -fr /tmp/apex && pip cache purge; \ pip install git+https://github.com/NVIDIA/Megatron-LM.git@core_r0.13.0; \ +elif [ "$IMAGE_TYPE" = "gpu" ]; then \ + pip install "transformers<4.56" "trl<0.23" --no-dependencies; \ fi # install nvm and set node version to 18 diff --git a/docker/build_image.py b/docker/build_image.py index affdefcd..8d8bf540 100644 --- a/docker/build_image.py +++ b/docker/build_image.py @@ -162,7 +162,7 @@ class CPUImageBuilder(Builder): content = content.replace('{version_args}', version_args) content = content.replace('{cur_time}', formatted_time) content = content.replace('{install_ms_deps}', 'True') - content = content.replace('{install_megatron_deps}', 'False') + content = content.replace('{image_type}', 'cpu') content = content.replace('{torch_version}', self.args.torch_version) content = content.replace('{torchvision_version}', @@ -208,7 +208,6 @@ class GPUImageBuilder(Builder): extra_content = """ RUN pip install tf-keras==2.16.0 --no-dependencies && \ pip install onnx==1.18.0 --no-dependencies && \ - pip install "transformers<4.56" "trl<0.23" --no-dependencies && \ pip install deepspeed==0.17.4 --no-dependencies && \ pip install --no-cache-dir torchsde jupyterlab torchmetrics==0.11.4 basicsr pynvml shortuuid && \ CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0" \ @@ -233,7 +232,7 @@ RUN pushd $(dirname $(python -c 'print(__import__("tensorflow").__file__)')) && content = content.replace('{version_args}', version_args) content = content.replace('{cur_time}', formatted_time) content = content.replace('{install_ms_deps}', 'True') - content = content.replace('{install_megatron_deps}', 'False') + content = content.replace('{image_type}', 'gpu') content = content.replace('{torch_version}', self.args.torch_version) content = content.replace('{torchvision_version}', @@ -309,7 +308,7 @@ class LLMImageBuilder(Builder): content = content.replace('{version_args}', version_args) content = content.replace('{cur_time}', formatted_time) content = content.replace('{install_ms_deps}', 'False') - content = content.replace('{install_megatron_deps}', 'False') + content = content.replace('{image_type}', 'llm') content = content.replace('{torch_version}', self.args.torch_version) content = content.replace('{torchvision_version}', @@ -385,7 +384,7 @@ RUN pip install --no-cache-dir -U icecream soundfile pybind11 py-spy content = content.replace('{version_args}', version_args) content = content.replace('{cur_time}', formatted_time) content = content.replace('{install_ms_deps}', 'False') - content = content.replace('{install_megatron_deps}', 'True') + content = content.replace('{image_type}', 'swift') content = content.replace('{torch_version}', self.args.torch_version) content = content.replace('{torchvision_version}',