Update CUDA to 12.9.1; bump Megatron-LM to core_r0.14.0 and lmdeploy to 0.10.2

2026-02-24 04:01:10 +01:00 · 2025-11-09 20:01:42 +08:00
parent 5639c67133
commit e3184d481c
3 changed files with 7 additions and 9 deletions
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -78,16 +78,14 @@ RUN bash /tmp/install.sh {version_args} && \
 RUN if [ "$IMAGE_TYPE" = "swift" ]; then \
    pip install "sglang[all]<0.6" "math_verify==0.5.2" "gradio<5.33" "deepspeed<0.18" -U && \
    pip install liger_kernel wandb swanlab nvitop pre-commit "transformers<4.58" "trl<0.24" huggingface-hub -U && \
-    SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && echo $SITE_PACKAGES && \
-    CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
    pip install --no-build-isolation transformer_engine[pytorch]; \
    cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/NVIDIA/apex && \
    cd apex && pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./ && \
    cd / && rm -fr /tmp/apex && pip cache purge; \
-    pip install git+https://github.com/NVIDIA/Megatron-LM.git@core_r0.13.0; \
+    pip install git+https://github.com/NVIDIA/Megatron-LM.git@core_r0.14.0; \
    pip uninstall autoawq -y; \
    mkdir -p /root/.cache/modelscope/_github; \
-    git -C /root/.cache/modelscope/_github clone https://github.com/NVIDIA/Megatron-LM.git Megatron-LM --branch core_r0.13.0; \
+    git -C /root/.cache/modelscope/_github clone https://github.com/NVIDIA/Megatron-LM.git Megatron-LM --branch core_r0.14.0; \
 elif [ "$IMAGE_TYPE" = "llm" ]; then \
    pip install --no-cache-dir huggingface-hub transformers peft diffusers -U; \
    pip uninstall autoawq -y; \
--- a/docker/build_image.py
+++ b/docker/build_image.py
@@ -348,9 +348,9 @@ class SwiftImageBuilder(LLMImageBuilder):

    def init_args(self, args) -> Any:
        if not args.base_image:
-            args.base_image = 'nvidia/cuda:12.8.1-devel-ubuntu22.04'
+            args.base_image = 'nvidia/cuda:12.9.1-cudnn-devel-ubuntu22.04'
        if not args.cuda_version:
-            args.cuda_version = '12.8.1'
+            args.cuda_version = '12.9.1'
        if not args.torch_version:
            args.torch_version = '2.8.0'
            args.torchaudio_version = '2.8.0'
@@ -358,7 +358,7 @@ class SwiftImageBuilder(LLMImageBuilder):
        if not args.vllm_version:
            args.vllm_version = '0.11.0'
        if not args.lmdeploy_version:
-            args.lmdeploy_version = '0.10.1'
+            args.lmdeploy_version = '0.10.2'
        if not args.flashattn_version:
            args.flashattn_version = '2.8.1'
        return super().init_args(args)
--- a/docker/install.sh
+++ b/docker/install.sh
@@ -10,11 +10,11 @@ flashattn_version=${7:-2.7.1.post4}

 pip uninstall -y torch torchvision torchaudio

-pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version
+pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version --index-url https://download.pytorch.org/whl/cu129

 pip install --no-cache-dir -U autoawq lmdeploy==$lmdeploy_version

-pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version
+pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version --index-url https://download.pytorch.org/whl/cu129

 pip install --no-cache-dir tiktoken transformers_stream_generator bitsandbytes deepspeed torchmetrics decord optimum openai-whisper