diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu
index fd51af0e..cc084455 100644
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -16,6 +16,8 @@ COPY {meta_file} /tmp/install.sh
 
 ARG INSTALL_MS_DEPS={install_ms_deps}
 
+ARG INSTALL_MEGATRON_DEPS={install_megatron_deps}
+
 # install dependencies
 COPY requirements /var/modelscope
 
@@ -55,7 +57,6 @@ ARG CUR_TIME={cur_time}
 RUN echo $CUR_TIME
 
 RUN sh /tmp/install.sh {version_args} && \
-    pip install --no-cache-dir xformers==0.0.27 && \
     curl -fsSL https://ollama.com/install.sh | sh && \
     pip install --no-cache-dir -U funasr scikit-learn && \
     pip install --no-cache-dir -U qwen_vl_utils qwen_omni_utils pyav librosa timm transformers accelerate peft trl safetensors && \
@@ -67,12 +68,25 @@ RUN sh /tmp/install.sh {version_args} && \
     pip install .[eval] && pip install evalscope -U --no-dependencies && pip install xtuner --no-dependencies && \
     cd / && rm -fr /tmp/ms-swift && pip cache purge; \
     pip install --no-cache-dir torch=={torch_version} torchvision=={torchvision_version} torchaudio=={torchaudio_version} {index_url} && \
-    pip install --no-cache-dir transformers huggingface-hub==0.25.* -U && pip install --no-cache-dr timm>=0.9.0 && pip cache purge; \
+    pip install --no-cache-dir transformers "timm>=0.9.0" && pip cache purge; \
+    pip install --no-cache-dir omegaconf==2.3.0 && pip cache purge; \
     pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
     pip config set install.trusted-host mirrors.aliyun.com && \
     cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list
 
-RUN pip install --no-cache-dir omegaconf==2.3.0 && pip cache purge
+
+RUN if [ "$INSTALL_MS_DEPS" = "True" ]; then \
+    pip install --no-cache-dir "huggingface-hub==0.25.*" -U; \
+fi; \
+if [ "$INSTALL_MEGATRON_DEPS" = "True" ]; then \
+    pip install liger_kernel nvitop pre-commit transformers huggingface-hub -U && \
+    SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && echo "$SITE_PACKAGES" && \
+    CUDNN_PATH="$SITE_PACKAGES/nvidia/cudnn" CPLUS_INCLUDE_PATH="$SITE_PACKAGES/nvidia/cudnn/include" \
+    pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable; \
+    cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/NVIDIA/apex && \
+    cd apex && pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./ && \
+    cd / && rm -fr /tmp/apex && pip cache purge; \
+fi
 
 ENV SETUPTOOLS_USE_DISTUTILS=stdlib
 ENV VLLM_USE_MODELSCOPE=True
diff --git a/docker/build_image.py b/docker/build_image.py
index 268ba658..c493039d 100644
--- a/docker/build_image.py
+++ b/docker/build_image.py
@@ -31,7 +31,7 @@ class Builder:
         if not args.cuda_version:
             args.cuda_version = '12.1.0'
         if not args.vllm_version:
-            args.vllm_version = '0.7.2'
+            args.vllm_version = '0.5.3'
         if not args.lmdeploy_version:
             args.lmdeploy_version = '0.6.2'
         if not args.autogptq_version:
@@ -162,6 +162,7 @@ class CPUImageBuilder(Builder):
             content = content.replace('{version_args}', version_args)
             content = content.replace('{cur_time}', formatted_time)
             content = content.replace('{install_ms_deps}', 'True')
+            content = content.replace('{install_megatron_deps}', 'False')
             content = content.replace('{torch_version}',
                                       self.args.torch_version)
             content = content.replace('{torchvision_version}',
@@ -225,6 +226,7 @@ RUN pip install tf-keras==2.16.0 --no-dependencies && \
             content = content.replace('{version_args}', version_args)
             content = content.replace('{cur_time}', formatted_time)
             content = content.replace('{install_ms_deps}', 'True')
+            content = content.replace('{install_megatron_deps}', 'False')
             content = content.replace('{torch_version}',
                                       self.args.torch_version)
             content = content.replace('{torchvision_version}',
@@ -267,15 +269,15 @@ class LLMImageBuilder(Builder):
             # A mirrored image of nvidia/cuda:12.4.0-devel-ubuntu22.04
             args.base_image = 'nvidia/cuda:12.4.0-devel-ubuntu22.04'
         if not args.torch_version:
-            args.torch_version = '2.5.1'
-            args.torchaudio_version = '2.5.1'
-            args.torchvision_version = '0.20.1'
+            args.torch_version = '2.6.0'
+            args.torchaudio_version = '2.6.0'
+            args.torchvision_version = '0.21.0'
         if not args.cuda_version:
             args.cuda_version = '12.4.0'
         if not args.vllm_version:
-            args.vllm_version = '0.7.2'
+            args.vllm_version = '0.8.3'
         if not args.lmdeploy_version:
-            args.lmdeploy_version = '0.7.0.post2'
+            args.lmdeploy_version = '0.7.2.post1'
         if not args.autogptq_version:
             args.autogptq_version = '0.7.1'
         if not args.flashattn_version:
@@ -300,6 +302,7 @@ class LLMImageBuilder(Builder):
             content = content.replace('{version_args}', version_args)
             content = content.replace('{cur_time}', formatted_time)
             content = content.replace('{install_ms_deps}', 'False')
+            content = content.replace('{install_megatron_deps}', 'False')
             content = content.replace('{torch_version}',
                                       self.args.torch_version)
             content = content.replace('{torchvision_version}',
@@ -337,17 +340,6 @@ class LLMImageBuilder(Builder):
 
 class SwiftImageBuilder(LLMImageBuilder):
 
-    def init_args(self, args) -> Any:
-        if not args.torch_version:
-            args.torch_version = '2.5.1'
-            args.torchaudio_version = '2.5.1'
-            args.torchvision_version = '0.20.1'
-        if not args.cuda_version:
-            args.cuda_version = '12.4.0'
-        if not args.vllm_version:
-            args.vllm_version = '0.7.3'
-        return super().init_args(args)
-
     def generate_dockerfile(self) -> str:
         meta_file = './docker/install.sh'
         with open('docker/Dockerfile.extra_install', 'r') as f:
@@ -355,11 +347,8 @@ class SwiftImageBuilder(LLMImageBuilder):
             extra_content = extra_content.replace('{python_version}',
                                                   self.args.python_version)
         extra_content += """
-RUN pip install --no-cache-dir deepspeed==0.14.5 --no-deps \
-    pip install --no-cache-dir -U icecream soundfile pybind11 && \
-    SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && \
-    CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
-    pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable
+RUN pip install --no-cache-dir deepspeed==0.14.5 --no-deps && \
+    pip install --no-cache-dir -U icecream soundfile pybind11
 """
         version_args = (
             f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} '
@@ -373,6 +362,7 @@ RUN pip install --no-cache-dir deepspeed==0.14.5 --no-deps \
             content = content.replace('{version_args}', version_args)
             content = content.replace('{cur_time}', formatted_time)
             content = content.replace('{install_ms_deps}', 'False')
+            content = content.replace('{install_megatron_deps}', 'True')
             content = content.replace('{torch_version}',
                                       self.args.torch_version)
             content = content.replace('{torchvision_version}',
diff --git a/docker/install.sh b/docker/install.sh
index ee747d20..d8380091 100644
--- a/docker/install.sh
+++ b/docker/install.sh
@@ -16,7 +16,7 @@ pip install --no-cache-dir -U autoawq lmdeploy==$lmdeploy_version
 
 pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version
 
-pip install --no-cache-dir tiktoken transformers_stream_generator bitsandbytes deepspeed torchmetrics decord optimum
+pip install --no-cache-dir tiktoken transformers_stream_generator bitsandbytes deepspeed torchmetrics decord optimum openai-whisper
 
 # pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
 # find on: https://github.com/Dao-AILab/flash-attention/releases
diff --git a/requirements/audio/audio_kws.txt b/requirements/audio/audio_kws.txt
index 622ce981..6d2c8791 100644
--- a/requirements/audio/audio_kws.txt
+++ b/requirements/audio/audio_kws.txt
@@ -1,5 +1,5 @@
 kaldiio
-kwsbp==0.0.6
+# kwsbp==0.0.6  # Incompatible with Python >= 3.10
 matplotlib
 py_sound_connect>=0.1
 scipy