diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu
index fd51af0e..cc084455 100644
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -16,6 +16,8 @@ COPY {meta_file} /tmp/install.sh
 
 ARG INSTALL_MS_DEPS={install_ms_deps}
 
+ARG INSTALL_MEGATRON_DEPS={install_megatron_deps}
+
 # install dependencies
 COPY requirements /var/modelscope
 
@@ -55,7 +57,6 @@ ARG CUR_TIME={cur_time}
 RUN echo $CUR_TIME
 
 RUN sh /tmp/install.sh {version_args} && \
-    pip install --no-cache-dir xformers==0.0.27 && \
     curl -fsSL https://ollama.com/install.sh | sh && \
     pip install --no-cache-dir -U funasr scikit-learn && \
     pip install --no-cache-dir -U qwen_vl_utils qwen_omni_utils pyav librosa timm transformers accelerate peft trl safetensors && \
@@ -67,12 +68,25 @@ RUN sh /tmp/install.sh {version_args} && \
     pip install .[eval] && pip install evalscope -U --no-dependencies && pip install xtuner --no-dependencies && \
     cd / && rm -fr /tmp/ms-swift && pip cache purge; \
     pip install --no-cache-dir torch=={torch_version} torchvision=={torchvision_version} torchaudio=={torchaudio_version} {index_url} && \
-    pip install --no-cache-dir transformers huggingface-hub==0.25.* -U && pip install --no-cache-dr timm>=0.9.0 && pip cache purge; \
+    pip install --no-cache-dir transformers "timm>=0.9.0" && pip cache purge; \
+    pip install --no-cache-dir omegaconf==2.3.0 && pip cache purge; \
     pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
     pip config set install.trusted-host mirrors.aliyun.com && \
     cp /tmp/resources/ubuntu2204.aliyun /etc/apt/sources.list
 
-RUN pip install --no-cache-dir omegaconf==2.3.0 && pip cache purge
+
+RUN if [ "$INSTALL_MS_DEPS" = "True" ]; then \
+    pip install --no-cache-dir "huggingface-hub==0.25.*" -U; \
+fi; \
+if [ "$INSTALL_MEGATRON_DEPS" = "True" ]; then \
+    pip install liger_kernel nvitop pre-commit transformers huggingface-hub -U && \
+    SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && echo $SITE_PACKAGES && \
+    CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
+    pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable; \
+    cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/NVIDIA/apex && \
+    cd apex && pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./ && \
+    cd / && rm -fr /tmp/apex && pip cache purge; \
+fi
 
 ENV SETUPTOOLS_USE_DISTUTILS=stdlib
 ENV VLLM_USE_MODELSCOPE=True
diff --git a/docker/build_image.py b/docker/build_image.py
index 268ba658..c493039d 100644
--- a/docker/build_image.py
+++ b/docker/build_image.py
@@ -31,7 +31,7 @@ class Builder:
         if not args.cuda_version:
             args.cuda_version = '12.1.0'
         if not args.vllm_version:
-            args.vllm_version = '0.7.2'
+            args.vllm_version = '0.5.3'
         if not args.lmdeploy_version:
             args.lmdeploy_version = '0.6.2'
         if not args.autogptq_version:
@@ -162,6 +162,7 @@ class CPUImageBuilder(Builder):
         content = content.replace('{version_args}', version_args)
         content = content.replace('{cur_time}', formatted_time)
         content = content.replace('{install_ms_deps}', 'True')
+        content = content.replace('{install_megatron_deps}', 'False')
         content = content.replace('{torch_version}',
                                   self.args.torch_version)
         content = content.replace('{torchvision_version}',
@@ -225,6 +226,7 @@ RUN pip install tf-keras==2.16.0 --no-dependencies && \
         content = content.replace('{version_args}', version_args)
         content = content.replace('{cur_time}', formatted_time)
         content = content.replace('{install_ms_deps}', 'True')
+        content = content.replace('{install_megatron_deps}', 'False')
         content = content.replace('{torch_version}',
                                   self.args.torch_version)
         content = content.replace('{torchvision_version}',
@@ -267,15 +269,15 @@ class LLMImageBuilder(Builder):
             # A mirrored image of nvidia/cuda:12.4.0-devel-ubuntu22.04
             args.base_image = 'nvidia/cuda:12.4.0-devel-ubuntu22.04'
         if not args.torch_version:
-            args.torch_version = '2.5.1'
-            args.torchaudio_version = '2.5.1'
-            args.torchvision_version = '0.20.1'
+            args.torch_version = '2.6.0'
+            args.torchaudio_version = '2.6.0'
+            args.torchvision_version = '0.21.0'
         if not args.cuda_version:
             args.cuda_version = '12.4.0'
         if not args.vllm_version:
-            args.vllm_version = '0.7.2'
+            args.vllm_version = '0.8.3'
         if not args.lmdeploy_version:
-            args.lmdeploy_version = '0.7.0.post2'
+            args.lmdeploy_version = '0.7.2.post1'
         if not args.autogptq_version:
             args.autogptq_version = '0.7.1'
         if not args.flashattn_version:
@@ -300,6 +302,7 @@ class LLMImageBuilder(Builder):
         content = content.replace('{version_args}', version_args)
         content = content.replace('{cur_time}', formatted_time)
         content = content.replace('{install_ms_deps}', 'False')
+        content = content.replace('{install_megatron_deps}', 'False')
         content = content.replace('{torch_version}',
                                   self.args.torch_version)
         content = content.replace('{torchvision_version}',
@@ -337,17 +340,6 @@ class LLMImageBuilder(Builder):
 
 class SwiftImageBuilder(LLMImageBuilder):
 
-    def init_args(self, args) -> Any:
-        if not args.torch_version:
-            args.torch_version = '2.5.1'
-            args.torchaudio_version = '2.5.1'
-            args.torchvision_version = '0.20.1'
-        if not args.cuda_version:
-            args.cuda_version = '12.4.0'
-        if not args.vllm_version:
-            args.vllm_version = '0.7.3'
-        return super().init_args(args)
-
     def generate_dockerfile(self) -> str:
         meta_file = './docker/install.sh'
         with open('docker/Dockerfile.extra_install', 'r') as f:
@@ -355,11 +347,8 @@ class SwiftImageBuilder(LLMImageBuilder):
         extra_content = extra_content.replace('{python_version}',
                                               self.args.python_version)
         extra_content += """
-RUN pip install --no-cache-dir deepspeed==0.14.5 --no-deps \
-    pip install --no-cache-dir -U icecream soundfile pybind11 && \
-    SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") && \
-    CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
-    pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable
+RUN pip install --no-cache-dir deepspeed==0.14.5 --no-deps && \
+    pip install --no-cache-dir -U icecream soundfile pybind11
 """
         version_args = (
             f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} '
@@ -373,6 +362,7 @@ RUN pip install --no-cache-dir deepspeed==0.14.5 --no-deps \
         content = content.replace('{version_args}', version_args)
         content = content.replace('{cur_time}', formatted_time)
         content = content.replace('{install_ms_deps}', 'False')
+        content = content.replace('{install_megatron_deps}', 'True')
         content = content.replace('{torch_version}',
                                   self.args.torch_version)
         content = content.replace('{torchvision_version}',
diff --git a/docker/install.sh b/docker/install.sh
index ee747d20..d8380091 100644
--- a/docker/install.sh
+++ b/docker/install.sh
@@ -16,7 +16,7 @@ pip install --no-cache-dir -U autoawq lmdeploy==$lmdeploy_version
 
 pip install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version
 
-pip install --no-cache-dir tiktoken transformers_stream_generator bitsandbytes deepspeed torchmetrics decord optimum
+pip install --no-cache-dir tiktoken transformers_stream_generator bitsandbytes deepspeed torchmetrics decord optimum openai-whisper
 
 # pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
 # find on: https://github.com/Dao-AILab/flash-attention/releases
diff --git a/requirements/audio/audio_kws.txt b/requirements/audio/audio_kws.txt
index 622ce981..6d2c8791 100644
--- a/requirements/audio/audio_kws.txt
+++ b/requirements/audio/audio_kws.txt
@@ -1,5 +1,5 @@
 kaldiio
-kwsbp==0.0.6
+# kwsbp==0.0.6 # Incompatible with Python >= 3.10
 matplotlib
 py_sound_connect>=0.1
 scipy