# modelscope/docker/Dockerfile.ascend

# Base image: template placeholder, expected to be substituted before the build runs.
FROM {base_image}

# pip settings inherited by every later install step: no self-update check,
# a 300 second timeout and up to 10 retries.
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=300 \
    PIP_RETRIES=10

# Template placeholder for the target SoC.
# NOTE(review): presumably read by the vllm-ascend build further down; confirm.
ENV SOC_VERSION={soc_version}

# RUN steps are executed by bash; later steps rely on the `source` builtin.
SHELL ["/bin/bash", "-c"]

# ---------- System dependencies ----------
# Step 1: delete the docker-clean apt config and any other top-level apt.conf.d file that
#         mentions an APT::Update::Post-Invoke hook or docker-clean.
# Step 2: install the build toolchain, shared libraries and CLI tools, then drop the apt
#         caches in the same layer.
RUN rm -f /etc/apt/apt.conf.d/docker-clean && \
    find /etc/apt/apt.conf.d -maxdepth 1 -type f \
        | xargs -r grep -l -e "APT::Update::Post-Invoke" -e "docker-clean" \
        | xargs -r rm -f && \
    apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        jq \
        libgl1 \
        libglib2.0-0 \
        libnuma-dev \
        libsm6 \
        libxext6 \
        libxrender1 \
        ninja-build \
        vim \
        wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Point pip at the Aliyun PyPI mirror (and trust that host). On x86_64 only, also
# register the PyTorch CPU wheel index as an extra index.
RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
    pip config set install.trusted-host mirrors.aliyun.com && \
    if [[ "$(uname -m)" == "x86_64" ]]; then \
        pip config set global.extra-index-url "https://download.pytorch.org/whl/cpu/"; \
    fi

{extra_content}
# ---------- Install vllm + vllm-ascend ----------
# Both repositories are cloned at ref v0.18.0 into the current working directory and
# installed in editable mode below, so the checkouts have to stay in the image.
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
    if [ -f /usr/local/Ascend/nnal/atb/set_env.sh ]; then source /usr/local/Ascend/nnal/atb/set_env.sh; fi && \
    git clone --depth 1 --branch v0.18.0 https://github.com/vllm-project/vllm && \
    git clone --depth 1 --branch v0.18.0 https://github.com/vllm-project/vllm-ascend.git

# The NNAL/ATB environment script is sourced only when it exists, the same way every
# other step in this file treats it. --no-cache-dir keeps pip's download/wheel cache out
# of this layer: cache removal performed in a later layer cannot shrink an earlier one.
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
    if [ -f /usr/local/Ascend/nnal/atb/set_env.sh ]; then source /usr/local/Ascend/nnal/atb/set_env.sh; fi && \
    # Install torch & torch_npu & torchvision
    pip install --no-cache-dir torch==2.9.0 torch_npu==2.9.0 torchvision==0.24.0 && \
    # Install vllm (VLLM_TARGET_DEVICE=empty is passed to the vllm build)
    cd vllm && VLLM_TARGET_DEVICE=empty pip install --no-cache-dir -v -e . && cd .. && \
    # Install vllm-ascend
    cd vllm-ascend && pip install --no-cache-dir -v -e . && cd ..

# ---------- Clone training-side repositories ----------
# Shallow clones into fixed absolute paths. ms-swift is cloned with LFS smudging skipped
# and its branch comes from a template placeholder.
# NOTE(review): mcore-bridge has no --branch, so it follows the remote's default branch.
RUN git clone --depth 1 --branch v0.15.3 \
        https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM && \
    git clone --depth 1 --branch core_r0.15.3 \
        https://gitcode.com/Ascend/MindSpeed.git /MindSpeed && \
    GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --branch {swift_branch} --single-branch \
        https://github.com/modelscope/ms-swift.git /ms-swift && \
    git clone --depth 1 \
        https://github.com/modelscope/mcore-bridge.git /mcore-bridge

# ---------- Install training-side repositories ----------
# Editable installs of MindSpeed, mcore-bridge and ms-swift, in that order, with the
# Ascend toolkit environment loaded. The loop aborts on the first failing step.
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
    if [ -f /usr/local/Ascend/nnal/atb/set_env.sh ]; then source /usr/local/Ascend/nnal/atb/set_env.sh; fi && \
    for repo_dir in /MindSpeed /mcore-bridge /ms-swift; do \
        cd "${repo_dir}" && pip install --no-cache-dir -e . || exit $?; \
    done

# ---------- Pin torch to the correct version + torch_npu ----------
# Force-reinstalls torch / torchvision / torchaudio and then torch_npu, without touching
# their dependencies.
# x86: must force-install the CPU build from pytorch.org/whl/cpu
# aarch64: PyPI only provides the CPU build, so install it directly from the Aliyun mirror
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
    if [ -f /usr/local/Ascend/nnal/atb/set_env.sh ]; then source /usr/local/Ascend/nnal/atb/set_env.sh; fi && \
    torch_index_args=() && \
    if [[ "$(uname -m)" == "x86_64" ]]; then \
        torch_index_args=(--index-url https://download.pytorch.org/whl/cpu); \
    fi && \
    pip install --no-cache-dir --force-reinstall --no-deps \
        "${torch_index_args[@]}" \
        torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0 && \
    pip install --no-cache-dir --force-reinstall --no-deps \
        torch_npu==2.9.0 && \
    rm -rf /root/.cache/pip

# ---------- Remove CUDA-only dependencies pulled in by vllm (they cause missing libtorch_cuda.so errors on NPU) ----------
# Best effort: stderr is discarded and a failure never aborts the build.
# NOTE(review): pip matches installed distribution names; confirm against the `pip list`
# output that these are the names the packages are actually installed under.
RUN pip uninstall -y \
        flashinfer \
        tvm-ffi \
        torch-c-dlpack-ext \
        2>/dev/null || true
# Template placeholder; the following install step compares it with the literal "True".
ARG INSTALL_MS_DEPS={install_ms_deps}

# Megatron-LM is not pip-installed in this file; it is made importable through PYTHONPATH.
# The previous PYTHONPATH is appended only when it is set and non-empty. A plain
# "/Megatron-LM:${PYTHONPATH}" would leave a trailing ':' when it is unset, and Python
# treats that empty entry as the current working directory.
ENV MEGATRON_LM_PATH=/Megatron-LM
ENV PYTHONPATH=/Megatron-LM${PYTHONPATH:+:${PYTHONPATH}}
# install dependencies
COPY requirements /var/modelscope

# Uninstall ms-swift and modelscope, move pip to the 23.x series, then install the
# requirement sets copied to /var/modelscope.
#   INSTALL_MS_DEPS=True -> full stack (framework, audio, tests, server plus extras)
#   anything else        -> framework.txt only
# 'pip==23.*' is quoted so the shell can never glob-expand it against the working
# directory; every install uses --no-cache-dir, and the cache is purged at the end.
RUN pip uninstall ms-swift modelscope -y && pip install --no-cache-dir 'pip==23.*' -U && \
    if [ "$INSTALL_MS_DEPS" = "True" ]; then \
        pip install --no-cache-dir omegaconf==2.0.6 && \
        pip install --no-cache-dir 'editdistance==0.8.1' && \
        pip install --no-cache-dir 'cython<=0.29.36' versioneer 'numpy<2.0' && \
        pip install --no-cache-dir -r /var/modelscope/framework.txt && \
        pip install --no-cache-dir -r /var/modelscope/audio.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
        pip install --no-cache-dir -r /var/modelscope/tests.txt && \
        pip install --no-cache-dir -r /var/modelscope/server.txt && \
        pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/packages/imageio_ffmpeg-0.4.9-py3-none-any.whl --no-dependencies --force-reinstall && \
        pip install --no-cache-dir 'scipy<1.13.0' && \
        pip install --no-cache-dir funtextprocessing typeguard==2.13.3 scikit-learn && \
        pip install --no-cache-dir 'decord>=0.6.0' mpi4py paint_ldm ipykernel fasttext && \
        pip install --no-cache-dir 'blobfile>=1.0.5' && \
        pip uninstall MinDAEC -y && \
        pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/MinDAEC-0.0.2-py3-none-any.whl && \
        pip cache purge; \
    else \
        pip install --no-cache-dir -r /var/modelscope/framework.txt && \
        pip cache purge; \
    fi

# Template placeholder that is echoed by the next step.
# NOTE(review): presumably a cache-buster so the layers below are rebuilt on every
# build; confirm against the build script.
ARG CUR_TIME={cur_time}
# Quoted so the value is printed verbatim (no word splitting or globbing).
RUN echo "$CUR_TIME"

# OpenCC is installed on its own, with pip's build isolation turned off.
RUN pip install --no-cache-dir --no-build-isolation OpenCC

# Final Python stack, installed with the Ascend toolkit environment loaded.
# The order of the steps is significant and is kept as-is; omegaconf==2.3.0 is installed
# last, after the 2.0.6 pin used by the INSTALL_MS_DEPS=True branch of the earlier step.
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
    if [ -f /usr/local/Ascend/nnal/atb/set_env.sh ]; then source /usr/local/Ascend/nnal/atb/set_env.sh; fi && \
    # Audio / ML utilities
    pip install --no-cache-dir --upgrade \
        funasr \
        scikit-learn && \
    # Multimodal helpers and the Hugging Face training stack
    pip install --no-cache-dir --upgrade \
        qwen_vl_utils \
        qwen_omni_utils \
        librosa \
        'timm>=0.9.0' \
        transformers \
        accelerate \
        peft \
        trl \
        safetensors && \
    # ms-swift (editable) with its llm and eval extras
    cd /ms-swift && \
    pip install --no-cache-dir -e '.[llm]' && \
    pip install --no-cache-dir -e '.[eval]' && \
    # evalscope and ms-agent are upgraded without pulling their dependencies
    pip install evalscope --upgrade --no-dependencies && \
    pip install ms-agent --upgrade --no-dependencies && \
    # modelscope is installed from a temporary checkout that is removed afterwards
    cd /tmp && \
    GIT_LFS_SKIP_SMUDGE=1 git clone --branch {modelscope_branch} --single-branch https://github.com/modelscope/modelscope.git && \
    cd modelscope && \
    pip install . --find-links https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
    cd / && \
    rm -rf /tmp/modelscope && \
    pip install --no-cache-dir diffusers && \
    pip install --no-cache-dir omegaconf==2.3.0 && \
    pip cache purge

# Append three lines to root's .bashrc: load the Ascend toolkit environment, load the
# NNAL/ATB environment when its script exists, and turn off bash history expansion.
RUN printf '%s\n' \
        'source /usr/local/Ascend/ascend-toolkit/set_env.sh' \
        '[ -f /usr/local/Ascend/nnal/atb/set_env.sh ] && source /usr/local/Ascend/nnal/atb/set_env.sh' \
        'set +H' \
        >> /root/.bashrc

# Runtime environment baked into the image.
ENV SETUPTOOLS_USE_DISTUTILS=stdlib \
    VLLM_USE_MODELSCOPE=True \
    LMDEPLOY_USE_MODELSCOPE=True \
    MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope/hub

# Show install results
RUN pip list

# Containers start in /workspace with an interactive bash by default.
WORKDIR /workspace

CMD ["/bin/bash"]