upgrade cuda to 12.1.0

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/15183132
* upgrade cuda to 12.1.0

* remove tb-nightly, remove dup compile megatron util

* uninstall tb-nightly reinstall tensorboard
This commit is contained in:
mulin.lyh
2023-12-28 17:47:58 +08:00
committed by yuze.zyz
parent e7f86a751e
commit 385486daf2
4 changed files with 7 additions and 6 deletions

View File

@@ -4,7 +4,7 @@ BASE_CPU_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu
BASE_GPU_CUDA113_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.3.0-cudnn8-devel
BASE_GPU_CUDA117_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.7.1-cudnn8-devel
BASE_GPU_CUDA118_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.8.0-cudnn8-devel
BASE_GPU_CUDA121_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:22.04-cuda11.8.0-cudnn8-devel
BASE_GPU_CUDA121_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:22.04-cuda12.1.0-cudnn8-devel
BASE_GPU_CUDA122_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:22.04-cuda11.2.2-cudnn8-devel
MODELSCOPE_REPO_ADDRESS=reg.docker.alibaba-inc.com/modelscope/modelscope
python_version=3.7.13

View File

@@ -160,6 +160,7 @@ export TORCH_VERSION=$torch_version
export CUDATOOLKIT_VERSION=$cudatoolkit_version
export TENSORFLOW_VERSION=$tensorflow_version
echo -e "Building image with:\npython$python_version\npytorch$torch_version\ntensorflow:$tensorflow_version\ncudatoolkit:$cudatoolkit_version\ncpu:$is_cpu\nis_ci:$is_ci_test\nis_dsw:$is_dsw\n"
echo -e "Base iamge: $BASE_IMAGE"
docker_file_content=`cat docker/Dockerfile.ubuntu`
if [ "$is_ci_test" != "True" ]; then
echo "Building ModelScope lib, will install ModelScope lib to image"
@@ -174,7 +175,7 @@ else
echo "Building dsw image will need set ModelScope lib cache location."
docker_file_content="${docker_file_content} \nENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope"
# pre compile extension
docker_file_content="${docker_file_content} \nRUN export TORCH_CUDA_ARCH_LIST='6.0;6.1;7.0;7.5;8.0;8.9;9.0;8.6+PTX' && python -c 'from modelscope.utils.pre_compile import pre_compile_all;pre_compile_all()'"
docker_file_content="${docker_file_content} \nRUN pip uninstall -y tb-nightly && pip install --no-cache-dir -U tensorboard && TORCH_CUDA_ARCH_LIST='6.0 6.1 7.0 7.5 8.0 8.9 9.0 8.6+PTX' python -c 'from modelscope.utils.pre_compile import pre_compile_all;pre_compile_all()'"
fi
if [ "$is_ci_test" == "True" ]; then
echo "Building CI image, uninstall modelscope"