From 24876ae74c902cec3bb5245d76de8d9f5d02e424 Mon Sep 17 00:00:00 2001 From: suluyana Date: Mon, 6 Jan 2025 14:36:16 +0800 Subject: [PATCH] flash-attn version --- docker/build_image.py | 12 +++++++++--- docker/install.sh | 4 +++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docker/build_image.py b/docker/build_image.py index 8ba4fd68..72067a57 100644 --- a/docker/build_image.py +++ b/docker/build_image.py @@ -36,6 +36,8 @@ class Builder: args.lmdeploy_version = '0.6.2' if not args.autogptq_version: args.autogptq_version = '0.7.1' + if not args.flashattn_version: + args.flashattn_version = '2.7.1.post4' return args def _generate_cudatoolkit_version(self, cuda_version: str) -> str: @@ -209,7 +211,8 @@ RUN pip install tf-keras==2.16.0 --no-dependencies && \ version_args = ( f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} ' - f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version}' + f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version} ' + f'{self.args.flashattn_version}' ) base_image = ( f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-{self.args.python_tag}-' @@ -274,6 +277,8 @@ class LLMImageBuilder(Builder): args.lmdeploy_version = '0.6.2' if not args.autogptq_version: args.autogptq_version = '0.7.1' + if not args.flashattn_version: + args.flashattn_version = '2.7.1.post4' return args def generate_dockerfile(self) -> str: @@ -284,7 +289,8 @@ class LLMImageBuilder(Builder): self.args.python_version) version_args = ( f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} ' - f'{self.args.lmdeploy_version} {self.args.vllm_version} {self.args.autogptq_version}' + f'{self.args.lmdeploy_version} {self.args.vllm_version} {self.args.autogptq_version} ' + f'{self.args.flashattn_version}' ) with open('docker/Dockerfile.ubuntu', 'r') as f: content = f.read() @@ -341,12 +347,12 @@ parser.add_argument('--torchaudio_version', type=str, default=None) parser.add_argument('--tf_version', type=str, default=None) parser.add_argument('--vllm_version', type=str, default=None) parser.add_argument('--lmdeploy_version', type=str, default=None) +parser.add_argument('--flashattn_version', type=str, default=None) parser.add_argument('--autogptq_version', type=str, default=None) parser.add_argument('--modelscope_branch', type=str, default='master') parser.add_argument('--modelscope_version', type=str, default='9.99.0') parser.add_argument('--swift_branch', type=str, default='main') parser.add_argument('--dry_run', type=int, default=0) - args = parser.parse_args() if args.image_type.lower() == 'base_cpu': diff --git a/docker/install.sh b/docker/install.sh index 3a6ffc13..d7d367dc 100644 --- a/docker/install.sh +++ b/docker/install.sh @@ -6,6 +6,7 @@ torchaudio_version=${3:-2.4.0} vllm_version=${4:-0.6.0} lmdeploy_version=${5:-0.6.1} autogptq_version=${6:-0.7.1} +flashattn_version=${7:-2.7.1.post4} pip install --no-cache-dir -U autoawq lmdeploy==$lmdeploy_version @@ -17,7 +18,8 @@ pip install --no-cache-dir tiktoken transformers_stream_generator bitsandbytes d # pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp310-cp310-linux_x86_64.whl # find on: https://github.com/Dao-AILab/flash-attention/releases -cd /tmp && git clone https://github.com/Dao-AILab/flash-attention.git && cd flash-attention && python setup.py install && cd / && rm -fr /tmp/flash-attention && pip cache purge; +# cd /tmp && git clone https://github.com/Dao-AILab/flash-attention.git && cd flash-attention && python setup.py install && cd / && rm -fr /tmp/flash-attention && pip cache purge; +pip install --no-cache-dir flash_attn==$flashattn_version pip install --no-cache-dir triton auto-gptq==$autogptq_version vllm==$vllm_version -U && pip cache purge