flash-attn version

Author: suluyana
Date:   2025-01-06 14:36:16 +08:00
parent  776398bcab
commit  24876ae74c

2 changed files with 12 additions and 4 deletions

File 1 of 2 (Python image build script):

@@ -36,6 +36,8 @@ class Builder:
             args.lmdeploy_version = '0.6.2'
         if not args.autogptq_version:
             args.autogptq_version = '0.7.1'
+        if not args.flashattn_version:
+            args.flashattn_version = '2.7.1.post4'
         return args

     def _generate_cudatoolkit_version(self, cuda_version: str) -> str:
@@ -209,7 +211,8 @@ RUN pip install tf-keras==2.16.0 --no-dependencies && \
         version_args = (
             f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} '
-            f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version}'
+            f'{self.args.vllm_version} {self.args.lmdeploy_version} {self.args.autogptq_version} '
+            f'{self.args.flashattn_version}'
         )
         base_image = (
             f'{docker_registry}:ubuntu{self.args.ubuntu_version}-cuda{self.args.cuda_version}-{self.args.python_tag}-'
@@ -274,6 +277,8 @@ class LLMImageBuilder(Builder):
             args.lmdeploy_version = '0.6.2'
         if not args.autogptq_version:
             args.autogptq_version = '0.7.1'
+        if not args.flashattn_version:
+            args.flashattn_version = '2.7.1.post4'
         return args

     def generate_dockerfile(self) -> str:
@@ -284,7 +289,8 @@ class LLMImageBuilder(Builder):
                                    self.args.python_version)
         version_args = (
             f'{self.args.torch_version} {self.args.torchvision_version} {self.args.torchaudio_version} '
-            f'{self.args.lmdeploy_version} {self.args.vllm_version} {self.args.autogptq_version}'
+            f'{self.args.lmdeploy_version} {self.args.vllm_version} {self.args.autogptq_version} '
+            f'{self.args.flashattn_version}'
         )
         with open('docker/Dockerfile.ubuntu', 'r') as f:
             content = f.read()
@@ -341,12 +347,12 @@ parser.add_argument('--torchaudio_version', type=str, default=None)
 parser.add_argument('--tf_version', type=str, default=None)
 parser.add_argument('--vllm_version', type=str, default=None)
 parser.add_argument('--lmdeploy_version', type=str, default=None)
+parser.add_argument('--flashattn_version', type=str, default=None)
 parser.add_argument('--autogptq_version', type=str, default=None)
 parser.add_argument('--modelscope_branch', type=str, default='master')
 parser.add_argument('--modelscope_version', type=str, default='9.99.0')
 parser.add_argument('--swift_branch', type=str, default='main')
 parser.add_argument('--dry_run', type=int, default=0)
 args = parser.parse_args()

 if args.image_type.lower() == 'base_cpu':
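
With the flag wired through both builders, a flash-attn pin can be chosen per build from the command line. A minimal sketch of an invocation: the script name build_image.py and the 'llm' image type value are assumptions, while --image_type (implied by args.image_type above) and --flashattn_version are confirmed by this diff:

    python build_image.py --image_type llm --flashattn_version 2.7.1.post4

Left unset, the flag falls back to the 2.7.1.post4 default applied in Builder and LLMImageBuilder above.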

File 2 of 2 (shell install script):

@@ -6,6 +6,7 @@ torchaudio_version=${3:-2.4.0}
 vllm_version=${4:-0.6.0}
 lmdeploy_version=${5:-0.6.1}
 autogptq_version=${6:-0.7.1}
+flashattn_version=${7:-2.7.1.post4}

 pip install --no-cache-dir -U autoawq lmdeploy==$lmdeploy_version
@@ -17,7 +18,8 @@ pip install --no-cache-dir tiktoken transformers_stream_generator bitsandbytes d
 # pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
 # find on: https://github.com/Dao-AILab/flash-attention/releases
-cd /tmp && git clone https://github.com/Dao-AILab/flash-attention.git && cd flash-attention && python setup.py install && cd / && rm -fr /tmp/flash-attention && pip cache purge;
+# cd /tmp && git clone https://github.com/Dao-AILab/flash-attention.git && cd flash-attention && python setup.py install && cd / && rm -fr /tmp/flash-attention && pip cache purge;
+pip install --no-cache-dir flash_attn==$flashattn_version
 pip install --no-cache-dir triton auto-gptq==$autogptq_version vllm==$vllm_version -U && pip cache purge
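
The net effect in the install script: flash-attention is no longer compiled from a fresh git checkout; a pinned flash_attn release is installed with pip instead, which keeps the build faster and reproducible. An illustrative sketch of how the script would now be called; the script name install.sh is hypothetical, positions 1-2 are torch/torchvision versions inferred from the version_args ordering above, and the values are placeholders plus the defaults from this diff:

    bash install.sh 2.4.0 0.19.0 2.4.0 0.6.0 0.6.1 0.7.1 2.7.1.post4

One caveat: if no prebuilt flash_attn wheel matches the image's CUDA/torch/ABI combination, pip falls back to compiling the pinned sdist, so the pin should track the wheels published on the releases page referenced in the comments above.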