Merge pull request #969 from tastelikefeet/fix/patch-hf

Fix patching hf
Yingda Chen
2024-09-01 19:10:35 +08:00
committed by GitHub
3 changed files with 3 additions and 53 deletions


@@ -159,7 +159,7 @@ docker_file_content=`cat docker/Dockerfile.ubuntu`
 BUILD_HASH_ID=$(git rev-parse HEAD)
 # install third-party libraries
-docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$BUILD_HASH_ID && pip install --no-cache-dir -U adaseq pai-easycv && pip install --no-cache-dir -U 'ms-swift' 'funasr' autoawq 'timm>0.9.5' 'transformers'"
+docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$BUILD_HASH_ID && pip install --no-cache-dir -U adaseq pai-easycv && pip install --no-cache-dir -U 'ms-swift' 'decord' 'qwen_vl_utils' 'pyav' 'librosa' 'funasr' autoawq 'timm>0.9.5' 'transformers' 'accelerate' 'peft' 'optimum' 'trl'"
 docker_file_content="${docker_file_content} \nRUN pip uninstall modelscope -y && export COMMIT_ID=$BUILD_HASH_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $build_branch --single-branch $REPO_URL && cd modelscope && pip install . && cd / && rm -fr /tmp/modelscope && pip cache purge;"


@@ -34,54 +34,6 @@ def user_agent(invoked_by=None):
     return uagent


-def patch_tokenizer_base():
-    """ Monkey patch PreTrainedTokenizerBase.from_pretrained to adapt to modelscope hub.
-    """
-    ori_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
-
-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
-                        **kwargs):
-        ignore_file_pattern = [r'\w+\.bin', r'\w+\.safetensors']
-        if not os.path.exists(pretrained_model_name_or_path):
-            revision = kwargs.pop('revision', None)
-            model_dir = snapshot_download(
-                pretrained_model_name_or_path,
-                revision=revision,
-                ignore_file_pattern=ignore_file_pattern)
-        else:
-            model_dir = pretrained_model_name_or_path
-        return ori_from_pretrained(cls, model_dir, *model_args, **kwargs)
-
-    PreTrainedTokenizerBase.from_pretrained = from_pretrained
-
-
-def patch_model_base():
-    """ Monkey patch PreTrainedModel.from_pretrained to adapt to modelscope hub.
-    """
-    ori_from_pretrained = PreTrainedModel.from_pretrained.__func__
-
-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
-                        **kwargs):
-        ignore_file_pattern = [r'\w+\.safetensors']
-        if not os.path.exists(pretrained_model_name_or_path):
-            revision = kwargs.pop('revision', None)
-            model_dir = snapshot_download(
-                pretrained_model_name_or_path,
-                revision=revision,
-                ignore_file_pattern=ignore_file_pattern)
-        else:
-            model_dir = pretrained_model_name_or_path
-        return ori_from_pretrained(cls, model_dir, *model_args, **kwargs)
-
-    PreTrainedModel.from_pretrained = from_pretrained
-
-
-patch_tokenizer_base()
-patch_model_base()
-
-
 def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs):
     """Get a custom wrapper class for auto classes to download the models from the ModelScope hub
     Args:
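With the module-level patches removed, downloads from the ModelScope hub are expected to go through the wrapped auto classes that get_wrapped_class builds and that modelscope itself exports, rather than through globally patched transformers classes. A minimal usage sketch, mirroring the updated test below (assumes network access to the ModelScope hub):

from modelscope import AutoModelForCausalLM, AutoTokenizer

# The modelscope-wrapped auto classes resolve the model id against the
# ModelScope hub, download the files, then delegate to transformers.
tokenizer = AutoTokenizer.from_pretrained('skyline2006/llama-7b', revision='v1.0.1')
model = AutoModelForCausalLM.from_pretrained('skyline2006/llama-7b', revision='v1.0.1')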


@@ -2,8 +2,6 @@
 import unittest

-from transformers import LlamaForCausalLM, LlamaTokenizer
-
 from modelscope import (AutoConfig, AutoModel, AutoModelForCausalLM,
                         AutoTokenizer, GenerationConfig)
@@ -47,10 +45,10 @@ class HFUtilTest(unittest.TestCase):
         self.assertEqual(gen_config.assistant_token_id, 196)

     def test_transformer_patch(self):
-        tokenizer = LlamaTokenizer.from_pretrained(
+        tokenizer = AutoTokenizer.from_pretrained(
             'skyline2006/llama-7b', revision='v1.0.1')
         self.assertIsNotNone(tokenizer)
-        model = LlamaForCausalLM.from_pretrained(
+        model = AutoModelForCausalLM.from_pretrained(
             'skyline2006/llama-7b', revision='v1.0.1')
         self.assertIsNotNone(model)
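One likely behavioral consequence: since PreTrainedTokenizerBase and PreTrainedModel are no longer patched, passing a bare ModelScope model id to the raw transformers classes will no longer trigger a hub download. Code that relied on the old behavior can resolve the id explicitly with modelscope.snapshot_download, the same helper the removed patch called internally; a hedged sketch:

from modelscope import snapshot_download
from transformers import LlamaForCausalLM, LlamaTokenizer

# Download (or reuse the cached copy of) the model files first, then hand
# the local directory to the unpatched transformers classes.
model_dir = snapshot_download('skyline2006/llama-7b', revision='v1.0.1')
tokenizer = LlamaTokenizer.from_pretrained(model_dir)
model = LlamaForCausalLM.from_pretrained(model_dir)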