diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh
index 73f972da..d281b786 100644
--- a/.dev_scripts/build_image.sh
+++ b/.dev_scripts/build_image.sh
@@ -159,7 +159,7 @@ docker_file_content=`cat docker/Dockerfile.ubuntu`
 BUILD_HASH_ID=$(git rev-parse HEAD)
 
 # install thrid part library
-docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$BUILD_HASH_ID && pip install --no-cache-dir -U adaseq pai-easycv && pip install --no-cache-dir -U 'ms-swift' 'funasr' autoawq 'timm>0.9.5' 'transformers'"
+docker_file_content="${docker_file_content} \nRUN export COMMIT_ID=$BUILD_HASH_ID && pip install --no-cache-dir -U adaseq pai-easycv && pip install --no-cache-dir -U 'ms-swift' 'decord' 'qwen_vl_utils' 'pyav' 'librosa' 'funasr' autoawq 'timm>0.9.5' 'transformers' 'accelerate' 'peft' 'optimum' 'trl'"
 
 docker_file_content="${docker_file_content} \nRUN pip uninstall modelscope -y && export COMMIT_ID=$BUILD_HASH_ID && cd /tmp && GIT_LFS_SKIP_SMUDGE=1 git clone -b $build_branch --single-branch $REPO_URL && cd modelscope && pip install . && cd / && rm -fr /tmp/modelscope && pip cache purge;"
 
diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py
index f2cad81c..14a3713c 100644
--- a/modelscope/utils/hf_util.py
+++ b/modelscope/utils/hf_util.py
@@ -34,54 +34,6 @@ def user_agent(invoked_by=None):
     return uagent
 
 
-def patch_tokenizer_base():
-    """ Monkey patch PreTrainedTokenizerBase.from_pretrained to adapt to modelscope hub.
-    """
-    ori_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
-
-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
-                        **kwargs):
-        ignore_file_pattern = [r'\w+\.bin', r'\w+\.safetensors']
-        if not os.path.exists(pretrained_model_name_or_path):
-            revision = kwargs.pop('revision', None)
-            model_dir = snapshot_download(
-                pretrained_model_name_or_path,
-                revision=revision,
-                ignore_file_pattern=ignore_file_pattern)
-        else:
-            model_dir = pretrained_model_name_or_path
-        return ori_from_pretrained(cls, model_dir, *model_args, **kwargs)
-
-    PreTrainedTokenizerBase.from_pretrained = from_pretrained
-
-
-def patch_model_base():
-    """ Monkey patch PreTrainedModel.from_pretrained to adapt to modelscope hub.
-    """
-    ori_from_pretrained = PreTrainedModel.from_pretrained.__func__
-
-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path, *model_args,
-                        **kwargs):
-        ignore_file_pattern = [r'\w+\.safetensors']
-        if not os.path.exists(pretrained_model_name_or_path):
-            revision = kwargs.pop('revision', None)
-            model_dir = snapshot_download(
-                pretrained_model_name_or_path,
-                revision=revision,
-                ignore_file_pattern=ignore_file_pattern)
-        else:
-            model_dir = pretrained_model_name_or_path
-        return ori_from_pretrained(cls, model_dir, *model_args, **kwargs)
-
-    PreTrainedModel.from_pretrained = from_pretrained
-
-
-patch_tokenizer_base()
-patch_model_base()
-
-
 def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs):
     """Get a custom wrapper class for auto classes to download the models from the ModelScope hub
     Args:
diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py
index e16bc6fa..cf6b12ce 100644
--- a/tests/utils/test_hf_util.py
+++ b/tests/utils/test_hf_util.py
@@ -2,8 +2,6 @@
 
 import unittest
 
-from transformers import LlamaForCausalLM, LlamaTokenizer
-
 from modelscope import (AutoConfig, AutoModel, AutoModelForCausalLM,
                         AutoTokenizer, GenerationConfig)
 
@@ -47,10 +45,10 @@ class HFUtilTest(unittest.TestCase):
         self.assertEqual(gen_config.assistant_token_id, 196)
 
     def test_transformer_patch(self):
-        tokenizer = LlamaTokenizer.from_pretrained(
+        tokenizer = AutoTokenizer.from_pretrained(
             'skyline2006/llama-7b', revision='v1.0.1')
         self.assertIsNotNone(tokenizer)
-        model = LlamaForCausalLM.from_pretrained(
+        model = AutoModelForCausalLM.from_pretrained(
             'skyline2006/llama-7b', revision='v1.0.1')
         self.assertIsNotNone(model)