# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest
import uuid

import torch
from huggingface_hub import CommitInfo, RepoUrl

from modelscope import HubApi
from modelscope.utils.hf_util.patcher import patch_context
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import TEST_MODEL_ORG, test_level
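
# These tests exercise ModelScope's Hugging Face compatibility layer: the
# re-exported auto classes and the patch_context() patcher, which temporarily
# redirects transformers / huggingface_hub / diffusers / peft hub calls to
# the ModelScope hub.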

logger = get_logger()


class HFUtilTest(unittest.TestCase):

    def setUp(self):
        logger.info('SetUp')
        self.api = HubApi()
        self.user = TEST_MODEL_ORG
        print(self.user)
        self.create_model_name = '%s/%s_%s' % (self.user, 'test_model_upload',
                                               uuid.uuid4().hex)
        logger.info('create %s' % self.create_model_name)
        temporary_dir = tempfile.mkdtemp()
        self.work_dir = temporary_dir
        self.model_dir = os.path.join(temporary_dir, self.create_model_name)
        self.repo_path = os.path.join(self.work_dir, 'repo_path')
        self.test_folder = os.path.join(temporary_dir, 'test_folder')
        self.test_file1 = os.path.join(temporary_dir, 'test_folder', '1.json')
        self.test_file2 = os.path.join(temporary_dir, '2.json')
        os.makedirs(self.test_folder, exist_ok=True)
        with open(self.test_file1, 'w') as f:
            f.write('{}')
        with open(self.test_file2, 'w') as f:
            f.write('{}')
        self.pipeline_qa_context = r"""
Extractive Question Answering is the task of extracting an answer from a text given a question. An example
of a question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would
like to fine-tune a model on a SQuAD task, you may leverage the
examples/pytorch/question-answering/run_squad.py script.
"""
        self.pipeline_qa_question = 'What is a good example of a question answering dataset?'

    def tearDown(self):
        logger.info('TearDown')
        shutil.rmtree(self.model_dir, ignore_errors=True)
        try:
            self.api.delete_model(model_id=self.create_model_name)
        except Exception:
            pass

    def test_auto_tokenizer(self):
        from modelscope import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            'baichuan-inc/Baichuan2-7B-Chat',
            trust_remote_code=True,
            revision='v1.0.3')
        self.assertEqual(tokenizer.vocab_size, 125696)
        self.assertEqual(tokenizer.model_max_length, 4096)
        self.assertFalse(tokenizer.is_fast)

    def test_quantization_import(self):
        from modelscope import BitsAndBytesConfig
        self.assertTrue(BitsAndBytesConfig is not None)

    def test_auto_model(self):
        from modelscope import AutoModelForCausalLM
        model = AutoModelForCausalLM.from_pretrained(
            'baichuan-inc/baichuan-7B', trust_remote_code=True)
        self.assertTrue(model is not None)

    def test_auto_config(self):
        from modelscope import AutoConfig, GenerationConfig
        config = AutoConfig.from_pretrained(
            'baichuan-inc/Baichuan-13B-Chat',
            trust_remote_code=True,
            revision='v1.0.3')
        self.assertEqual(config.model_type, 'baichuan')
        gen_config = GenerationConfig.from_pretrained(
            'baichuan-inc/Baichuan-13B-Chat',
            trust_remote_code=True,
            revision='v1.0.3')
        self.assertEqual(gen_config.assistant_token_id, 196)

    def test_transformer_patch(self):
        # Inside patch_context the native transformers auto classes resolve
        # model ids against the ModelScope hub instead of the HF Hub.
        with patch_context():
            from transformers import AutoTokenizer, AutoModelForCausalLM
            tokenizer = AutoTokenizer.from_pretrained(
                'iic/nlp_structbert_sentiment-classification_chinese-tiny')
            self.assertIsNotNone(tokenizer)
            model = AutoModelForCausalLM.from_pretrained(
                'iic/nlp_structbert_sentiment-classification_chinese-tiny')
            self.assertIsNotNone(model)

    def test_patch_model(self):
        from modelscope.utils.hf_util.patcher import patch_context
        with patch_context():
            from transformers import AutoModel
            model = AutoModel.from_pretrained(
                'iic/nlp_structbert_sentiment-classification_chinese-tiny')
            self.assertTrue(model is not None)
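        # Outside the patch context this ModelScope-only repo id is not
        # expected to resolve on the Hugging Face Hub, so loading should fail.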
        try:
            model = AutoModel.from_pretrained(
                'iic/nlp_structbert_sentiment-classification_chinese-tiny')
        except Exception:
            pass
        else:
            self.assertTrue(False)

    def test_patch_config_bert(self):
        from transformers import BertConfig
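        # Without patch_context the lookup goes to the Hugging Face Hub,
        # where this ModelScope repo does not exist, so it should raise.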
        try:
            BertConfig.from_pretrained(
                'iic/nlp_structbert_sentiment-classification_chinese-tiny')
        except Exception:
            pass
        else:
            self.assertTrue(False)

    def test_patch_config(self):
        with patch_context():
            from transformers import AutoConfig
            config = AutoConfig.from_pretrained(
                'iic/nlp_structbert_sentiment-classification_chinese-tiny')
            self.assertTrue(config is not None)
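        # After leaving the patch context the same call should fail again.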
        try:
            config = AutoConfig.from_pretrained(
                'iic/nlp_structbert_sentiment-classification_chinese-tiny')
        except Exception:
            pass
        else:
            self.assertTrue(False)
        # Test patch again
        with patch_context():
            from transformers import AutoConfig
            config = AutoConfig.from_pretrained(
                'iic/nlp_structbert_sentiment-classification_chinese-tiny')
            self.assertTrue(config is not None)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_patch_diffusers(self):
        with patch_context():
            from diffusers import StableDiffusionPipeline
            pipe = StableDiffusionPipeline.from_pretrained(
                'AI-ModelScope/stable-diffusion-v1-5')
            self.assertTrue(pipe is not None)
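        # Outside the patch the native diffusers class should no longer find
        # the ModelScope repo, while the modelscope re-export still works.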
        try:
            pipe = StableDiffusionPipeline.from_pretrained(
                'AI-ModelScope/stable-diffusion-v1-5')
        except Exception:
            pass
        else:
            self.assertTrue(False)
        from modelscope import StableDiffusionPipeline
        pipe = StableDiffusionPipeline.from_pretrained(
            'AI-ModelScope/stable-diffusion-v1-5')
        self.assertTrue(pipe is not None)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_patch_peft(self):
        with patch_context():
            from transformers import AutoModelForCausalLM
            from peft import PeftModel
            model = AutoModelForCausalLM.from_pretrained(
                'Qwen/Qwen1.5-0.5B-Chat',
                trust_remote_code=True,
                torch_dtype=torch.float32)
            model = PeftModel.from_pretrained(
                model,
                'tastelikefeet/test_lora',
                trust_remote_code=True,
                torch_dtype=torch.float32)
            self.assertTrue(model is not None)
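        # Leaving the context should remove the patch and restore the
        # original PeftModel.from_pretrained.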
        self.assertFalse(hasattr(PeftModel, '_from_pretrained_origin'))

    def test_patch_file_exists(self):
        with patch_context():
            from huggingface_hub import file_exists
            self.assertTrue(
                file_exists('AI-ModelScope/stable-diffusion-v1-5',
                            'feature_extractor/preprocessor_config.json'))
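        # Outside the patch, file_exists queries the Hugging Face Hub, so the
        # call should either raise or report the file as missing.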
        try:
            # Import again
            from huggingface_hub import file_exists  # noqa
            exists = file_exists('AI-ModelScope/stable-diffusion-v1-5',
                                 'feature_extractor/preprocessor_config.json')
        except Exception:
            pass
        else:
            self.assertFalse(exists)

    def test_patch_file_download(self):
        with patch_context():
            from huggingface_hub import hf_hub_download
            local_dir = hf_hub_download(
                'AI-ModelScope/stable-diffusion-v1-5',
                'feature_extractor/preprocessor_config.json')
            logger.info('patch file_download dir: ' + local_dir)
            self.assertTrue(local_dir is not None)

    def test_patch_create_repo(self):
        with patch_context():
            from huggingface_hub import create_repo
            repo_url: RepoUrl = create_repo(self.create_model_name)
            logger.info('patch create repo result: ' + repo_url.repo_id)
            self.assertTrue(repo_url is not None)
            from huggingface_hub import upload_folder
            commit_info: CommitInfo = upload_folder(
                repo_id=self.create_model_name,
                folder_path=self.test_folder,
                path_in_repo='')
            logger.info('patch upload folder result: '
                        + commit_info.commit_url)
            self.assertTrue(commit_info is not None)
            from huggingface_hub import file_exists
            self.assertTrue(file_exists(self.create_model_name, '1.json'))
            from huggingface_hub import upload_file
            commit_info: CommitInfo = upload_file(
                path_or_fileobj=self.test_file2,
                path_in_repo='test_folder2',
                repo_id=self.create_model_name)
            self.assertTrue(
                file_exists(self.create_model_name, 'test_folder2/2.json'))

    def test_who_am_i(self):
        with patch_context():
            from huggingface_hub import whoami
            self.assertTrue(whoami()['name'] == self.user)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_push_to_hub(self):
        with patch_context():
            from transformers import AutoModelForCausalLM
            model = AutoModelForCausalLM.from_pretrained(
                'Qwen/Qwen1.5-0.5B-Chat', trust_remote_code=True)
            model.push_to_hub(self.create_model_name)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_pipeline_model_id(self):
        from modelscope import pipeline
        model_id = 'damotestx/distilbert-base-cased-distilled-squad'
        qa = pipeline('question-answering', model=model_id)
        assert qa(
            question=self.pipeline_qa_question,
            context=self.pipeline_qa_context)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_pipeline_auto_model(self):
        from modelscope import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
        model_id = 'damotestx/distilbert-base-cased-distilled-squad'
        model = AutoModelForQuestionAnswering.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        qa = pipeline('question-answering', model=model, tokenizer=tokenizer)
        assert qa(
            question=self.pipeline_qa_question,
            context=self.pipeline_qa_context)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_pipeline_save_pretrained(self):
        from modelscope import pipeline
        model_id = 'damotestx/distilbert-base-cased-distilled-squad'
        pipe_ori = pipeline('question-answering', model=model_id)
        result_ori = pipe_ori(
            question=self.pipeline_qa_question,
            context=self.pipeline_qa_context)
        # save_pretrained
        repo_id = self.create_model_name
        save_dir = './tmp_test_hf_pipeline'
        try:
            shutil.rmtree(save_dir, ignore_errors=True)
            self.api.delete_model(repo_id)
            # wait for the repo deletion to take effect
            import time
            time.sleep(5)
        except Exception:
            # the repo may not exist yet
            pass
        pipe_ori.save_pretrained(save_dir, push_to_hub=True, repo_id=repo_id)
        # load from saved
        pipe_new = pipeline('question-answering', model=repo_id)
        result_new = pipe_new(
            question=self.pipeline_qa_question,
            context=self.pipeline_qa_context)
        assert result_new == result_ori


if __name__ == '__main__':
    unittest.main()