# Patch summary (text before the first "diff --git" header is skipped by patch tools).
#
# 1. modelscope/utils/hf_util/patcher.py: every entry of `ignore_file_pattern`
#    changes from a regex-style string (r'\w+\.bin', ...) to a wildcard-style
#    string (r'*.bin', ...). The list of extensions itself is unchanged.
# 2. tests/hub/test_download_tokenizer.py (new file): a unittest that calls
#    `snapshot_download('Qwen/Qwen3-0.6B', ...)` with the same wildcard list into
#    a temporary cache dir, then asserts that 'tokenizer.json' exists and that
#    'model.safetensors' does not.
#
# NOTE(review): this patch was recovered from a copy whose line breaks had been
# collapsed. Line structure follows the hunk counts (16/16 and 0/44); the
# indentation inside the hunks is reconstructed -- confirm against the blob ids
# on the `index` lines before applying.
diff --git a/modelscope/utils/hf_util/patcher.py b/modelscope/utils/hf_util/patcher.py
index c0ebdb22..5104ca4b 100644
--- a/modelscope/utils/hf_util/patcher.py
+++ b/modelscope/utils/hf_util/patcher.py
@@ -16,16 +16,16 @@ from modelscope.utils.repo_utils import (CommitInfo, CommitOperation,
                                          CommitOperationAdd)
 
 ignore_file_pattern = [
-    r'\w+\.bin',
-    r'\w+\.safetensors',
-    r'\w+\.pth',
-    r'\w+\.pt',
-    r'\w+\.h5',
-    r'\w+\.ckpt',
-    r'\w+\.zip',
-    r'\w+\.onnx',
-    r'\w+\.tar',
-    r'\w+\.gz',
+    r'*.bin',
+    r'*.safetensors',
+    r'*.pth',
+    r'*.pt',
+    r'*.h5',
+    r'*.ckpt',
+    r'*.zip',
+    r'*.onnx',
+    r'*.tar',
+    r'*.gz',
 ]
 
 
diff --git a/tests/hub/test_download_tokenizer.py b/tests/hub/test_download_tokenizer.py
new file mode 100644
index 00000000..ada748db
--- /dev/null
+++ b/tests/hub/test_download_tokenizer.py
@@ -0,0 +1,44 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os.path
+import shutil
+import tempfile
+import unittest
+
+from modelscope import snapshot_download
+
+
+class TestDownloadTokenizer(unittest.TestCase):
+
+    def setUp(self):
+        temporary_dir = tempfile.mkdtemp()
+        self.work_dir = temporary_dir
+
+    def tearDown(self):
+        shutil.rmtree(self.work_dir, ignore_errors=True)
+
+    def test_download_tokenizer(self):
+        ignore_file_pattern = [
+            r'*.bin',
+            r'*.safetensors',
+            r'*.pth',
+            r'*.pt',
+            r'*.h5',
+            r'*.ckpt',
+            r'*.zip',
+            r'*.onnx',
+            r'*.tar',
+            r'*.gz',
+        ]
+        model_dir = snapshot_download(
+            'Qwen/Qwen3-0.6B',
+            cache_dir=self.work_dir,
+            ignore_file_pattern=ignore_file_pattern)
+        self.assertTrue(model_dir is not None)
+        self.assertTrue(
+            os.path.exists(os.path.join(model_dir, 'tokenizer.json')))
+        self.assertFalse(
+            os.path.exists(os.path.join(model_dir, 'model.safetensors')))
+
+
+if __name__ == '__main__':
+    unittest.main()