# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import time
import unittest
from pathlib import Path
from modelscope.utils.ast_utils import (FILES_MTIME_KEY, INDEX_KEY, MD5_KEY,
                                        MODELSCOPE_PATH_KEY, REQUIREMENT_KEY,
                                        VERSION_KEY, AstScanning,
                                        FilesAstScanning,
                                        generate_ast_template,
                                        load_from_prebuilt, load_index)
p = Path(__file__)
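# Resolve the local `modelscope` package directory relative to this test file.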
MODELSCOPE_PATH = p.resolve().parents[2].joinpath('modelscope')


class AstScanningTest(unittest.TestCase):
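    """Unit tests for the AST scanning utilities in
    modelscope.utils.ast_utils.

    A minimal sketch of the single-file API exercised below (the file path
    is only an illustration):

        scanner = AstScanning()
        output = scanner.generate_ast('path/to/a_pipeline.py')
        # output carries 'imports', 'from_imports' and 'decorators' keys

    FilesAstScanning, load_index, generate_ast_template and
    load_from_prebuilt cover directory-level scanning and index caching.
    """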
    def setUp(self):
        print('Testing %s.%s' % (type(self).__name__, self._testMethodName))
        # mkdtemp creates the directory atomically, so no exists check is
        # needed; tearDown removes it.
        self.tmp_dir = tempfile.mkdtemp()
        self.test_file = os.path.join(self.tmp_dir, 'test.py')
    def tearDown(self):
        super().tearDown()
        shutil.rmtree(self.tmp_dir)
    def test_ast_scanning_class(self):
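        """Scan a single real pipeline file and check the extracted
        imports, from-imports and decorator registrations."""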
        ast_scanner = AstScanning()
        pipeline_file = os.path.join(MODELSCOPE_PATH, 'pipelines', 'nlp',
                                     'text_generation_pipeline.py')
        output = ast_scanner.generate_ast(pipeline_file)
        self.assertTrue(output['imports'] is not None)
        self.assertTrue(output['from_imports'] is not None)
        self.assertTrue(output['decorators'] is not None)
        imports, from_imports, decorators = output['imports'], output[
            'from_imports'], output['decorators']
        self.assertIsInstance(imports, dict)
        self.assertIsInstance(from_imports, dict)
        self.assertIsInstance(decorators, list)
        self.assertListEqual(
            list(set(imports.keys()) - set(['torch', 'os'])), [])
        self.assertEqual(len(from_imports.keys()), 10)
        self.assertTrue(from_imports['modelscope.metainfo'] is not None)
        self.assertEqual(from_imports['modelscope.metainfo'], ['Pipelines'])
        self.assertEqual(
            decorators,
            [('PIPELINES', 'text-generation', 'text-generation'),
             ('PIPELINES', 'text2text-generation', 'translation_en_to_de'),
             ('PIPELINES', 'text2text-generation', 'translation_en_to_ro'),
             ('PIPELINES', 'text2text-generation', 'translation_en_to_fr'),
             ('PIPELINES', 'text2text-generation', 'text2text-generation')])
    def test_files_scanning_method(self):
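        """Scan an explicit list of files and check the structure of the
        returned index and requirement mappings."""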
        file_scanner = FilesAstScanning()
        # case of passing in files directly
        pipeline_file = os.path.join(MODELSCOPE_PATH, 'pipelines', 'nlp',
                                     'text_generation_pipeline.py')
        file_list = [pipeline_file]
        output = file_scanner.get_files_scan_results(file_list)
        self.assertTrue(output[INDEX_KEY] is not None)
        self.assertTrue(output[REQUIREMENT_KEY] is not None)
        index, requirements = output[INDEX_KEY], output[REQUIREMENT_KEY]
        self.assertIsInstance(index, dict)
        self.assertIsInstance(requirements, dict)
        self.assertIsInstance(list(index.keys())[0], tuple)
        index_0 = list(index.keys())[0]
        self.assertIsInstance(index[index_0], dict)
        self.assertTrue(index[index_0]['imports'] is not None)
        self.assertIsInstance(index[index_0]['imports'], list)
        self.assertTrue(index[index_0]['module'] is not None)
        self.assertIsInstance(index[index_0]['module'], str)
        index_0 = list(requirements.keys())[0]
        self.assertIsInstance(requirements[index_0], list)
    def test_file_mtime_md5_method(self):
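        """Check that files_mtime_md5 is stable for unchanged files and
        reacts to revised and newly created files."""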
        file_scanner = FilesAstScanning()
        # create the first file
        with open(self.test_file, 'w', encoding='utf-8') as f:
            f.write('This is the new test!')

        md5_1, mtime_1 = file_scanner.files_mtime_md5(self.tmp_dir, [])
        md5_2, mtime_2 = file_scanner.files_mtime_md5(self.tmp_dir, [])
        self.assertEqual(md5_1, md5_2)
        self.assertEqual(mtime_1, mtime_2)
        self.assertIsInstance(mtime_1, dict)
        self.assertEqual(list(mtime_1.keys()), [self.test_file])
        self.assertEqual(mtime_1[self.test_file], mtime_2[self.test_file])
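
        # sleep before rewriting so the revised file gets a visibly newer
        # mtime; presumably some filesystems only store timestamps at
        # coarse (second-level) resolution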
        time.sleep(2)
        # case of revising an existing file
        with open(self.test_file, 'w', encoding='utf-8') as f:
            f.write('test again')
        md5_3, mtime_3 = file_scanner.files_mtime_md5(self.tmp_dir, [])
        self.assertNotEqual(md5_1, md5_3)
        self.assertNotEqual(mtime_1[self.test_file], mtime_3[self.test_file])

        # case of creating a new file
        self.test_file_new = os.path.join(self.tmp_dir, 'test_1.py')
        time.sleep(2)
        with open(self.test_file_new, 'w', encoding='utf-8') as f:
            f.write('test again')
        md5_4, mtime_4 = file_scanner.files_mtime_md5(self.tmp_dir, [])
        self.assertNotEqual(md5_1, md5_4)
        self.assertNotEqual(md5_3, md5_4)
        self.assertEqual(
            set(mtime_4.keys()) - set([self.test_file, self.test_file_new]),
            set())
    def test_load_index_method(self):
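        """Load the full prebuilt index, verify its schema, then round-trip
        it through generate_ast_template and load_from_prebuilt."""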
        # test full indexing case
        output = load_index()
        self.assertTrue(output[INDEX_KEY] is not None)
        self.assertTrue(output[REQUIREMENT_KEY] is not None)
        index, requirements = output[INDEX_KEY], output[REQUIREMENT_KEY]
        self.assertIsInstance(index, dict)
        self.assertIsInstance(requirements, dict)
        self.assertIsInstance(list(index.keys())[0], tuple)
        index_0 = list(index.keys())[0]
        self.assertIsInstance(index[index_0], dict)
        self.assertTrue(index[index_0]['imports'] is not None)
        self.assertIsInstance(index[index_0]['imports'], list)
        self.assertTrue(index[index_0]['module'] is not None)
        self.assertIsInstance(index[index_0]['module'], str)
        index_0 = list(requirements.keys())[0]
        self.assertIsInstance(requirements[index_0], list)
        self.assertIsInstance(output[MD5_KEY], str)
        self.assertIsInstance(output[MODELSCOPE_PATH_KEY], str)
        self.assertIsInstance(output[VERSION_KEY], str)
        self.assertIsInstance(output[FILES_MTIME_KEY], dict)

        # generate ast_template
        file_path = os.path.join(self.tmp_dir, 'index_file.py')
        index = generate_ast_template(file_path=file_path, force_rebuild=False)
        self.assertTrue(os.path.exists(file_path))
        self.assertEqual(output, index)
        index_from_prebuilt = load_from_prebuilt(file_path)
        self.assertEqual(index, index_from_prebuilt)
    @unittest.skip(
        'skipped: the CPU timing of this case is not stable')
    def test_update_load_index_method(self):
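        """Build an index over generated files, then verify that reloads
        after no change, one added file, and one deleted file are all
        faster than the initial full build."""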
        file_number = 20
        file_list = []
        for i in range(file_number):
            filename = os.path.join(self.tmp_dir, f'test_{i}.py')
            with open(filename, 'w', encoding='utf-8') as f:
                f.write('import os')
            file_list.append(filename)

        index_file = 'ast_indexer_1'

        start = time.time()
        index = load_index(
            file_list=file_list,
            indexer_file_dir=self.tmp_dir,
            indexer_file=index_file)
        duration_1 = time.time() - start
        self.assertEqual(len(index[FILES_MTIME_KEY]), file_number)

        # no-change case: reload time should be less than the original
        start = time.time()
        index = load_index(
            file_list=file_list,
            indexer_file_dir=self.tmp_dir,
            indexer_file=index_file)
        duration_2 = time.time() - start
        self.assertGreater(duration_1, duration_2)
        self.assertEqual(len(index[FILES_MTIME_KEY]), file_number)

        # adding a new file: reload time should still be less than the original
        test_file_new_2 = os.path.join(self.tmp_dir, 'test_new.py')
        with open(test_file_new_2, 'w', encoding='utf-8') as f:
            f.write('import os')
        file_list.append(test_file_new_2)

        start = time.time()
        index = load_index(
            file_list=file_list,
            indexer_file_dir=self.tmp_dir,
            indexer_file=index_file)
        duration_3 = time.time() - start
        self.assertGreater(duration_1, duration_3)
        self.assertEqual(len(index[FILES_MTIME_KEY]), file_number + 1)

        # deleting one file: reload time should still be less than the original
        file_list.pop()
        start = time.time()
        index = load_index(
            file_list=file_list,
            indexer_file_dir=self.tmp_dir,
            indexer_file=index_file)
        duration_4 = time.time() - start
        self.assertGreater(duration_1, duration_4)
        self.assertEqual(len(index[FILES_MTIME_KEY]), file_number)


if __name__ == '__main__':
    unittest.main()