From a2eba4d9847be84a3424850ca7c923a05ccaceec Mon Sep 17 00:00:00 2001 From: suluyana Date: Wed, 8 Jan 2025 18:16:52 +0800 Subject: [PATCH] update --- .../models/nlp/hf_transformers/backbone.py | 4 ---- .../models/nlp/polylm/text_generation.py | 9 +++++--- .../msdatasets/data_loader/data_loader.py | 5 ++-- .../data_loader/data_loader_manager.py | 10 ++++---- modelscope/msdatasets/ms_dataset.py | 6 ++--- .../msdatasets/utils/hf_datasets_util.py | 22 +++++++++--------- modelscope/pipelines/accelerate/vllm.py | 6 ++--- .../pipelines/multi_modal/ovis_vl_pipeline.py | 6 ++--- modelscope/pipelines/nlp/llm_pipeline.py | 23 +++++++++---------- .../pipelines/nlp/text_generation_pipeline.py | 23 +++++++++---------- modelscope/preprocessors/templates/loader.py | 6 ++--- modelscope/utils/automodel_utils.py | 6 ++--- modelscope/utils/plugins.py | 4 ++-- 13 files changed, 62 insertions(+), 68 deletions(-) diff --git a/modelscope/models/nlp/hf_transformers/backbone.py b/modelscope/models/nlp/hf_transformers/backbone.py index 8cb368f0..5b9a3965 100644 --- a/modelscope/models/nlp/hf_transformers/backbone.py +++ b/modelscope/models/nlp/hf_transformers/backbone.py @@ -99,10 +99,6 @@ class TransformersModel(TorchModel, PreTrainedModel): return model # return the model only - logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo.
Please make sure that' - f' the remote code content is what you need {model_dir}.') config, kwargs = AutoConfig.from_pretrained( model_dir, return_unused_kwargs=True, diff --git a/modelscope/models/nlp/polylm/text_generation.py b/modelscope/models/nlp/polylm/text_generation.py index bdd6e991..bcef9d88 100644 --- a/modelscope/models/nlp/polylm/text_generation.py +++ b/modelscope/models/nlp/polylm/text_generation.py @@ -11,6 +11,9 @@ from modelscope.models.builder import MODELS from modelscope.utils.constant import Tasks from modelscope.utils.hub import read_config from modelscope.utils.streaming_output import StreamingOutputMixin +from modelscope.utils.logger import get_logger + +logger = get_logger() __all__ = ['PolyLMForTextGeneration'] @@ -28,9 +31,9 @@ class PolyLMForTextGeneration(TorchModel, StreamingOutputMixin): self.tokenizer = AutoTokenizer.from_pretrained( model_dir, legacy=False, use_fast=False) logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {model_dir}.') + f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.' + ) self.model = AutoModelForCausalLM.from_pretrained( model_dir, device_map='auto', trust_remote_code=True) self.model.eval() diff --git a/modelscope/msdatasets/data_loader/data_loader.py b/modelscope/msdatasets/data_loader/data_loader.py index c1e374af..f3216516 100644 --- a/modelscope/msdatasets/data_loader/data_loader.py +++ b/modelscope/msdatasets/data_loader/data_loader.py @@ -135,9 +135,8 @@ class OssDownloader(BaseDownloader): if dataset_py_script and dataset_formation == DatasetFormations.hf_compatible: if trust_remote_code: logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {dataset_name}.'
+ f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make ' + 'sure that you can trust the external codes.' ) self.dataset = hf_load_dataset( diff --git a/modelscope/msdatasets/data_loader/data_loader_manager.py b/modelscope/msdatasets/data_loader/data_loader_manager.py index 99315a31..17822584 100644 --- a/modelscope/msdatasets/data_loader/data_loader_manager.py +++ b/modelscope/msdatasets/data_loader/data_loader_manager.py @@ -73,9 +73,8 @@ class LocalDataLoaderManager(DataLoaderManager): if data_loader_type == LocalDataLoaderType.HF_DATA_LOADER: if trust_remote_code: logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {dataset_name}.' + f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make ' + 'sure that you can trust the external codes.' ) # Build huggingface data loader and return dataset. @@ -119,9 +118,8 @@ class RemoteDataLoaderManager(DataLoaderManager): if data_loader_type == RemoteDataLoaderType.HF_DATA_LOADER: if trust_remote_code: logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {dataset_name}.' + f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make ' + 'sure that you can trust the external codes.' ) dataset_ret = hf_data_loader( dataset_name, diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index 0a259ac5..7060d6e3 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -239,9 +239,9 @@ class MsDataset: if trust_remote_code: logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo.
Please make sure that' - f' the remote code content is what you need {dataset_name}.') + f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make sure that ' + 'you can trust the external codes.' + ) # Init context config dataset_context_config = DatasetContextConfig( diff --git a/modelscope/msdatasets/utils/hf_datasets_util.py b/modelscope/msdatasets/utils/hf_datasets_util.py index 8f933ceb..fea304f6 100644 --- a/modelscope/msdatasets/utils/hf_datasets_util.py +++ b/modelscope/msdatasets/utils/hf_datasets_util.py @@ -835,8 +835,8 @@ def get_module_with_script(self) -> DatasetModule: if not os.path.exists(importable_file_path): trust_remote_code = resolve_trust_remote_code(trust_remote_code=self.trust_remote_code, repo_id=self.name) if trust_remote_code: - logger.warning('Use trust_remote_code=True. The code will be downloaded and used from the remote repo' - f' {repo_id}. Please make sure that the remote code content is what you need.') + logger.warning(f'Use trust_remote_code=True. Will invoke codes from {repo_id}. Please make sure that ' + 'you can trust the external codes.') _create_importable_file( local_path=local_script_path, local_imports=local_imports, @@ -937,9 +937,9 @@ class DatasetsWrapperHF: ) if not save_infos else VerificationMode.ALL_CHECKS) if trust_remote_code: - logger.warning('Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {path}.') + logger.warning(f'Use trust_remote_code=True. Will invoke codes from {path}. Please make sure ' + 'that you can trust the external codes.' + ) # Create a dataset builder builder_instance = DatasetsWrapperHF.load_dataset_builder( @@ -1069,9 +1069,9 @@ class DatasetsWrapperHF: download_config.storage_options.update(storage_options) if trust_remote_code: - logger.warning('Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo.
Please make sure that' - f' the remote code content is what you need {path}.') + logger.warning(f'Use trust_remote_code=True. Will invoke codes from {path}. Please make sure ' + 'that you can trust the external codes.' + ) dataset_module = DatasetsWrapperHF.dataset_module_factory( path, @@ -1184,9 +1184,9 @@ class DatasetsWrapperHF: # - if path has one "/" and is dataset repository on the HF hub without a python file # -> use a packaged module (csv, text etc.) based on content of the repository if trust_remote_code: - logger.warning('Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {path}.') + logger.warning(f'Use trust_remote_code=True. Will invoke codes from {path}. Please make sure ' + 'that you can trust the external codes.' + ) # Try packaged if path in _PACKAGED_DATASETS_MODULES: diff --git a/modelscope/pipelines/accelerate/vllm.py b/modelscope/pipelines/accelerate/vllm.py index 76569546..669d304a 100644 --- a/modelscope/pipelines/accelerate/vllm.py +++ b/modelscope/pipelines/accelerate/vllm.py @@ -31,9 +31,9 @@ class Vllm(InferFramework): in ('bfloat16', 'auto')): dtype = 'float16' logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {self.model_dir}.') + f'Use trust_remote_code=True. Will invoke codes from {self.model_dir}. Please make ' + 'sure that you can trust the external codes.'
+ ) self.model = LLM( self.model_dir, dtype=dtype, diff --git a/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py b/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py index cffe3741..6d336749 100644 --- a/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py +++ b/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py @@ -38,9 +38,9 @@ class VisionChatPipeline(VisualQuestionAnsweringPipeline): multimodal_max_length = kwargs.get('multimodal_max_length', 8192) self.device = 'cuda' if device == 'gpu' else device logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {model}.') + f'Use trust_remote_code=True. Will invoke codes from {model}. Please make ' + 'sure that you can trust the external codes.' + ) self.model = AutoModelForCausalLM.from_pretrained( model, torch_dtype=torch_dtype, diff --git a/modelscope/pipelines/nlp/llm_pipeline.py b/modelscope/pipelines/nlp/llm_pipeline.py index 9789de40..5e1b1c86 100644 --- a/modelscope/pipelines/nlp/llm_pipeline.py +++ b/modelscope/pipelines/nlp/llm_pipeline.py @@ -98,9 +98,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): revision = self.cfg.safe_get('adapter_cfg.model_revision', 'master') logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {base_model}.') + f'Use trust_remote_code=True. Will invoke codes from {base_model}. Please make sure that you can ' + 'trust the external codes.' + ) base_model = Model.from_pretrained( base_model, revision, @@ -139,9 +139,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): # TODO: Temporary use of AutoModelForCausalLM # Need to be updated into a universal solution logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo.
Please make sure that' - f' the remote code content is what you need {model_dir}.') + f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.' + ) model = AutoModelForCausalLM.from_pretrained( model_dir, device_map=self.device_map, @@ -182,9 +182,8 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): if os.path.exists(kwargs['model']): logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {kwargs["model"]}.' + f'Use trust_remote_code=True. Will invoke codes from {kwargs["model"]}. Please make sure ' + 'that you can trust the external codes.' ) config = AutoConfig.from_pretrained( kwargs['model'], trust_remote_code=True) @@ -437,9 +436,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): if tokenizer_class is None: tokenizer_class = AutoTokenizer logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {model_dir}.') + f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.' + ) return tokenizer_class.from_pretrained( model_dir, trust_remote_code=True) diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py index b8de7df8..01326e0f 100644 --- a/modelscope/pipelines/nlp/text_generation_pipeline.py +++ b/modelscope/pipelines/nlp/text_generation_pipeline.py @@ -270,9 +270,9 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline): default_torch_dtype = torch.bfloat16 torch_dtype = kwargs.get('torch_dtype', default_torch_dtype) logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo.
Please make sure that' - f' the remote code content is what you need {model_dir}.') + f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.' + ) model = Model.from_pretrained( model_dir, trust_remote_code=True, @@ -290,9 +290,8 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline): self.model = model self.model.eval() logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {self.model.model_dir}.' + f'Use trust_remote_code=True. Will invoke codes from {self.model.model_dir}. Please ' + 'make sure that you can trust the external codes.' ) self.tokenizer = AutoTokenizer.from_pretrained( self.model.model_dir, trust_remote_code=True) @@ -338,9 +337,9 @@ class QWenChatPipeline(Pipeline): if isinstance(model, str): logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {model}.') + f'Use trust_remote_code=True. Will invoke codes from {model}. Please make sure ' + 'that you can trust the external codes.' + ) self.tokenizer = AutoTokenizer.from_pretrained( model, revision=revision, trust_remote_code=True) self.model = AutoModelForCausalLM.from_pretrained( @@ -406,9 +405,9 @@ class QWenTextGenerationPipeline(Pipeline): if isinstance(model, str): logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {model}.') + f'Use trust_remote_code=True. Will invoke codes from {model}. Please make sure ' + 'that you can trust the external codes.'
+ ) self.model = AutoModelForCausalLM.from_pretrained( model, device_map=device_map, diff --git a/modelscope/preprocessors/templates/loader.py b/modelscope/preprocessors/templates/loader.py index b9da3608..8dac8d41 100644 --- a/modelscope/preprocessors/templates/loader.py +++ b/modelscope/preprocessors/templates/loader.py @@ -820,9 +820,9 @@ class TemplateLoader: model_id, revision=kwargs.pop('revision', 'master'), ignore_file_pattern=ignore_file_pattern) - logger.warning('Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {model_dir}.') + logger.warning(f'Use trust_remote_code=True. Will invoke codes from {model_dir}.' + ' Please make sure that you can trust the external codes.' + ) tokenizer = AutoTokenizer.from_pretrained( model_dir, trust_remote_code=True) config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) diff --git a/modelscope/utils/automodel_utils.py b/modelscope/utils/automodel_utils.py index 70257b63..e505e2c6 100644 --- a/modelscope/utils/automodel_utils.py +++ b/modelscope/utils/automodel_utils.py @@ -95,9 +95,9 @@ def get_hf_automodel_class(model_dir: str, return None try: logger.warning( - 'Use trust_remote_code=True. The code will be downloaded' - ' and used from the remote repo. Please make sure that' - f' the remote code content is what you need {model_dir}.') + f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.'
+ ) config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) if task_name is None: automodel_class = get_default_automodel(config) diff --git a/modelscope/utils/plugins.py b/modelscope/utils/plugins.py index c99fc2b0..0c322dd5 100644 --- a/modelscope/utils/plugins.py +++ b/modelscope/utils/plugins.py @@ -452,8 +452,8 @@ def register_modelhub_repo(model_dir, allow_remote=False) -> None: """ Try to install and import remote model from modelhub""" if allow_remote: logger.warning( - 'Use allow_remote=True. The code will be downloaded and used from the remote repo.' - f' Please make sure that the remote code content is what you need {model_dir}.' + f'Use allow_remote=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.' ) try: import_module_from_model_dir(model_dir)