From 0a643276ca14226487a5230466ac5d30e8a13216 Mon Sep 17 00:00:00 2001 From: suluyana Date: Tue, 7 Jan 2025 21:04:29 +0800 Subject: [PATCH] warning parsing trust_remote_code=True --- modelscope/models/nlp/hf_transformers/backbone.py | 3 +++ modelscope/models/nlp/polylm/text_generation.py | 3 +++ modelscope/msdatasets/data_loader/data_loader.py | 5 +++++ .../msdatasets/data_loader/data_loader_manager.py | 9 +++++++++ modelscope/msdatasets/ms_dataset.py | 5 +++++ modelscope/msdatasets/utils/hf_datasets_util.py | 14 ++++++++++++++ modelscope/pipelines/accelerate/vllm.py | 6 +++++- .../pipelines/multi_modal/ovis_vl_pipeline.py | 7 +++++-- modelscope/pipelines/nlp/llm_pipeline.py | 12 ++++++++++++ .../pipelines/nlp/text_generation_pipeline.py | 12 ++++++++++++ modelscope/preprocessors/templates/loader.py | 3 +++ modelscope/utils/automodel_utils.py | 6 +++++- 12 files changed, 81 insertions(+), 4 deletions(-) diff --git a/modelscope/models/nlp/hf_transformers/backbone.py b/modelscope/models/nlp/hf_transformers/backbone.py index 5b9a3965..10681d8b 100644 --- a/modelscope/models/nlp/hf_transformers/backbone.py +++ b/modelscope/models/nlp/hf_transformers/backbone.py @@ -99,6 +99,9 @@ class TransformersModel(TorchModel, PreTrainedModel): return model # return the model only + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. 
Please make sure that' + f' the remote code content is what you need {model_dir}.') config, kwargs = AutoConfig.from_pretrained( model_dir, return_unused_kwargs=True, diff --git a/modelscope/models/nlp/polylm/text_generation.py b/modelscope/models/nlp/polylm/text_generation.py index 1881cf2b..cf53157a 100644 --- a/modelscope/models/nlp/polylm/text_generation.py +++ b/modelscope/models/nlp/polylm/text_generation.py @@ -27,6 +27,9 @@ class PolyLMForTextGeneration(TorchModel, StreamingOutputMixin): super().__init__(model_dir, *args, **kwargs) self.tokenizer = AutoTokenizer.from_pretrained( model_dir, legacy=False, use_fast=False) + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {model_dir}.') self.model = AutoModelForCausalLM.from_pretrained( model_dir, device_map='auto', trust_remote_code=True) self.model.eval() diff --git a/modelscope/msdatasets/data_loader/data_loader.py b/modelscope/msdatasets/data_loader/data_loader.py index 92074449..9e1583f4 100644 --- a/modelscope/msdatasets/data_loader/data_loader.py +++ b/modelscope/msdatasets/data_loader/data_loader.py @@ -133,6 +133,11 @@ class OssDownloader(BaseDownloader): raise f'meta-file: {dataset_name}.py not found on the modelscope hub.' if dataset_py_script and dataset_formation == DatasetFormations.hf_compatible: + if trust_remote_code: + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. 
Please make sure that' + f' the remote code content is what you need {dataset_name}.') + self.dataset = hf_load_dataset( dataset_py_script, name=subset_name, diff --git a/modelscope/msdatasets/data_loader/data_loader_manager.py b/modelscope/msdatasets/data_loader/data_loader_manager.py index a9e58b7c..d59fc1d6 100644 --- a/modelscope/msdatasets/data_loader/data_loader_manager.py +++ b/modelscope/msdatasets/data_loader/data_loader_manager.py @@ -71,6 +71,11 @@ class LocalDataLoaderManager(DataLoaderManager): # Select local data loader # TODO: more loaders to be supported. if data_loader_type == LocalDataLoaderType.HF_DATA_LOADER: + if trust_remote_code: + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {dataset_name}.') + # Build huggingface data loader and return dataset. return hf_data_loader( dataset_name, @@ -110,6 +115,10 @@ class RemoteDataLoaderManager(DataLoaderManager): # To use the huggingface data loader if data_loader_type == RemoteDataLoaderType.HF_DATA_LOADER: + if trust_remote_code: + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {dataset_name}.') dataset_ret = hf_data_loader( dataset_name, name=subset_name, diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index 899142ad..28de17f6 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -237,6 +237,11 @@ class MsDataset: if not namespace or not dataset_name: raise 'The dataset_name should be in the form of `namespace/dataset_name` or `dataset_name`.' + if trust_remote_code: + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. 
Please make sure that' + f' the remote code content is what you need {dataset_name}.') + # Init context config dataset_context_config = DatasetContextConfig( dataset_name=dataset_name, diff --git a/modelscope/msdatasets/utils/hf_datasets_util.py b/modelscope/msdatasets/utils/hf_datasets_util.py index 1e9e9970..8f933ceb 100644 --- a/modelscope/msdatasets/utils/hf_datasets_util.py +++ b/modelscope/msdatasets/utils/hf_datasets_util.py @@ -936,6 +936,11 @@ class DatasetsWrapperHF: verification_mode or VerificationMode.BASIC_CHECKS ) if not save_infos else VerificationMode.ALL_CHECKS) + if trust_remote_code: + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {path}.') + # Create a dataset builder builder_instance = DatasetsWrapperHF.load_dataset_builder( path=path, @@ -1063,6 +1068,11 @@ class DatasetsWrapperHF: ) if download_config else DownloadConfig() download_config.storage_options.update(storage_options) + if trust_remote_code: + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {path}.') + dataset_module = DatasetsWrapperHF.dataset_module_factory( path, revision=revision, @@ -1173,6 +1183,10 @@ class DatasetsWrapperHF: # -> the module from the python file in the dataset repository # - if path has one "/" and is dataset repository on the HF hub without a python file # -> use a packaged module (csv, text etc.) based on content of the repository + if trust_remote_code: + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. 
Please make sure that' + f' the remote code content is what you need {path}.') # Try packaged if path in _PACKAGED_DATASETS_MODULES: diff --git a/modelscope/pipelines/accelerate/vllm.py b/modelscope/pipelines/accelerate/vllm.py index 15ced4bb..599f520a 100644 --- a/modelscope/pipelines/accelerate/vllm.py +++ b/modelscope/pipelines/accelerate/vllm.py @@ -2,7 +2,8 @@ from typing import List, Union from modelscope.pipelines.accelerate.base import InferFramework from modelscope.utils.import_utils import is_vllm_available - +from modelscope import get_logger +logger = get_logger() class Vllm(InferFramework): @@ -27,6 +28,9 @@ class Vllm(InferFramework): if not Vllm.check_gpu_compatibility(8) and (dtype in ('bfloat16', 'auto')): dtype = 'float16' + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {self.model_dir}.') self.model = LLM( self.model_dir, dtype=dtype, diff --git a/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py b/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py index f19eddff..5e1e18a9 100644 --- a/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py +++ b/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Union import torch -from modelscope import AutoModelForCausalLM +from modelscope import AutoModelForCausalLM, get_logger from modelscope.metainfo import Pipelines, Preprocessors from modelscope.models.base import Model from modelscope.outputs import OutputKeys @@ -12,7 +12,7 @@ from modelscope.pipelines.multi_modal.visual_question_answering_pipeline import VisualQuestionAnsweringPipeline from modelscope.preprocessors import Preprocessor, load_image from modelscope.utils.constant import Fields, Frameworks, Tasks - +logger = get_logger() @PIPELINES.register_module( Tasks.visual_question_answering, module_name='ovis-vl') @@ -35,6 +35,9 @@ class 
VisionChatPipeline(VisualQuestionAnsweringPipeline): torch_dtype = kwargs.get('torch_dtype', torch.float16) multimodal_max_length = kwargs.get('multimodal_max_length', 8192) self.device = 'cuda' if device == 'gpu' else device + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {model}.') self.model = AutoModelForCausalLM.from_pretrained( model, torch_dtype=torch_dtype, diff --git a/modelscope/pipelines/nlp/llm_pipeline.py b/modelscope/pipelines/nlp/llm_pipeline.py index 7ad0d278..ceaa4c90 100644 --- a/modelscope/pipelines/nlp/llm_pipeline.py +++ b/modelscope/pipelines/nlp/llm_pipeline.py @@ -97,6 +97,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): assert base_model is not None, 'Cannot get adapter_cfg.model_id_or_path from configuration.json file.' revision = self.cfg.safe_get('adapter_cfg.model_revision', 'master') + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {base_model}.') base_model = Model.from_pretrained( base_model, revision, @@ -134,6 +137,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): model) else snapshot_download(model) # TODO: Temporary use of AutoModelForCausalLM # Need to be updated into a universal solution + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {model_dir}.') model = AutoModelForCausalLM.from_pretrained( model_dir, device_map=self.device_map, @@ -173,6 +179,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): self.llm_framework = llm_framework if os.path.exists(kwargs['model']): + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. 
Please make sure that' + f' the remote code content is what you need {kwargs["model"]}.') config = AutoConfig.from_pretrained( kwargs['model'], trust_remote_code=True) q_config = config.__dict__.get('quantization_config', None) @@ -423,6 +432,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): model_dir = self.model.model_dir if tokenizer_class is None: tokenizer_class = AutoTokenizer + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {model_dir}.') return tokenizer_class.from_pretrained( model_dir, trust_remote_code=True) diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py index 55eaf809..555e7a9d 100644 --- a/modelscope/pipelines/nlp/text_generation_pipeline.py +++ b/modelscope/pipelines/nlp/text_generation_pipeline.py @@ -269,6 +269,9 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline): if use_bf16: default_torch_dtype = torch.bfloat16 torch_dtype = kwargs.get('torch_dtype', default_torch_dtype) + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {model_dir}.') model = Model.from_pretrained( model_dir, trust_remote_code=True, @@ -285,6 +288,9 @@ self.model = model self.model.eval() + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {self.model.model_dir}.') self.tokenizer = AutoTokenizer.from_pretrained( self.model.model_dir, trust_remote_code=True) @@ -328,6 +334,9 @@ class QWenChatPipeline(Pipeline): bf16 = False if isinstance(model, str): + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. 
Please make sure that' + f' the remote code content is what you need {model}.') self.tokenizer = AutoTokenizer.from_pretrained( model, revision=revision, trust_remote_code=True) self.model = AutoModelForCausalLM.from_pretrained( @@ -392,6 +401,9 @@ class QWenTextGenerationPipeline(Pipeline): bf16 = False if isinstance(model, str): + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {model}.') self.model = AutoModelForCausalLM.from_pretrained( model, device_map=device_map, diff --git a/modelscope/preprocessors/templates/loader.py b/modelscope/preprocessors/templates/loader.py index 8943f25d..b9da3608 100644 --- a/modelscope/preprocessors/templates/loader.py +++ b/modelscope/preprocessors/templates/loader.py @@ -820,6 +820,9 @@ class TemplateLoader: model_id, revision=kwargs.pop('revision', 'master'), ignore_file_pattern=ignore_file_pattern) + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. 
Please make sure that' + f' the remote code content is what you need {model_dir}.') tokenizer = AutoTokenizer.from_pretrained( model_dir, trust_remote_code=True) config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) diff --git a/modelscope/utils/automodel_utils.py b/modelscope/utils/automodel_utils.py index eb4aa6c8..435d461d 100644 --- a/modelscope/utils/automodel_utils.py +++ b/modelscope/utils/automodel_utils.py @@ -8,7 +8,8 @@ from modelscope.utils.ast_utils import INDEX_KEY from modelscope.utils.import_utils import (LazyImportModule, is_torch_available, is_transformers_available) - +from modelscope import get_logger +logger = get_logger() def can_load_by_ms(model_dir: str, task_name: Optional[str], model_type: Optional[str]) -> bool: @@ -91,6 +92,9 @@ def get_hf_automodel_class(model_dir: str, if not os.path.exists(config_path): return None try: + logger.warning('Use trust_remote_code=True. The code will be downloaded' + ' and used from the remote repo. Please make sure that' + f' the remote code content is what you need {model_dir}.') config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) if task_name is None: automodel_class = get_default_automodel(config)