diff --git a/modelscope/models/nlp/polylm/text_generation.py b/modelscope/models/nlp/polylm/text_generation.py index 1881cf2b..bd6fbd69 100644 --- a/modelscope/models/nlp/polylm/text_generation.py +++ b/modelscope/models/nlp/polylm/text_generation.py @@ -10,8 +10,11 @@ from modelscope.models.base import Tensor, TorchModel from modelscope.models.builder import MODELS from modelscope.utils.constant import Tasks from modelscope.utils.hub import read_config +from modelscope.utils.logger import get_logger from modelscope.utils.streaming_output import StreamingOutputMixin +logger = get_logger() + __all__ = ['PolyLMForTextGeneration'] @@ -27,6 +30,9 @@ class PolyLMForTextGeneration(TorchModel, StreamingOutputMixin): super().__init__(model_dir, *args, **kwargs) self.tokenizer = AutoTokenizer.from_pretrained( model_dir, legacy=False, use_fast=False) + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.') self.model = AutoModelForCausalLM.from_pretrained( model_dir, device_map='auto', trust_remote_code=True) self.model.eval() diff --git a/modelscope/msdatasets/data_loader/data_loader.py b/modelscope/msdatasets/data_loader/data_loader.py index 92074449..71fcee45 100644 --- a/modelscope/msdatasets/data_loader/data_loader.py +++ b/modelscope/msdatasets/data_loader/data_loader.py @@ -133,6 +133,11 @@ class OssDownloader(BaseDownloader): raise f'meta-file: {dataset_name}.py not found on the modelscope hub.' if dataset_py_script and dataset_formation == DatasetFormations.hf_compatible: + if trust_remote_code: + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make ' + 'sure that you can trust the external codes.') + self.dataset = hf_load_dataset( dataset_py_script, name=subset_name, diff --git a/modelscope/msdatasets/data_loader/data_loader_manager.py b/modelscope/msdatasets/data_loader/data_loader_manager.py index a9e58b7c..b64a8926 100644 --- a/modelscope/msdatasets/data_loader/data_loader_manager.py +++ b/modelscope/msdatasets/data_loader/data_loader_manager.py @@ -71,6 +71,11 @@ class LocalDataLoaderManager(DataLoaderManager): # Select local data loader # TODO: more loaders to be supported. if data_loader_type == LocalDataLoaderType.HF_DATA_LOADER: + if trust_remote_code: + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make ' + 'sure that you can trust the external codes.') + # Build huggingface data loader and return dataset. return hf_data_loader( dataset_name, @@ -110,6 +115,10 @@ class RemoteDataLoaderManager(DataLoaderManager): # To use the huggingface data loader if data_loader_type == RemoteDataLoaderType.HF_DATA_LOADER: + if trust_remote_code: + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make ' + 'sure that you can trust the external codes.') dataset_ret = hf_data_loader( dataset_name, name=subset_name, diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index 899142ad..dbe15171 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -237,6 +237,11 @@ class MsDataset: if not namespace or not dataset_name: raise 'The dataset_name should be in the form of `namespace/dataset_name` or `dataset_name`.' + if trust_remote_code: + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make sure that ' + 'you can trust the external codes.') + # Init context config dataset_context_config = DatasetContextConfig( dataset_name=dataset_name, diff --git a/modelscope/msdatasets/utils/hf_datasets_util.py b/modelscope/msdatasets/utils/hf_datasets_util.py index 4d6de81c..fea304f6 100644 --- a/modelscope/msdatasets/utils/hf_datasets_util.py +++ b/modelscope/msdatasets/utils/hf_datasets_util.py @@ -835,6 +835,8 @@ def get_module_with_script(self) -> DatasetModule: if not os.path.exists(importable_file_path): trust_remote_code = resolve_trust_remote_code(trust_remote_code=self.trust_remote_code, repo_id=self.name) if trust_remote_code: + logger.warning(f'Use trust_remote_code=True. Will invoke codes from {repo_id}. Please make sure that ' + 'you can trust the external codes.') _create_importable_file( local_path=local_script_path, local_imports=local_imports, @@ -934,6 +936,11 @@ class DatasetsWrapperHF: verification_mode or VerificationMode.BASIC_CHECKS ) if not save_infos else VerificationMode.ALL_CHECKS) + if trust_remote_code: + logger.warning(f'Use trust_remote_code=True. Will invoke codes from {path}. Please make sure ' + 'that you can trust the external codes.' + ) + # Create a dataset builder builder_instance = DatasetsWrapperHF.load_dataset_builder( path=path, @@ -1061,6 +1068,11 @@ class DatasetsWrapperHF: ) if download_config else DownloadConfig() download_config.storage_options.update(storage_options) + if trust_remote_code: + logger.warning(f'Use trust_remote_code=True. Will invoke codes from {path}. Please make sure ' + 'that you can trust the external codes.' + ) + dataset_module = DatasetsWrapperHF.dataset_module_factory( path, revision=revision, @@ -1171,6 +1183,10 @@ class DatasetsWrapperHF: # -> the module from the python file in the dataset repository # - if path has one "/" and is dataset repository on the HF hub without a python file # -> use a packaged module (csv, text etc.) based on content of the repository + if trust_remote_code: + logger.warning(f'Use trust_remote_code=True. Will invoke codes from {path}. Please make sure ' + 'that you can trust the external codes.' + ) # Try packaged if path in _PACKAGED_DATASETS_MODULES: diff --git a/modelscope/pipelines/accelerate/vllm.py b/modelscope/pipelines/accelerate/vllm.py index 15ced4bb..9cadb979 100644 --- a/modelscope/pipelines/accelerate/vllm.py +++ b/modelscope/pipelines/accelerate/vllm.py @@ -1,8 +1,11 @@ from typing import List, Union +from modelscope import get_logger from modelscope.pipelines.accelerate.base import InferFramework from modelscope.utils.import_utils import is_vllm_available +logger = get_logger() + class Vllm(InferFramework): @@ -27,6 +30,9 @@ class Vllm(InferFramework): if not Vllm.check_gpu_compatibility(8) and (dtype in ('bfloat16', 'auto')): dtype = 'float16' + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {self.model_dir}. Please make ' + 'sure that you can trust the external codes.') self.model = LLM( self.model_dir, dtype=dtype, diff --git a/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py b/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py index f19eddff..cdce09a1 100644 --- a/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py +++ b/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Union import torch -from modelscope import AutoModelForCausalLM +from modelscope import AutoModelForCausalLM, get_logger from modelscope.metainfo import Pipelines, Preprocessors from modelscope.models.base import Model from modelscope.outputs import OutputKeys @@ -13,6 +13,8 @@ from modelscope.pipelines.multi_modal.visual_question_answering_pipeline import from modelscope.preprocessors import Preprocessor, load_image from modelscope.utils.constant import Fields, Frameworks, Tasks +logger = get_logger() + @PIPELINES.register_module( Tasks.visual_question_answering, module_name='ovis-vl') @@ -35,6 +37,9 @@ class VisionChatPipeline(VisualQuestionAnsweringPipeline): torch_dtype = kwargs.get('torch_dtype', torch.float16) multimodal_max_length = kwargs.get('multimodal_max_length', 8192) self.device = 'cuda' if device == 'gpu' else device + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {model}. Please make ' + 'sure that you can trust the external codes.') self.model = AutoModelForCausalLM.from_pretrained( model, torch_dtype=torch_dtype, diff --git a/modelscope/pipelines/nlp/llm_pipeline.py b/modelscope/pipelines/nlp/llm_pipeline.py index 7ad0d278..269e8a42 100644 --- a/modelscope/pipelines/nlp/llm_pipeline.py +++ b/modelscope/pipelines/nlp/llm_pipeline.py @@ -97,6 +97,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): assert base_model is not None, 'Cannot get adapter_cfg.model_id_or_path from configuration.json file.' revision = self.cfg.safe_get('adapter_cfg.model_revision', 'master') + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {base_model}. Please make sure that you can ' + 'trust the external codes.') base_model = Model.from_pretrained( base_model, revision, @@ -134,6 +137,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): model) else snapshot_download(model) # TODO: Temporary use of AutoModelForCausalLM # Need to be updated into a universal solution + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.') model = AutoModelForCausalLM.from_pretrained( model_dir, device_map=self.device_map, @@ -173,6 +179,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): self.llm_framework = llm_framework if os.path.exists(kwargs['model']): + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {kwargs["model"]}. Please make sure ' + 'that you can trust the external codes.') config = AutoConfig.from_pretrained( kwargs['model'], trust_remote_code=True) q_config = config.__dict__.get('quantization_config', None) @@ -423,6 +432,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin): model_dir = self.model.model_dir if tokenizer_class is None: tokenizer_class = AutoTokenizer + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.') return tokenizer_class.from_pretrained( model_dir, trust_remote_code=True) diff --git a/modelscope/pipelines/nlp/text_generation_pipeline.py b/modelscope/pipelines/nlp/text_generation_pipeline.py index 55eaf809..374381cf 100644 --- a/modelscope/pipelines/nlp/text_generation_pipeline.py +++ b/modelscope/pipelines/nlp/text_generation_pipeline.py @@ -269,6 +269,9 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline): if use_bf16: default_torch_dtype = torch.bfloat16 torch_dtype = kwargs.get('torch_dtype', default_torch_dtype) + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.') model = Model.from_pretrained( model_dir, trust_remote_code=True, @@ -285,6 +288,9 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline): self.model = model self.model.eval() + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {self.model.model_dir}. Please ' + 'make sure that you can trust the external codes.') self.tokenizer = AutoTokenizer.from_pretrained( self.model.model_dir, trust_remote_code=True) @@ -328,6 +334,9 @@ class QWenChatPipeline(Pipeline): bf16 = False if isinstance(model, str): + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {model}. Please make sure ' + 'that you can trust the external codes.') self.tokenizer = AutoTokenizer.from_pretrained( model, revision=revision, trust_remote_code=True) self.model = AutoModelForCausalLM.from_pretrained( @@ -392,6 +401,9 @@ class QWenTextGenerationPipeline(Pipeline): bf16 = False if isinstance(model, str): + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {model}. Please make sure ' + 'that you can trust the external codes.') self.model = AutoModelForCausalLM.from_pretrained( model, device_map=device_map, diff --git a/modelscope/preprocessors/templates/loader.py b/modelscope/preprocessors/templates/loader.py index 8943f25d..8dac8d41 100644 --- a/modelscope/preprocessors/templates/loader.py +++ b/modelscope/preprocessors/templates/loader.py @@ -820,6 +820,9 @@ class TemplateLoader: model_id, revision=kwargs.pop('revision', 'master'), ignore_file_pattern=ignore_file_pattern) + logger.warning(f'Use trust_remote_code=True. Will invoke codes from {model_dir}.' + ' Please make sure that you can trust the external codes.' + ) tokenizer = AutoTokenizer.from_pretrained( model_dir, trust_remote_code=True) config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) diff --git a/modelscope/utils/automodel_utils.py b/modelscope/utils/automodel_utils.py index eb4aa6c8..9bf1459e 100644 --- a/modelscope/utils/automodel_utils.py +++ b/modelscope/utils/automodel_utils.py @@ -3,12 +3,15 @@ import os from types import MethodType from typing import Any, Optional +from modelscope import get_logger from modelscope.metainfo import Tasks from modelscope.utils.ast_utils import INDEX_KEY from modelscope.utils.import_utils import (LazyImportModule, is_torch_available, is_transformers_available) +logger = get_logger() + def can_load_by_ms(model_dir: str, task_name: Optional[str], model_type: Optional[str]) -> bool: @@ -91,6 +94,9 @@ def get_hf_automodel_class(model_dir: str, if not os.path.exists(config_path): return None try: + logger.warning( + f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.') config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) if task_name is None: automodel_class = get_default_automodel(config) diff --git a/modelscope/utils/plugins.py b/modelscope/utils/plugins.py index 1f191a8d..671b1ea6 100644 --- a/modelscope/utils/plugins.py +++ b/modelscope/utils/plugins.py @@ -451,6 +451,9 @@ def register_plugins_repo(plugins: List[str]) -> None: def register_modelhub_repo(model_dir, allow_remote=False) -> None: """ Try to install and import remote model from modelhub""" if allow_remote: + logger.warning( + f'Use allow_remote=True. Will invoke codes from {model_dir}. Please make sure ' + 'that you can trust the external codes.') try: import_module_from_model_dir(model_dir) except KeyError: