logger.warning when using remote code (#1171)

* logger warning when using remote code Co-authored-by: suluyan <suluyan.sly@alibaba-inc.com>
2026-02-24 12:10:09 +01:00 · 2025-01-10 15:38:05 +08:00
parent 768f953b9d
commit cc6fbbab5f
12 changed files with 89 additions and 1 deletions
--- a/modelscope/models/nlp/polylm/text_generation.py
+++ b/modelscope/models/nlp/polylm/text_generation.py
@@ -10,8 +10,11 @@ from modelscope.models.base import Tensor, TorchModel
 from modelscope.models.builder import MODELS
 from modelscope.utils.constant import Tasks
 from modelscope.utils.hub import read_config
+from modelscope.utils.logger import get_logger
 from modelscope.utils.streaming_output import StreamingOutputMixin

+logger = get_logger()
+
 __all__ = ['PolyLMForTextGeneration']


@@ -27,6 +30,9 @@ class PolyLMForTextGeneration(TorchModel, StreamingOutputMixin):
        super().__init__(model_dir, *args, **kwargs)
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_dir, legacy=False, use_fast=False)
+        logger.warning(
+            f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure '
+            'that you can trust the external codes.')
        self.model = AutoModelForCausalLM.from_pretrained(
            model_dir, device_map='auto', trust_remote_code=True)
        self.model.eval()
--- a/modelscope/msdatasets/data_loader/data_loader.py
+++ b/modelscope/msdatasets/data_loader/data_loader.py
@@ -133,6 +133,11 @@ class OssDownloader(BaseDownloader):
            raise f'meta-file: {dataset_name}.py not found on the modelscope hub.'

        if dataset_py_script and dataset_formation == DatasetFormations.hf_compatible:
+            if trust_remote_code:
+                logger.warning(
+                    f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make '
+                    'sure that you can trust the external codes.')
+
            self.dataset = hf_load_dataset(
                dataset_py_script,
                name=subset_name,
--- a/modelscope/msdatasets/data_loader/data_loader_manager.py
+++ b/modelscope/msdatasets/data_loader/data_loader_manager.py
@@ -71,6 +71,11 @@ class LocalDataLoaderManager(DataLoaderManager):
        # Select local data loader
        # TODO: more loaders to be supported.
        if data_loader_type == LocalDataLoaderType.HF_DATA_LOADER:
+            if trust_remote_code:
+                logger.warning(
+                    f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make '
+                    'sure that you can trust the external codes.')
+
            # Build huggingface data loader and return dataset.
            return hf_data_loader(
                dataset_name,
@@ -110,6 +115,10 @@ class RemoteDataLoaderManager(DataLoaderManager):

        # To use the huggingface data loader
        if data_loader_type == RemoteDataLoaderType.HF_DATA_LOADER:
+            if trust_remote_code:
+                logger.warning(
+                    f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make '
+                    'sure that you can trust the external codes.')
            dataset_ret = hf_data_loader(
                dataset_name,
                name=subset_name,
--- a/modelscope/msdatasets/ms_dataset.py
+++ b/modelscope/msdatasets/ms_dataset.py
@@ -237,6 +237,11 @@ class MsDataset:
            if not namespace or not dataset_name:
                raise 'The dataset_name should be in the form of `namespace/dataset_name` or `dataset_name`.'

+        if trust_remote_code:
+            logger.warning(
+                f'Use trust_remote_code=True. Will invoke codes from {dataset_name}. Please make sure that '
+                'you can trust the external codes.')
+
        # Init context config
        dataset_context_config = DatasetContextConfig(
            dataset_name=dataset_name,
--- a/modelscope/msdatasets/utils/hf_datasets_util.py
+++ b/modelscope/msdatasets/utils/hf_datasets_util.py
@@ -835,6 +835,8 @@ def get_module_with_script(self) -> DatasetModule:
    if not os.path.exists(importable_file_path):
        trust_remote_code = resolve_trust_remote_code(trust_remote_code=self.trust_remote_code, repo_id=self.name)
        if trust_remote_code:
+            logger.warning(f'Use trust_remote_code=True. Will invoke codes from {repo_id}. Please make sure that '
+                           'you can trust the external codes.')
            _create_importable_file(
                local_path=local_script_path,
                local_imports=local_imports,
@@ -934,6 +936,11 @@ class DatasetsWrapperHF:
            verification_mode or VerificationMode.BASIC_CHECKS
        ) if not save_infos else VerificationMode.ALL_CHECKS)

+        if trust_remote_code:
+            logger.warning(f'Use trust_remote_code=True. Will invoke codes from {path}. Please make sure '
+                           'that you can trust the external codes.'
+                           )
+
        # Create a dataset builder
        builder_instance = DatasetsWrapperHF.load_dataset_builder(
            path=path,
@@ -1061,6 +1068,11 @@ class DatasetsWrapperHF:
            ) if download_config else DownloadConfig()
            download_config.storage_options.update(storage_options)

+        if trust_remote_code:
+            logger.warning(f'Use trust_remote_code=True. Will invoke codes from {path}. Please make sure '
+                           'that you can trust the external codes.'
+                           )
+
        dataset_module = DatasetsWrapperHF.dataset_module_factory(
            path,
            revision=revision,
@@ -1171,6 +1183,10 @@ class DatasetsWrapperHF:
        #   -> the module from the python file in the dataset repository
        # - if path has one "/" and is dataset repository on the HF hub without a python file
        #   -> use a packaged module (csv, text etc.) based on content of the repository
+        if trust_remote_code:
+            logger.warning(f'Use trust_remote_code=True. Will invoke codes from {path}. Please make sure '
+                           'that you can trust the external codes.'
+                           )

        # Try packaged
        if path in _PACKAGED_DATASETS_MODULES:
--- a/modelscope/pipelines/accelerate/vllm.py
+++ b/modelscope/pipelines/accelerate/vllm.py
@@ -1,8 +1,11 @@
 from typing import List, Union

+from modelscope import get_logger
 from modelscope.pipelines.accelerate.base import InferFramework
 from modelscope.utils.import_utils import is_vllm_available

+logger = get_logger()
+

 class Vllm(InferFramework):

@@ -27,6 +30,9 @@ class Vllm(InferFramework):
        if not Vllm.check_gpu_compatibility(8) and (dtype
                                                    in ('bfloat16', 'auto')):
            dtype = 'float16'
+        logger.warning(
+            f'Use trust_remote_code=True. Will invoke codes from {self.model_dir}. Please make '
+            'sure that you can trust the external codes.')
        self.model = LLM(
            self.model_dir,
            dtype=dtype,
--- a/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py
+++ b/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py
@@ -2,7 +2,7 @@ from typing import Any, Dict, Union

 import torch

-from modelscope import AutoModelForCausalLM
+from modelscope import AutoModelForCausalLM, get_logger
 from modelscope.metainfo import Pipelines, Preprocessors
 from modelscope.models.base import Model
 from modelscope.outputs import OutputKeys
@@ -13,6 +13,8 @@ from modelscope.pipelines.multi_modal.visual_question_answering_pipeline import
 from modelscope.preprocessors import Preprocessor, load_image
 from modelscope.utils.constant import Fields, Frameworks, Tasks

+logger = get_logger()
+

@PIPELINES.register_module(
    Tasks.visual_question_answering, module_name='ovis-vl')
@@ -35,6 +37,9 @@ class VisionChatPipeline(VisualQuestionAnsweringPipeline):
        torch_dtype = kwargs.get('torch_dtype', torch.float16)
        multimodal_max_length = kwargs.get('multimodal_max_length', 8192)
        self.device = 'cuda' if device == 'gpu' else device
+        logger.warning(
+            f'Use trust_remote_code=True. Will invoke codes from {model}. Please make '
+            'sure that you can trust the external codes.')
        self.model = AutoModelForCausalLM.from_pretrained(
            model,
            torch_dtype=torch_dtype,
--- a/modelscope/pipelines/nlp/llm_pipeline.py
+++ b/modelscope/pipelines/nlp/llm_pipeline.py
@@ -97,6 +97,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin):
            assert base_model is not None, 'Cannot get adapter_cfg.model_id_or_path from configuration.json file.'
            revision = self.cfg.safe_get('adapter_cfg.model_revision',
                                         'master')
+            logger.warning(
+                f'Use trust_remote_code=True. Will invoke codes from {base_model}. Please make sure that you can '
+                'trust the external codes.')
            base_model = Model.from_pretrained(
                base_model,
                revision,
@@ -134,6 +137,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin):
                    model) else snapshot_download(model)
                # TODO: Temporary use of AutoModelForCausalLM
                # Need to be updated into a universal solution
+                logger.warning(
+                    f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure '
+                    'that you can trust the external codes.')
                model = AutoModelForCausalLM.from_pretrained(
                    model_dir,
                    device_map=self.device_map,
@@ -173,6 +179,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin):
        self.llm_framework = llm_framework

        if os.path.exists(kwargs['model']):
+            logger.warning(
+                f'Use trust_remote_code=True. Will invoke codes from {kwargs["model"]}. Please make sure '
+                'that you can trust the external codes.')
            config = AutoConfig.from_pretrained(
                kwargs['model'], trust_remote_code=True)
            q_config = config.__dict__.get('quantization_config', None)
@@ -423,6 +432,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin):
            model_dir = self.model.model_dir
        if tokenizer_class is None:
            tokenizer_class = AutoTokenizer
+        logger.warning(
+            f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure '
+            'that you can trust the external codes.')
        return tokenizer_class.from_pretrained(
            model_dir, trust_remote_code=True)

--- a/modelscope/pipelines/nlp/text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -269,6 +269,9 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline):
            if use_bf16:
                default_torch_dtype = torch.bfloat16
            torch_dtype = kwargs.get('torch_dtype', default_torch_dtype)
+            logger.warning(
+                f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure '
+                'that you can trust the external codes.')
            model = Model.from_pretrained(
                model_dir,
                trust_remote_code=True,
@@ -285,6 +288,9 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline):

        self.model = model
        self.model.eval()
+        logger.warning(
+            f'Use trust_remote_code=True. Will invoke codes from {self.model.model_dir}. Please '
+            'make sure that you can trust the external codes.')
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model.model_dir, trust_remote_code=True)

@@ -328,6 +334,9 @@ class QWenChatPipeline(Pipeline):
            bf16 = False

        if isinstance(model, str):
+            logger.warning(
+                f'Use trust_remote_code=True. Will invoke codes from {model}. Please make sure '
+                'that you can trust the external codes.')
            self.tokenizer = AutoTokenizer.from_pretrained(
                model, revision=revision, trust_remote_code=True)
            self.model = AutoModelForCausalLM.from_pretrained(
@@ -392,6 +401,9 @@ class QWenTextGenerationPipeline(Pipeline):
            bf16 = False

        if isinstance(model, str):
+            logger.warning(
+                f'Use trust_remote_code=True. Will invoke codes from {model}. Please make sure '
+                'that you can trust the external codes.')
            self.model = AutoModelForCausalLM.from_pretrained(
                model,
                device_map=device_map,
--- a/modelscope/preprocessors/templates/loader.py
+++ b/modelscope/preprocessors/templates/loader.py
@@ -820,6 +820,9 @@ class TemplateLoader:
                                model_id,
                                revision=kwargs.pop('revision', 'master'),
                                ignore_file_pattern=ignore_file_pattern)
+                            logger.warning(f'Use trust_remote_code=True. Will invoke codes from {model_dir}.'
+                                           ' Please make sure that you can trust the external codes.'
+                                           )
                            tokenizer = AutoTokenizer.from_pretrained(
                                model_dir, trust_remote_code=True)
                            config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
--- a/modelscope/utils/automodel_utils.py
+++ b/modelscope/utils/automodel_utils.py
@@ -3,12 +3,15 @@ import os
 from types import MethodType
 from typing import Any, Optional

+from modelscope import get_logger
 from modelscope.metainfo import Tasks
 from modelscope.utils.ast_utils import INDEX_KEY
 from modelscope.utils.import_utils import (LazyImportModule,
                                           is_torch_available,
                                           is_transformers_available)

+logger = get_logger()
+

 def can_load_by_ms(model_dir: str, task_name: Optional[str],
                   model_type: Optional[str]) -> bool:
@@ -91,6 +94,9 @@ def get_hf_automodel_class(model_dir: str,
    if not os.path.exists(config_path):
        return None
    try:
+        logger.warning(
+            f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure '
+            'that you can trust the external codes.')
        config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
        if task_name is None:
            automodel_class = get_default_automodel(config)
--- a/modelscope/utils/plugins.py
+++ b/modelscope/utils/plugins.py
@@ -451,6 +451,9 @@ def register_plugins_repo(plugins: List[str]) -> None:
 def register_modelhub_repo(model_dir, allow_remote=False) -> None:
    """ Try to install and import remote model from modelhub"""
    if allow_remote:
+        logger.warning(
+            f'Use allow_remote=True. Will invoke codes from {model_dir}. Please make sure '
+            'that you can trust the external codes.')
        try:
            import_module_from_model_dir(model_dir)
        except KeyError: