warning parsing trust_remote_code=True

2026-02-24 04:01:10 +01:00 · 2025-01-07 21:04:29 +08:00
parent d361038e7e
commit 0a643276ca
12 changed files with 81 additions and 4 deletions
--- a/modelscope/models/nlp/hf_transformers/backbone.py
+++ b/modelscope/models/nlp/hf_transformers/backbone.py
@@ -99,6 +99,9 @@ class TransformersModel(TorchModel, PreTrainedModel):
            return model

        # return the model only
+        logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                       ' and used from the remote repo. Please make sure that'
+                       f' the remote code content is what you need  {model_dir}.')
        config, kwargs = AutoConfig.from_pretrained(
            model_dir,
            return_unused_kwargs=True,
--- a/modelscope/models/nlp/polylm/text_generation.py
+++ b/modelscope/models/nlp/polylm/text_generation.py
@@ -27,6 +27,9 @@ class PolyLMForTextGeneration(TorchModel, StreamingOutputMixin):
        super().__init__(model_dir, *args, **kwargs)
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_dir, legacy=False, use_fast=False)
+        logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                       ' and used from the remote repo. Please make sure that'
+                       f' the remote code content is what you need  {model_dir}.')
        self.model = AutoModelForCausalLM.from_pretrained(
            model_dir, device_map='auto', trust_remote_code=True)
        self.model.eval()
--- a/modelscope/msdatasets/data_loader/data_loader.py
+++ b/modelscope/msdatasets/data_loader/data_loader.py
@@ -133,6 +133,11 @@ class OssDownloader(BaseDownloader):
            raise f'meta-file: {dataset_name}.py not found on the modelscope hub.'

        if dataset_py_script and dataset_formation == DatasetFormations.hf_compatible:
+            if trust_remote_code:
+                logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                               ' and used from the remote repo. Please make sure that'
+                               f' the remote code content is what you need  {dataset_name}.')
+
            self.dataset = hf_load_dataset(
                dataset_py_script,
                name=subset_name,
--- a/modelscope/msdatasets/data_loader/data_loader_manager.py
+++ b/modelscope/msdatasets/data_loader/data_loader_manager.py
@@ -71,6 +71,11 @@ class LocalDataLoaderManager(DataLoaderManager):
        # Select local data loader
        # TODO: more loaders to be supported.
        if data_loader_type == LocalDataLoaderType.HF_DATA_LOADER:
+            if trust_remote_code:
+                logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                               ' and used from the remote repo. Please make sure that'
+                               f' the remote code content is what you need  {dataset_name}.')
+
            # Build huggingface data loader and return dataset.
            return hf_data_loader(
                dataset_name,
@@ -110,6 +115,10 @@ class RemoteDataLoaderManager(DataLoaderManager):

        # To use the huggingface data loader
        if data_loader_type == RemoteDataLoaderType.HF_DATA_LOADER:
+            if trust_remote_code:
+                logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                               ' and used from the remote repo. Please make sure that'
+                               f' the remote code content is what you need  {dataset_name}.')
            dataset_ret = hf_data_loader(
                dataset_name,
                name=subset_name,
--- a/modelscope/msdatasets/ms_dataset.py
+++ b/modelscope/msdatasets/ms_dataset.py
@@ -237,6 +237,11 @@ class MsDataset:
            if not namespace or not dataset_name:
                raise 'The dataset_name should be in the form of `namespace/dataset_name` or `dataset_name`.'

+        if trust_remote_code:
+            logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                           ' and used from the remote repo. Please make sure that'
+                           f' the remote code content is what you need  {dataset_name}.')
+
        # Init context config
        dataset_context_config = DatasetContextConfig(
            dataset_name=dataset_name,
--- a/modelscope/msdatasets/utils/hf_datasets_util.py
+++ b/modelscope/msdatasets/utils/hf_datasets_util.py
@@ -936,6 +936,11 @@ class DatasetsWrapperHF:
            verification_mode or VerificationMode.BASIC_CHECKS
        ) if not save_infos else VerificationMode.ALL_CHECKS)

+        if trust_remote_code:
+            logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                           ' and used from the remote repo. Please make sure that'
+                           f' the remote code content is what you need  {path}.')
+
        # Create a dataset builder
        builder_instance = DatasetsWrapperHF.load_dataset_builder(
            path=path,
@@ -1063,6 +1068,11 @@ class DatasetsWrapperHF:
            ) if download_config else DownloadConfig()
            download_config.storage_options.update(storage_options)

+        if trust_remote_code:
+            logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                           ' and used from the remote repo. Please make sure that'
+                           f' the remote code content is what you need  {path}.')
+
        dataset_module = DatasetsWrapperHF.dataset_module_factory(
            path,
            revision=revision,
@@ -1173,6 +1183,10 @@ class DatasetsWrapperHF:
        #   -> the module from the python file in the dataset repository
        # - if path has one "/" and is dataset repository on the HF hub without a python file
        #   -> use a packaged module (csv, text etc.) based on content of the repository
+        if trust_remote_code:
+            logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                           ' and used from the remote repo. Please make sure that'
+                           f' the remote code content is what you need  {path}.')

        # Try packaged
        if path in _PACKAGED_DATASETS_MODULES:
--- a/modelscope/pipelines/accelerate/vllm.py
+++ b/modelscope/pipelines/accelerate/vllm.py
@@ -2,7 +2,8 @@ from typing import List, Union

 from modelscope.pipelines.accelerate.base import InferFramework
 from modelscope.utils.import_utils import is_vllm_available
-
+from modelscope import get_logger
+logger = get_logger()

 class Vllm(InferFramework):

@@ -27,6 +28,9 @@ class Vllm(InferFramework):
        if not Vllm.check_gpu_compatibility(8) and (dtype
                                                    in ('bfloat16', 'auto')):
            dtype = 'float16'
+        logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                       ' and used from the remote repo. Please make sure that'
+                       f' the remote code content is what you need  {self.model_dir}.')
        self.model = LLM(
            self.model_dir,
            dtype=dtype,
--- a/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py
+++ b/modelscope/pipelines/multi_modal/ovis_vl_pipeline.py
@@ -2,7 +2,7 @@ from typing import Any, Dict, Union

 import torch

-from modelscope import AutoModelForCausalLM
+from modelscope import AutoModelForCausalLM, get_logger
 from modelscope.metainfo import Pipelines, Preprocessors
 from modelscope.models.base import Model
 from modelscope.outputs import OutputKeys
@@ -12,7 +12,7 @@ from modelscope.pipelines.multi_modal.visual_question_answering_pipeline import
    VisualQuestionAnsweringPipeline
 from modelscope.preprocessors import Preprocessor, load_image
 from modelscope.utils.constant import Fields, Frameworks, Tasks
-
+logger = get_logger()

@PIPELINES.register_module(
    Tasks.visual_question_answering, module_name='ovis-vl')
@@ -35,6 +35,9 @@ class VisionChatPipeline(VisualQuestionAnsweringPipeline):
        torch_dtype = kwargs.get('torch_dtype', torch.float16)
        multimodal_max_length = kwargs.get('multimodal_max_length', 8192)
        self.device = 'cuda' if device == 'gpu' else device
+        logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                       ' and used from the remote repo. Please make sure that'
+                       f' the remote code content is what you need  {model}.')
        self.model = AutoModelForCausalLM.from_pretrained(
            model,
            torch_dtype=torch_dtype,
--- a/modelscope/pipelines/nlp/llm_pipeline.py
+++ b/modelscope/pipelines/nlp/llm_pipeline.py
@@ -97,6 +97,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin):
            assert base_model is not None, 'Cannot get adapter_cfg.model_id_or_path from configuration.json file.'
            revision = self.cfg.safe_get('adapter_cfg.model_revision',
                                         'master')
+            logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                           ' and used from the remote repo. Please make sure that'
+                           f' the remote code content is what you need  {base_model}.')
            base_model = Model.from_pretrained(
                base_model,
                revision,
@@ -134,6 +137,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin):
                    model) else snapshot_download(model)
                # TODO: Temporary use of AutoModelForCausalLM
                # Need to be updated into a universal solution
+                logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                               ' and used from the remote repo. Please make sure that'
+                               f' the remote code content is what you need  {model_dir}.')
                model = AutoModelForCausalLM.from_pretrained(
                    model_dir,
                    device_map=self.device_map,
@@ -173,6 +179,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin):
        self.llm_framework = llm_framework

        if os.path.exists(kwargs['model']):
+            logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                           ' and used from the remote repo. Please make sure that'
+                           f' the remote code content is what you need  {kwargs["model"]}.')
            config = AutoConfig.from_pretrained(
                kwargs['model'], trust_remote_code=True)
            q_config = config.__dict__.get('quantization_config', None)
@@ -423,6 +432,9 @@ class LLMPipeline(Pipeline, PipelineStreamingOutputMixin):
            model_dir = self.model.model_dir
        if tokenizer_class is None:
            tokenizer_class = AutoTokenizer
+        logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                       ' and used from the remote repo. Please make sure that'
+                       f' the remote code content is what you need  {model_dir}.')
        return tokenizer_class.from_pretrained(
            model_dir, trust_remote_code=True)

--- a/modelscope/pipelines/nlp/text_generation_pipeline.py
+++ b/modelscope/pipelines/nlp/text_generation_pipeline.py
@@ -269,6 +269,9 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline):
            if use_bf16:
                default_torch_dtype = torch.bfloat16
            torch_dtype = kwargs.get('torch_dtype', default_torch_dtype)
+            logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                           ' and used from the remote repo. Please make sure that'
+                           f' the remote code content is what you need  {model_dir}.')
            model = Model.from_pretrained(
                model_dir,
                trust_remote_code=True,
@@ -285,6 +288,9 @@ class ChatGLM6bV2TextGenerationPipeline(Pipeline):

        self.model = model
        self.model.eval()
+        logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                       ' and used from the remote repo. Please make sure that'
+                       f' the remote code content is what you need  {self.model.model_dir}.')
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model.model_dir, trust_remote_code=True)

@@ -328,6 +334,9 @@ class QWenChatPipeline(Pipeline):
            bf16 = False

        if isinstance(model, str):
+            logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                           ' and used from the remote repo. Please make sure that'
+                           f' the remote code content is what you need  {model}.')
            self.tokenizer = AutoTokenizer.from_pretrained(
                model, revision=revision, trust_remote_code=True)
            self.model = AutoModelForCausalLM.from_pretrained(
@@ -392,6 +401,9 @@ class QWenTextGenerationPipeline(Pipeline):
            bf16 = False

        if isinstance(model, str):
+            logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                           ' and used from the remote repo. Please make sure that'
+                           f' the remote code content is what you need  {model}.')
            self.model = AutoModelForCausalLM.from_pretrained(
                model,
                device_map=device_map,
--- a/modelscope/preprocessors/templates/loader.py
+++ b/modelscope/preprocessors/templates/loader.py
@@ -820,6 +820,9 @@ class TemplateLoader:
                                model_id,
                                revision=kwargs.pop('revision', 'master'),
                                ignore_file_pattern=ignore_file_pattern)
+                            logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                                           ' and used from the remote repo. Please make sure that'
+                                           f' the remote code content is what you need  {model_dir}.')
                            tokenizer = AutoTokenizer.from_pretrained(
                                model_dir, trust_remote_code=True)
                            config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
--- a/modelscope/utils/automodel_utils.py
+++ b/modelscope/utils/automodel_utils.py
@@ -8,7 +8,8 @@ from modelscope.utils.ast_utils import INDEX_KEY
 from modelscope.utils.import_utils import (LazyImportModule,
                                           is_torch_available,
                                           is_transformers_available)
-
+from modelscope import get_logger
+logger = get_logger()

 def can_load_by_ms(model_dir: str, task_name: Optional[str],
                   model_type: Optional[str]) -> bool:
@@ -91,6 +92,9 @@ def get_hf_automodel_class(model_dir: str,
    if not os.path.exists(config_path):
        return None
    try:
+        logger.warning('Use trust_remote_code=True. The code will be downloaded'
+                       ' and used from the remote repo. Please make sure that'
+                       f' the remote code content is what you need  {model_dir}.')
        config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
        if task_name is None:
            automodel_class = get_default_automodel(config)