Fix trust_remote_code (#1462)

1. Set `trust_remote_code` to `False` by default in datasets module
2. Set `trust_remote_code` to `False` by default in PolyLM pipeline
This commit is contained in:
Xingjun.Wang
2025-08-14 10:56:16 +08:00
committed by GitHub
parent e802630865
commit 7d11b77112
6 changed files with 19 additions and 15 deletions

View File

@@ -21,13 +21,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
fi fi
fi fi
pip install -r requirements/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html pip install -U sentence_transformers
pip install -r requirements/audio.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/cv.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/multi-modal.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/nlp.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/science.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
python -m spacy download en_core_web_sm python -m spacy download en_core_web_sm
pip install faiss-gpu pip install faiss-gpu
pip install healpy pip install healpy

View File

@@ -5,6 +5,7 @@ from __future__ import (absolute_import, division, print_function,
import datetime import datetime
import os import os
import shutil import shutil
import sys
import wave import wave
import zipfile import zipfile
@@ -60,6 +61,8 @@ class SambertHifigan(Model):
raise TtsVoiceNotExistsException( raise TtsVoiceNotExistsException(
'modelscope error: voices is empty in voices.json') 'modelscope error: voices is empty in voices.json')
# initialize frontend # initialize frontend
if sys.version_info >= (3, 11):
raise ImportError('Python version needs to be <= 3.10')
import ttsfrd import ttsfrd
frontend = ttsfrd.TtsFrontendEngine() frontend = ttsfrd.TtsFrontendEngine()
zip_file = os.path.join(model_dir, 'resource.zip') zip_file = os.path.join(model_dir, 'resource.zip')

View File

@@ -9,7 +9,6 @@ from modelscope.metainfo import Models
from modelscope.models.base import Tensor, TorchModel from modelscope.models.base import Tensor, TorchModel
from modelscope.models.builder import MODELS from modelscope.models.builder import MODELS
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks
from modelscope.utils.hub import read_config
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from modelscope.utils.streaming_output import StreamingOutputMixin from modelscope.utils.streaming_output import StreamingOutputMixin
@@ -30,11 +29,17 @@ class PolyLMForTextGeneration(TorchModel, StreamingOutputMixin):
super().__init__(model_dir, *args, **kwargs) super().__init__(model_dir, *args, **kwargs)
self.tokenizer = AutoTokenizer.from_pretrained( self.tokenizer = AutoTokenizer.from_pretrained(
model_dir, legacy=False, use_fast=False) model_dir, legacy=False, use_fast=False)
logger.warning(
self.check_trust_remote_code(
info_str=
f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure ' f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure '
'that you can trust the external codes.') 'that you can trust the external codes.',
model_dir=model_dir)
self.model = AutoModelForCausalLM.from_pretrained( self.model = AutoModelForCausalLM.from_pretrained(
model_dir, device_map='auto', trust_remote_code=True) model_dir,
device_map='auto',
trust_remote_code=self.trust_remote_code)
self.model.eval() self.model.eval()
def forward(self, input: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]: def forward(self, input: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:

View File

@@ -171,7 +171,7 @@ class MsDataset:
custom_cfg: Optional[Config] = Config(), custom_cfg: Optional[Config] = Config(),
token: Optional[str] = None, token: Optional[str] = None,
dataset_info_only: Optional[bool] = False, dataset_info_only: Optional[bool] = False,
trust_remote_code: Optional[bool] = True, trust_remote_code: Optional[bool] = False,
**config_kwargs, **config_kwargs,
) -> Union[dict, 'MsDataset', NativeIterableDataset]: ) -> Union[dict, 'MsDataset', NativeIterableDataset]:
"""Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset.
@@ -202,7 +202,7 @@ class MsDataset:
see https://modelscope.cn/docs/Configuration%E8%AF%A6%E8%A7%A3 see https://modelscope.cn/docs/Configuration%E8%AF%A6%E8%A7%A3
token (str, Optional): SDK token of ModelScope. token (str, Optional): SDK token of ModelScope.
dataset_info_only (bool, Optional): If set to True, only return the dataset config and info (dict). dataset_info_only (bool, Optional): If set to True, only return the dataset config and info (dict).
trust_remote_code (bool, Optional): If set to True, trust the remote code. trust_remote_code (bool, Optional): If set to True, trust the remote code. Default to `False`.
**config_kwargs (additional keyword arguments): Keyword arguments to be passed **config_kwargs (additional keyword arguments): Keyword arguments to be passed
Returns: Returns:

View File

@@ -940,7 +940,7 @@ class DatasetsWrapperHF:
streaming: bool = False, streaming: bool = False,
num_proc: Optional[int] = None, num_proc: Optional[int] = None,
storage_options: Optional[Dict] = None, storage_options: Optional[Dict] = None,
trust_remote_code: bool = True, trust_remote_code: bool = False,
dataset_info_only: Optional[bool] = False, dataset_info_only: Optional[bool] = False,
**config_kwargs, **config_kwargs,
) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset, ) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset,

View File

@@ -45,7 +45,9 @@ class TestTtsTrainer(unittest.TestCase):
shutil.rmtree(self.tmp_dir, ignore_errors=True) shutil.rmtree(self.tmp_dir, ignore_errors=True)
super().tearDown() super().tearDown()
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') @unittest.skipUnless(
test_level() >= 2,
'skip test because the ci test python version is higher then 3.10')
def test_trainer(self): def test_trainer(self):
kwargs = dict( kwargs = dict(
model=self.model_id, model=self.model_id,