Fix trust_remote_code (#1462)

1. Set `trust_remote_code` to `False` by default in datasets module
2. Use the model's `trust_remote_code` setting in the PolyLM pipeline instead of hard-coding `True`
This commit is contained in:
Xingjun.Wang
2025-08-14 10:56:16 +08:00
committed by GitHub
parent e802630865
commit 7d11b77112
6 changed files with 19 additions and 15 deletions

View File

@@ -21,13 +21,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
fi
fi
pip install -r requirements/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/audio.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/cv.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/multi-modal.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/nlp.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/science.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -U sentence_transformers
python -m spacy download en_core_web_sm
pip install faiss-gpu
pip install healpy

View File

@@ -5,6 +5,7 @@ from __future__ import (absolute_import, division, print_function,
import datetime
import os
import shutil
import sys
import wave
import zipfile
@@ -60,6 +61,8 @@ class SambertHifigan(Model):
raise TtsVoiceNotExistsException(
'modelscope error: voices is empty in voices.json')
# initialize frontend
if sys.version_info >= (3, 11):
raise ImportError('Python version needs to be <= 3.10')
import ttsfrd
frontend = ttsfrd.TtsFrontendEngine()
zip_file = os.path.join(model_dir, 'resource.zip')

View File

@@ -9,7 +9,6 @@ from modelscope.metainfo import Models
from modelscope.models.base import Tensor, TorchModel
from modelscope.models.builder import MODELS
from modelscope.utils.constant import Tasks
from modelscope.utils.hub import read_config
from modelscope.utils.logger import get_logger
from modelscope.utils.streaming_output import StreamingOutputMixin
@@ -30,11 +29,17 @@ class PolyLMForTextGeneration(TorchModel, StreamingOutputMixin):
super().__init__(model_dir, *args, **kwargs)
self.tokenizer = AutoTokenizer.from_pretrained(
model_dir, legacy=False, use_fast=False)
logger.warning(
self.check_trust_remote_code(
info_str=
f'Use trust_remote_code=True. Will invoke codes from {model_dir}. Please make sure '
'that you can trust the external codes.')
'that you can trust the external codes.',
model_dir=model_dir)
self.model = AutoModelForCausalLM.from_pretrained(
model_dir, device_map='auto', trust_remote_code=True)
model_dir,
device_map='auto',
trust_remote_code=self.trust_remote_code)
self.model.eval()
def forward(self, input: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:

View File

@@ -171,7 +171,7 @@ class MsDataset:
custom_cfg: Optional[Config] = Config(),
token: Optional[str] = None,
dataset_info_only: Optional[bool] = False,
trust_remote_code: Optional[bool] = True,
trust_remote_code: Optional[bool] = False,
**config_kwargs,
) -> Union[dict, 'MsDataset', NativeIterableDataset]:
"""Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset.
@@ -202,7 +202,7 @@ class MsDataset:
see https://modelscope.cn/docs/Configuration%E8%AF%A6%E8%A7%A3
token (str, Optional): SDK token of ModelScope.
dataset_info_only (bool, Optional): If set to True, only return the dataset config and info (dict).
trust_remote_code (bool, Optional): If set to True, trust the remote code.
trust_remote_code (bool, Optional): If set to True, trust the remote code. Defaults to `False`.
**config_kwargs (additional keyword arguments): Keyword arguments to be passed
Returns:

View File

@@ -940,7 +940,7 @@ class DatasetsWrapperHF:
streaming: bool = False,
num_proc: Optional[int] = None,
storage_options: Optional[Dict] = None,
trust_remote_code: bool = True,
trust_remote_code: bool = False,
dataset_info_only: Optional[bool] = False,
**config_kwargs,
) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset,

View File

@@ -45,7 +45,9 @@ class TestTtsTrainer(unittest.TestCase):
shutil.rmtree(self.tmp_dir, ignore_errors=True)
super().tearDown()
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
@unittest.skipUnless(
test_level() >= 2,
'skip test because the ci test python version is higher then 3.10')
def test_trainer(self):
kwargs = dict(
model=self.model_id,