diff --git a/modelscope/msdatasets/data_loader/data_loader_manager.py b/modelscope/msdatasets/data_loader/data_loader_manager.py
index 504f3da6..a9e58b7c 100644
--- a/modelscope/msdatasets/data_loader/data_loader_manager.py
+++ b/modelscope/msdatasets/data_loader/data_loader_manager.py
@@ -57,6 +57,7 @@ class LocalDataLoaderManager(DataLoaderManager):
         cache_root_dir = self.dataset_context_config.cache_root_dir
         download_mode = self.dataset_context_config.download_mode
         use_streaming = self.dataset_context_config.use_streaming
+        trust_remote_code = self.dataset_context_config.trust_remote_code
         input_config_kwargs = self.dataset_context_config.config_kwargs
 
         # load local single file
@@ -81,7 +82,7 @@ class LocalDataLoaderManager(DataLoaderManager):
                 cache_dir=cache_root_dir,
                 download_mode=download_mode.value,
                 streaming=use_streaming,
-                ignore_verifications=True,
+                trust_remote_code=trust_remote_code,
                 **input_config_kwargs)
 
         raise f'Expected local data loader type: {LocalDataLoaderType.HF_DATA_LOADER.value}.'
@@ -118,7 +119,6 @@ class RemoteDataLoaderManager(DataLoaderManager):
                 data_files=data_files,
                 download_mode=download_mode_val,
                 streaming=use_streaming,
-                ignore_verifications=True,
                 trust_remote_code=trust_remote_code,
                 **input_config_kwargs)
             # download statistics
diff --git a/modelscope/msdatasets/utils/hf_datasets_util.py b/modelscope/msdatasets/utils/hf_datasets_util.py
index 3fb996ac..8bd768dc 100644
--- a/modelscope/msdatasets/utils/hf_datasets_util.py
+++ b/modelscope/msdatasets/utils/hf_datasets_util.py
@@ -874,7 +874,6 @@ class DatasetsWrapperHF:
         download_config: Optional[DownloadConfig] = None,
         download_mode: Optional[Union[DownloadMode, str]] = None,
         verification_mode: Optional[Union[VerificationMode, str]] = None,
-        ignore_verifications='deprecated',
         keep_in_memory: Optional[bool] = None,
         save_infos: bool = False,
         revision: Optional[Union[str, Version]] = None,
@@ -897,14 +896,6 @@ class DatasetsWrapperHF:
                 FutureWarning,
             )
             token = use_auth_token
-        if ignore_verifications != 'deprecated':
-            verification_mode = VerificationMode.NO_CHECKS if ignore_verifications else VerificationMode.ALL_CHECKS
-            warnings.warn(
-                "'ignore_verifications' was deprecated in favor of 'verification_mode' "
-                'in version 2.9.1 and will be removed in 3.0.0.\n'
-                f"You can remove this warning by passing 'verification_mode={verification_mode.value}' instead.",
-                FutureWarning,
-            )
         if task != 'deprecated':
             warnings.warn(
                 "'task' was deprecated in version 2.13.0 and will be removed in 3.0.0.\n",
diff --git a/tests/msdatasets/test_general_datasets.py b/tests/msdatasets/test_general_datasets.py
index d32daeb9..0792cf46 100644
--- a/tests/msdatasets/test_general_datasets.py
+++ b/tests/msdatasets/test_general_datasets.py
@@ -107,6 +107,44 @@ class GeneralMsDatasetTest(unittest.TestCase):
         assert data_sample['video_id'][0]
         assert os.path.exists(data_sample['video_id:FILE'][0])
 
+    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
+                         'skip test in current test level')
+    def test_local_py_script(self):
+        # Download the dataset files to temp directory
+        import subprocess
+        from tempfile import TemporaryDirectory
+        py_script_url = 'https://modelscope.cn/datasets/wangxingjun778/glue_test/resolve/master/glue_test.py'
+        with TemporaryDirectory() as tmp_dir:
+            # check=True makes a failed download fail right here.
+            subprocess.run(['wget', '-P', tmp_dir, py_script_url], check=True)
+            py_script_file = os.path.join(tmp_dir, 'glue_test.py')
+            assert os.path.exists(py_script_file), f'File not found: {py_script_file}, ' \
+                                                   f'please check the url: {py_script_url}'
+
+            # Load the dataset
+            ds = MsDataset.load(py_script_file, subset_name='cola', split='train')
+            sample = next(iter(ds))
+            logger.info(f'>>output of test_local_py_script:\n {sample}')
+            assert sample
+
+    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
+                         'skip test in current test level')
+    def test_local_img_folder(self):
+        # Download the dataset files to temp directory
+        import subprocess
+        from tempfile import TemporaryDirectory
+        img_url = 'https://modelscope.cn/datasets/wangxingjun778/test_img_dataset/resolve/master/data/train/' \
+                  '000000573258.jpg'
+        with TemporaryDirectory() as tmp_dir:
+            # check=True makes a failed download fail right here.
+            subprocess.run(['wget', '-P', tmp_dir, img_url], check=True)
+
+            # Load the local image folder
+            ds = MsDataset.load('imagefolder', data_dir=tmp_dir)
+            sample = next(iter(ds))
+            logger.info(f'>>output of test_local_img_folder:\n {sample}')
+            assert sample
+
 
 if __name__ == '__main__':
     unittest.main()