ok Merge branch 'master' of github.com:modelscope/modelscope into release/1.19

This commit is contained in:
xingjun.wang
2024-10-15 12:42:00 +08:00
3 changed files with 38 additions and 11 deletions

View File

@@ -57,6 +57,7 @@ class LocalDataLoaderManager(DataLoaderManager):
cache_root_dir = self.dataset_context_config.cache_root_dir
download_mode = self.dataset_context_config.download_mode
use_streaming = self.dataset_context_config.use_streaming
trust_remote_code = self.dataset_context_config.trust_remote_code
input_config_kwargs = self.dataset_context_config.config_kwargs
# load local single file
@@ -81,7 +82,7 @@ class LocalDataLoaderManager(DataLoaderManager):
cache_dir=cache_root_dir,
download_mode=download_mode.value,
streaming=use_streaming,
ignore_verifications=True,
trust_remote_code=trust_remote_code,
**input_config_kwargs)
raise f'Expected local data loader type: {LocalDataLoaderType.HF_DATA_LOADER.value}.'
@@ -118,7 +119,6 @@ class RemoteDataLoaderManager(DataLoaderManager):
data_files=data_files,
download_mode=download_mode_val,
streaming=use_streaming,
ignore_verifications=True,
trust_remote_code=trust_remote_code,
**input_config_kwargs)
# download statistics

View File

@@ -874,7 +874,6 @@ class DatasetsWrapperHF:
download_config: Optional[DownloadConfig] = None,
download_mode: Optional[Union[DownloadMode, str]] = None,
verification_mode: Optional[Union[VerificationMode, str]] = None,
ignore_verifications='deprecated',
keep_in_memory: Optional[bool] = None,
save_infos: bool = False,
revision: Optional[Union[str, Version]] = None,
@@ -897,14 +896,6 @@ class DatasetsWrapperHF:
FutureWarning,
)
token = use_auth_token
if ignore_verifications != 'deprecated':
verification_mode = VerificationMode.NO_CHECKS if ignore_verifications else VerificationMode.ALL_CHECKS
warnings.warn(
"'ignore_verifications' was deprecated in favor of 'verification_mode' "
'in version 2.9.1 and will be removed in 3.0.0.\n'
f"You can remove this warning by passing 'verification_mode={verification_mode.value}' instead.",
FutureWarning,
)
if task != 'deprecated':
warnings.warn(
"'task' was deprecated in version 2.13.0 and will be removed in 3.0.0.\n",

View File

@@ -107,6 +107,42 @@ class GeneralMsDatasetTest(unittest.TestCase):
assert data_sample['video_id'][0]
assert os.path.exists(data_sample['video_id:FILE'][0])
@unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                     'skip test in current test level')
def test_local_py_script(self):
    """Load a dataset through a local loading script fetched into a temp dir.

    Downloads ``glue_test.py`` with ``wget``, loads its ``cola`` subset /
    ``train`` split via ``MsDataset.load`` and checks that the first sample
    is truthy.
    """
    # Function-scope imports, following the original block's local import.
    import subprocess
    from tempfile import TemporaryDirectory

    py_script_url = 'https://modelscope.cn/datasets/wangxingjun778/glue_test/resolve/master/glue_test.py'

    # TemporaryDirectory creates the directory itself, so no makedirs call
    # is needed before downloading into it.
    with TemporaryDirectory() as tmp_dir:
        # Pass an argument list (no shell) so the path and url are never
        # re-parsed by a shell, and keep the exit status so a failed
        # download is reported by the assertion below.
        download = subprocess.run(
            ['wget', '-P', tmp_dir, py_script_url], check=False)
        py_script_file = os.path.join(tmp_dir, 'glue_test.py')
        assert download.returncode == 0 and os.path.exists(py_script_file), \
            f'File not found: {py_script_file}, ' \
            f'please check the url: {py_script_url}'

        # Load the dataset
        ds = MsDataset.load(py_script_file, subset_name='cola', split='train')
        sample = next(iter(ds))
        logger.info(f'>>output of test_local_py_script:\n {sample}')
        assert sample
@unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                     'skip test in current test level')
def test_local_img_folder(self):
    """Load a local image folder populated with one downloaded image.

    Downloads a single jpg with ``wget`` into a temp dir, loads that dir via
    ``MsDataset.load('imagefolder', data_dir=...)`` and checks that the
    first sample is truthy.
    """
    # Function-scope imports, following the original block's local import.
    import subprocess
    from tempfile import TemporaryDirectory

    img_url = 'https://modelscope.cn/datasets/wangxingjun778/test_img_dataset/resolve/master/data/train/' \
              '000000573258.jpg'

    # TemporaryDirectory creates the directory itself, so no makedirs call
    # is needed before downloading into it.
    with TemporaryDirectory() as tmp_dir:
        # Pass an argument list (no shell) and keep the exit status: without
        # this check a failed download would only surface later as a loader
        # error on an empty folder.
        download = subprocess.run(
            ['wget', '-P', tmp_dir, img_url], check=False)
        assert download.returncode == 0 and os.listdir(tmp_dir), \
            f'No file downloaded to: {tmp_dir}, please check the url: {img_url}'

        # Load the local image folder
        ds = MsDataset.load('imagefolder', data_dir=tmp_dir)
        sample = next(iter(ds))
        logger.info(f'>>output of test_local_img_folder:\n {sample}')
        assert sample
# Run the unittest runner when this file is executed directly as a script.
if __name__ == '__main__':
unittest.main()