Files
modelscope/tests/msdatasets/test_general_datasets.py
Xingjun.Wang 1a66f069c4 Dataset refactor (#807)
* add main entry in ms_dataset

* update func get_data_patterns import

* modify return_config_only

* modify return_config_only to dataset_info_only

* update version for test

* del get_logger(__name__)

* fix py script loading

* fix loading py and without py

* add subset support

* add hf_datasets_util; refine list_repo_tree_ms; fix private datasets loading issue

* update version to rc5

* fix and support preview for dataset_info_only mode

* fix urlencode

* update to rc7

* 1. loading of dataset_infos.json is deprecated; 2. add some unit tests

* update version

* add escapechar for read_csv and to_csv

* add params: Source=SDK

* add create_dataset func

* overwrite _get_paths_info

* update & version

* update list_repo_tree name

* add get_module_with_script, fix download imports

* fix py script loading issue in dataset_module_factory

* fix create dataset

* update log info in api
2024-03-22 17:30:34 +08:00

104 lines
3.9 KiB
Python

# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
from modelscope import MsDataset
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
# Module-level logger obtained from modelscope.utils.logger.get_logger.
logger = get_logger()
# Note: the MODELSCOPE_DOMAIN environment variable is expected to be set to
# 'test.modelscope.cn' when running these tests.
# TODO: ONLY FOR TEST ENVIRONMENT, to be replaced by the online domain
# Threshold compared against test_level() in the skipUnless decorators of the
# test class below: those tests run only when test_level() >= TEST_INNER_LEVEL.
TEST_INNER_LEVEL = 1
class GeneralMsDatasetTest(unittest.TestCase):
    """Smoke tests for ``MsDataset.load`` on datasets in the test environment.

    Each test is skipped unless ``test_level()`` reaches the threshold given
    in its ``skipUnless`` decorator. Checks use ``unittest`` assertion methods
    instead of bare ``assert`` statements so they are still executed when
    Python runs with ``-O``. Per-test notes are written as ``#`` comments
    (not docstrings) so unittest's verbose output keeps showing method names.
    """

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_return_dataset_info_only(self):
        # Load with dataset_info_only=True and print whatever is returned.
        ds = MsDataset.load(
            'wangxingjun778test/aya_dataset_mini', dataset_info_only=True)
        print(f'>>output of test_return_dataset_info_only:\n {ds}')

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_fashion_mnist(self):
        # inner means the dataset is on the test.modelscope.cn environment
        ds = MsDataset.load(
            'xxxxtest0004/ms_test_0308_py',
            subset_name='fashion_mnist',
            split='train')
        print(f'>>output of test_inner_fashion_mnist:\n {next(iter(ds))}')

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_clue(self):
        # Load the 'afqmc' subset, 'train' split, and print its first item.
        ds = MsDataset.load(
            'wangxingjun778test/clue', subset_name='afqmc', split='train')
        print(f'>>output of test_inner_clue:\n {next(iter(ds))}')

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_cats_and_dogs_mini(self):
        # Load the 'train' split and print its first item.
        ds = MsDataset.load(
            'wangxingjun778test/cats_and_dogs_mini', split='train')
        print(f'>>output of test_inner_cats_and_dogs_mini:\n {next(iter(ds))}')

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_aya_dataset_mini(self):
        # Dataset Format:
        # data/train-xxx-of-xxx.parquet; data/test-xxx-of-xxx.parquet
        # demographics/train-xxx-of-xxx.parquet
        ds = MsDataset.load(
            'wangxingjun778test/aya_dataset_mini', split='train')
        print(f'>>output of test_inner_aya_dataset_mini:\n {next(iter(ds))}')

        # Without `split`, the result is indexed by split name below.
        ds = MsDataset.load(
            'wangxingjun778test/aya_dataset_mini', subset_name='demographics')
        # Fetch the first 'train' item once and reuse it for check and print.
        first_train_item = next(iter(ds['train']))
        self.assertTrue(first_train_item)
        print(
            f">>output of test_inner_aya_dataset_mini:\n {first_train_item}"
        )

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_no_standard_imgs(self):
        # The info-only result must expose a truthy 'default' entry.
        infos = MsDataset.load(
            'xxxxtest0004/png_jpg_txt_test', dataset_info_only=True)
        self.assertTrue(infos['default'])

        ds = MsDataset.load('xxxxtest0004/png_jpg_txt_test', split='train')
        # Fetch the first item once and reuse it for print and check.
        first_item = next(iter(ds))
        print(f'>>>output of test_inner_no_standard_imgs: \n{first_item}')
        self.assertTrue(first_item)

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_hf_pictures(self):
        # Loaded without `split`; the first element yielded must be truthy.
        ds = MsDataset.load('xxxxtest0004/hf_Pictures')
        print(ds)
        self.assertTrue(next(iter(ds)))

    @unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
    def test_inner_speech_yinpin(self):
        # Only runs at test level 3 or above (higher than the other tests).
        ds = MsDataset.load('xxxxtest0004/hf_lj_speech_yinpin_test')
        print(ds)
        self.assertTrue(next(iter(ds)))

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_yuancheng_picture(self):
        ds = MsDataset.load(
            'xxxxtest0004/yuancheng_picture',
            subset_name='remote_images',
            split='train')
        # Fetch the first item once and reuse it for print and check.
        first_item = next(iter(ds))
        print(first_item)
        self.assertTrue(first_item)
if __name__ == '__main__':
    # Executed as a script: let unittest discover and run this module's tests.
    unittest.main()