mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-16 16:27:45 +01:00
150 lines
6.0 KiB
Python
150 lines
6.0 KiB
Python
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
import os
|
|
import unittest
|
|
|
|
from modelscope import MsDataset
|
|
from modelscope.utils.logger import get_logger
|
|
from modelscope.utils.test_utils import test_level
|
|
|
|
# Module-level logger; the tests below use it to report the samples they load.
logger = get_logger()
|
|
|
|
# Threshold compared against test_level() by the skipUnless decorators in this
# module: a guarded test runs only when test_level() >= TEST_INNER_LEVEL.
TEST_INNER_LEVEL = 1
|
|
|
|
|
|
class GeneralMsDatasetTest(unittest.TestCase):
    """Smoke tests for ``MsDataset.load`` on remote dataset ids and local inputs.

    Each test loads a dataset, logs what it got and, where the test checks
    anything, asserts that the first sample is truthy. The ``test_local_*``
    tests additionally shell out to the ``wget`` executable.
    """

    def _download(self, url, target_dir):
        """Fetch ``url`` into ``target_dir`` with ``wget`` and return its path.

        Args:
            url: Remote file to download. Its last path component is the
                file name ``wget`` writes inside ``target_dir``.
            target_dir: Existing directory that receives the file.

        Returns:
            The path of the downloaded file inside ``target_dir``.

        Raises:
            AssertionError: If ``wget`` exits with a non-zero status or the
                expected file does not exist afterwards.
            FileNotFoundError: If the ``wget`` executable is not installed.
        """
        # Function-scope import, like the other helper imports in this class.
        import subprocess

        # An argument list (no shell) avoids quoting problems with unusual
        # paths and exposes the exit status, which os.system() callers
        # silently dropped.
        completed = subprocess.run(
            ['wget', '-P', target_dir, url], check=False)
        local_path = os.path.join(target_dir, os.path.basename(url))
        self.assertTrue(
            completed.returncode == 0 and os.path.exists(local_path),
            f'File not found: {local_path} '
            f'(wget exit code {completed.returncode}), '
            f'please check the url: {url}')
        return local_path

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_return_dataset_info_only(self):
        """Load with ``dataset_info_only=True`` and log the result."""
        ds = MsDataset.load(
            'wangxingjun778/aya_dataset_mini', dataset_info_only=True)
        logger.info(f'>>output of test_return_dataset_info_only:\n {ds}')

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_fashion_mnist(self):
        """Load the ``fashion_mnist`` subset and log its first train sample."""
        # inner means the dataset is on the test.modelscope.cn environment
        ds = MsDataset.load(
            'wangxingjun778/ms_test_0308_py',
            subset_name='fashion_mnist',
            split='train')
        logger.info(
            f'>>output of test_inner_fashion_mnist:\n {next(iter(ds))}')

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_clue(self):
        """Load the ``afqmc`` subset of clue and log its first train sample."""
        ds = MsDataset.load(
            'wangxingjun778/clue', subset_name='afqmc', split='train')
        logger.info(f'>>output of test_inner_clue:\n {next(iter(ds))}')

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_cats_and_dogs_mini(self):
        """Load the train split and log its first sample."""
        ds = MsDataset.load('wangxingjun778/cats_and_dogs_mini', split='train')
        logger.info(
            f'>>output of test_inner_cats_and_dogs_mini:\n {next(iter(ds))}')

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_aya_dataset_mini(self):
        """Load the default train split, then the ``demographics`` subset."""
        # Dataset Format:
        # data/train-xxx-of-xxx.parquet; data/test-xxx-of-xxx.parquet
        # demographics/train-xxx-of-xxx.parquet

        ds = MsDataset.load('wangxingjun778/aya_dataset_mini', split='train')
        logger.info(
            f'>>output of test_inner_aya_dataset_mini:\n {next(iter(ds))}')

        # No split is requested here, so the result is indexed by split name.
        ds = MsDataset.load(
            'wangxingjun778/aya_dataset_mini', subset_name='demographics')
        # Fetch the first row once and reuse it for the check and the log.
        first_row = next(iter(ds['train']))
        self.assertTrue(first_row)
        logger.info(
            f'>>output of test_inner_aya_dataset_mini:\n {first_row}')

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_no_standard_imgs(self):
        """Check the ``default`` info entry and the first train sample."""
        infos = MsDataset.load(
            'wangxingjun778/png_jpg_txt_test', dataset_info_only=True)
        self.assertTrue(infos['default'])

        ds = MsDataset.load('wangxingjun778/png_jpg_txt_test', split='train')
        first_sample = next(iter(ds))
        logger.info(
            f'>>>output of test_inner_no_standard_imgs: \n{first_sample}')
        self.assertTrue(first_sample)

    @unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
    def test_inner_speech_yinpin(self):
        """Load the dataset without a split and check its first item."""
        ds = MsDataset.load('wangxingjun778/hf_lj_speech_yinpin_test')
        logger.info(ds)
        # NOTE(review): no split is requested, so the first item may be a
        # split name rather than a data row - confirm this check is strong
        # enough.
        self.assertTrue(next(iter(ds)))

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_inner_yuancheng_picture(self):
        """Load the ``remote_images`` subset and check its first sample."""
        ds = MsDataset.load(
            'wangxingjun778/yuancheng_picture',
            subset_name='remote_images',
            split='train')
        first_sample = next(iter(ds))
        logger.info(first_sample)
        self.assertTrue(first_sample)

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_youku_mplug_dataset(self):
        """Stream the validation split and check the first video sample."""
        # To test the Youku-AliceMind dataset with new sdk version
        ds = MsDataset.load(
            'modelscope/Youku-AliceMind',
            subset_name='classification',
            split='validation',  # Options: train, test, validation
            use_streaming=True)

        # Pull the first sample once; it is both logged and inspected.
        data_sample = next(iter(ds))
        logger.info(data_sample)

        self.assertTrue(data_sample['video_id'][0])
        self.assertTrue(os.path.exists(data_sample['video_id:FILE'][0]))

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_local_py_script(self):
        """Download a dataset loading script and load it from a local path."""
        from tempfile import TemporaryDirectory
        py_script_url = 'https://modelscope.cn/datasets/wangxingjun778/glue_test/resolve/master/glue_test.py'
        # TemporaryDirectory creates the directory itself; the load must
        # happen inside the block because the directory is removed on exit.
        with TemporaryDirectory() as tmp_dir:
            # Download the dataset files to temp directory
            py_script_file = self._download(py_script_url, tmp_dir)

            # Load the dataset
            ds = MsDataset.load(
                py_script_file, subset_name='cola', split='train')
            sample = next(iter(ds))
            logger.info(f'>>output of test_local_py_script:\n {sample}')
            self.assertTrue(sample)

    @unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
                         'skip test in current test level')
    def test_local_img_folder(self):
        """Download one image and load its directory as an ``imagefolder``."""
        from tempfile import TemporaryDirectory
        img_url = 'https://modelscope.cn/datasets/wangxingjun778/test_img_dataset/resolve/master/data/train/' \
                  '000000573258.jpg'
        with TemporaryDirectory() as tmp_dir:
            # Download the dataset files to temp directory; a failed
            # download now fails the test here instead of later on load.
            self._download(img_url, tmp_dir)

            # Load the local image folder
            ds = MsDataset.load('imagefolder', data_dir=tmp_dir)
            sample = next(iter(ds))
            logger.info(f'>>output of test_local_img_folder:\n {sample}')
            self.assertTrue(sample)
|
|
|
|
|
|
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|