mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-19 01:29:24 +01:00
Upgrade datasets (#921)
* del _datasets_server import in hf_dataset_util
* fix streaming for youku-mplug and adopt latest datasets
* fix download config copy
* update UT
* add youku in test_general_datasets
* update UT for general dataset
* adapt to datasets version 2.19.0 or later
* add assert for youku data UT
* fix disable_tqdm in some functions for 2.19.0 or later
* update get_module_with_script
* set trust_remote_code to True in load_dataset_with_ctx
* update print info
* update requirements for datasets version restriction
* fix _dataset_info
* add pillow
* update comments
* update comment
* reuse _download function in DataDownloadManager
* remove unused code
* update test_run_modelhub in Human3DAnimationTest
* set datasets>=2.18.0
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from modelscope import MsDataset
|
||||
@@ -7,9 +8,6 @@ from modelscope.utils.test_utils import test_level
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
# Note: MODELSCOPE_DOMAIN is set to 'test.modelscope.cn' in the environment variable
|
||||
# TODO: ONLY FOR TEST ENVIRONMENT, to be replaced by the online domain
|
||||
|
||||
TEST_INNER_LEVEL = 1
|
||||
|
||||
|
||||
@@ -19,32 +17,33 @@ class GeneralMsDatasetTest(unittest.TestCase):
|
||||
'skip test in current test level')
|
||||
def test_return_dataset_info_only(self):
|
||||
ds = MsDataset.load(
|
||||
'wangxingjun778test/aya_dataset_mini', dataset_info_only=True)
|
||||
print(f'>>output of test_return_dataset_info_only:\n {ds}')
|
||||
'wangxingjun778/aya_dataset_mini', dataset_info_only=True)
|
||||
logger.info(f'>>output of test_return_dataset_info_only:\n {ds}')
|
||||
|
||||
@unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
|
||||
'skip test in current test level')
|
||||
def test_inner_fashion_mnist(self):
|
||||
# inner means the dataset is on the test.modelscope.cn environment
|
||||
ds = MsDataset.load(
|
||||
'xxxxtest0004/ms_test_0308_py',
|
||||
'wangxingjun778/ms_test_0308_py',
|
||||
subset_name='fashion_mnist',
|
||||
split='train')
|
||||
print(f'>>output of test_inner_fashion_mnist:\n {next(iter(ds))}')
|
||||
logger.info(
|
||||
f'>>output of test_inner_fashion_mnist:\n {next(iter(ds))}')
|
||||
|
||||
@unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
|
||||
'skip test in current test level')
|
||||
def test_inner_clue(self):
|
||||
ds = MsDataset.load(
|
||||
'wangxingjun778test/clue', subset_name='afqmc', split='train')
|
||||
print(f'>>output of test_inner_clue:\n {next(iter(ds))}')
|
||||
'wangxingjun778/clue', subset_name='afqmc', split='train')
|
||||
logger.info(f'>>output of test_inner_clue:\n {next(iter(ds))}')
|
||||
|
||||
@unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
|
||||
'skip test in current test level')
|
||||
def test_inner_cats_and_dogs_mini(self):
|
||||
ds = MsDataset.load(
|
||||
'wangxingjun778test/cats_and_dogs_mini', split='train')
|
||||
print(f'>>output of test_inner_cats_and_dogs_mini:\n {next(iter(ds))}')
|
||||
ds = MsDataset.load('wangxingjun778/cats_and_dogs_mini', split='train')
|
||||
logger.info(
|
||||
f'>>output of test_inner_cats_and_dogs_mini:\n {next(iter(ds))}')
|
||||
|
||||
@unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
|
||||
'skip test in current test level')
|
||||
@@ -53,14 +52,14 @@ class GeneralMsDatasetTest(unittest.TestCase):
|
||||
# data/train-xxx-of-xxx.parquet; data/test-xxx-of-xxx.parquet
|
||||
# demographics/train-xxx-of-xxx.parquet
|
||||
|
||||
ds = MsDataset.load(
|
||||
'wangxingjun778test/aya_dataset_mini', split='train')
|
||||
print(f'>>output of test_inner_aya_dataset_mini:\n {next(iter(ds))}')
|
||||
ds = MsDataset.load('wangxingjun778/aya_dataset_mini', split='train')
|
||||
logger.info(
|
||||
f'>>output of test_inner_aya_dataset_mini:\n {next(iter(ds))}')
|
||||
|
||||
ds = MsDataset.load(
|
||||
'wangxingjun778test/aya_dataset_mini', subset_name='demographics')
|
||||
'wangxingjun778/aya_dataset_mini', subset_name='demographics')
|
||||
assert next(iter(ds['train']))
|
||||
print(
|
||||
logger.info(
|
||||
f">>output of test_inner_aya_dataset_mini:\n {next(iter(ds['train']))}"
|
||||
)
|
||||
|
||||
@@ -68,36 +67,46 @@ class GeneralMsDatasetTest(unittest.TestCase):
|
||||
'skip test in current test level')
|
||||
def test_inner_no_standard_imgs(self):
|
||||
infos = MsDataset.load(
|
||||
'xxxxtest0004/png_jpg_txt_test', dataset_info_only=True)
|
||||
'wangxingjun778/png_jpg_txt_test', dataset_info_only=True)
|
||||
assert infos['default']
|
||||
|
||||
ds = MsDataset.load('xxxxtest0004/png_jpg_txt_test', split='train')
|
||||
print(f'>>>output of test_inner_no_standard_imgs: \n{next(iter(ds))}')
|
||||
assert next(iter(ds))
|
||||
|
||||
@unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
|
||||
'skip test in current test level')
|
||||
def test_inner_hf_pictures(self):
|
||||
ds = MsDataset.load('xxxxtest0004/hf_Pictures')
|
||||
print(ds)
|
||||
ds = MsDataset.load('wangxingjun778/png_jpg_txt_test', split='train')
|
||||
logger.info(
|
||||
f'>>>output of test_inner_no_standard_imgs: \n{next(iter(ds))}')
|
||||
assert next(iter(ds))
|
||||
|
||||
@unittest.skipUnless(test_level() >= 3, 'skip test in current test level')
|
||||
def test_inner_speech_yinpin(self):
|
||||
ds = MsDataset.load('xxxxtest0004/hf_lj_speech_yinpin_test')
|
||||
print(ds)
|
||||
ds = MsDataset.load('wangxingjun778/hf_lj_speech_yinpin_test')
|
||||
logger.info(ds)
|
||||
assert next(iter(ds))
|
||||
|
||||
@unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
|
||||
'skip test in current test level')
|
||||
def test_inner_yuancheng_picture(self):
|
||||
ds = MsDataset.load(
|
||||
'xxxxtest0004/yuancheng_picture',
|
||||
'wangxingjun778/yuancheng_picture',
|
||||
subset_name='remote_images',
|
||||
split='train')
|
||||
print(next(iter(ds)))
|
||||
logger.info(next(iter(ds)))
|
||||
assert next(iter(ds))
|
||||
|
||||
@unittest.skipUnless(test_level() >= TEST_INNER_LEVEL,
|
||||
'skip test in current test level')
|
||||
def test_youku_mplug_dataset(self):
|
||||
# To test the Youku-AliceMind dataset with new sdk version
|
||||
ds = MsDataset.load(
|
||||
'modelscope/Youku-AliceMind',
|
||||
subset_name='classification',
|
||||
split='validation', # Options: train, test, validation
|
||||
use_streaming=True)
|
||||
|
||||
logger.info(next(iter(ds)))
|
||||
data_sample = next(iter(ds))
|
||||
|
||||
assert data_sample['video_id'][0]
|
||||
assert os.path.exists(data_sample['video_id:FILE'][0])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user