2024-08-22 14:43:37 +08:00
|
|
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
|
|
|
|
|
|
|
|
import unittest
|
|
|
|
|
|
|
|
|
|
from modelscope import MsDataset
|
|
|
|
|
from modelscope.utils.logger import get_logger
|
|
|
|
|
from modelscope.utils.test_utils import test_level
|
|
|
|
|
|
|
|
|
|
# Module-level logger; the tests below use it to record each loaded sample.
logger = get_logger()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestStreamLoad(unittest.TestCase):
    """Smoke tests for ``MsDataset.load`` with ``use_streaming=True``.

    Every test loads the ``train`` split of one dataset repo in streaming
    mode, pulls the first sample from the resulting iterable, logs it and
    checks that one expected field of the sample is truthy.

    Tests are gated by ``test_level()``; a test is skipped when the current
    level is below the threshold given in its ``skipUnless`` decorator.
    """

    def setUp(self):
        """No per-test setup is needed."""

    def tearDown(self):
        """No per-test cleanup is needed."""

    def _check_first_sample(self, repo_id: str, field: str) -> None:
        """Stream ``repo_id`` and assert its first sample has a truthy ``field``.

        Args:
            repo_id: Dataset repo identifier passed to ``MsDataset.load``.
            field: Key looked up in the first sample.

        Raises:
            AssertionError: If the stream yields no sample, or the value
                stored under ``field`` is falsy.
            KeyError: If the first sample has no ``field`` key
                (assuming the sample is a dict-like mapping).
        """
        failure_msg = f'Failed to load sample from {repo_id}'

        ds = MsDataset.load(repo_id, split='train', use_streaming=True)

        # A default of None turns an empty stream into a regular test failure
        # with a readable message instead of a bare StopIteration error.
        sample = next(iter(ds), None)
        logger.info(sample)

        # unittest assertion methods are used instead of `assert` statements,
        # which are removed when Python runs with -O.
        self.assertIsNotNone(sample, failure_msg)
        self.assertTrue(sample[field], failure_msg)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_read_zstd(self):
        """First streamed sample of 'swift/chinese-c4' has a truthy 'url'.

        NOTE(review): the test name suggests the repo stores zstd-compressed
        files; that is not visible from this file -- confirm.
        """
        self._check_first_sample('swift/chinese-c4', 'url')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_imagefolder(self):
        """First streamed sample of the image-folder test repo has an 'image'."""
        self._check_first_sample('wangxingjun778/test_new_dataset', 'image')

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_stream_parquet(self):
        """First streamed sample of 'swift/A-OKVQA' has a truthy 'question'.

        Only runs at test level 2 or higher.
        """
        self._check_first_sample('swift/A-OKVQA', 'question')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_swift_jsonl(self):
        """First streamed sample of 'iic/MSAgent-MultiRole' has a truthy 'id'."""
        self._check_first_sample('iic/MSAgent-MultiRole', 'id')
|
|
|
|
|
|
2024-08-22 14:43:37 +08:00
|
|
|
|
|
|
|
|
# Allow running this test module directly as a script; when it is imported
# (e.g. by a test collector) nothing is executed here.
if __name__ == '__main__':
    unittest.main()
|