mirror of
https://github.com/modelscope/modelscope.git
synced 2026-02-24 20:19:51 +01:00
add ut for stream
This commit is contained in:
49
tests/msdatasets/test_stream_load.py
Normal file
49
tests/msdatasets/test_stream_load.py
Normal file
@@ -0,0 +1,49 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
import unittest
|
||||
|
||||
from modelscope import MsDataset
|
||||
from modelscope.utils.logger import get_logger
|
||||
from modelscope.utils.test_utils import test_level
|
||||
|
||||
# Module-level logger; the tests below use it to log the first streamed sample.
logger = get_logger()
|
||||
|
||||
|
||||
class TestStreamLoad(unittest.TestCase):
    """Smoke tests for loading datasets through ``MsDataset`` in streaming mode.

    Each test loads the ``train`` split of a hub dataset with
    ``use_streaming=True``, pulls the first sample, logs it, and checks that
    an expected field of that sample is truthy.
    """

    def setUp(self):
        ...

    def tearDown(self):
        ...

    def _assert_first_sample_has_field(self, repo_id: str, field: str) -> None:
        """Stream ``repo_id`` and verify the first sample carries ``field``.

        Args:
            repo_id: Dataset identifier passed to ``MsDataset.load``.
            field: Key looked up on the first sample; its value must be
                truthy for the check to pass.

        Raises:
            AssertionError: If the stream yields no sample at all, or the
                value stored under ``field`` is falsy.
        """
        ds = MsDataset.load(repo_id, split='train', use_streaming=True)

        # Use a default so an empty stream is reported as a test failure
        # that names the repo, instead of a bare StopIteration error.
        sample = next(iter(ds), None)
        logger.info(sample)

        failure_msg = f'Failed to load sample from {repo_id}'
        # unittest assertions are used instead of bare ``assert`` so the
        # checks are not stripped when Python runs with ``-O``.
        self.assertIsNotNone(sample, failure_msg)
        self.assertTrue(sample[field], failure_msg)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_read_zstd(self):
        # NOTE(review): presumably this repo is stored zstd-compressed, as
        # the test name suggests -- confirm against the hub dataset.
        self._assert_first_sample_has_field('swift/chinese-c4', 'url')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_imagefolder(self):
        # NOTE(review): presumably an image-folder style repo, as the test
        # name suggests -- confirm against the hub dataset.
        self._assert_first_sample_has_field(
            'wangxingjun778/test_new_dataset', 'image')

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_stream_parquet(self):
        # NOTE(review): presumably a parquet-backed repo, as the test name
        # suggests -- confirm against the hub dataset.
        self._assert_first_sample_has_field('swift/A-OKVQA', 'question')
|
||||
|
||||
|
||||
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
||||
Reference in New Issue
Block a user