Files
modelscope/tests/msdatasets/test_stream_load.py
Xingjun.Wang 2c4505e13a Adapt new datasets (#1002)
* update ds==3.0

* update

* add http_get_ms func

* del unused code

* fix pr issue and update requirements
2024-09-30 16:46:00 +08:00

59 lines
1.8 KiB
Python

# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest
from modelscope import MsDataset
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level
# Module-level logger; the tests below use it to record each fetched sample.
logger = get_logger()
class TestStreamLoad(unittest.TestCase):
    """Smoke tests for loading hub datasets through ``MsDataset`` in streaming mode.

    Every test opens the ``train`` split of one dataset repo with
    ``use_streaming=True``, pulls the first sample from the resulting
    iterable, logs it, and checks that one expected field is truthy.
    Tests are gated on ``test_level()`` via ``unittest.skipUnless``.
    """

    def setUp(self):
        """No per-test setup is needed."""

    def tearDown(self):
        """No per-test cleanup is needed."""

    def _check_first_sample(self, repo_id: str, field: str) -> None:
        """Stream the first ``train`` sample of ``repo_id`` and verify ``field``.

        Args:
            repo_id: Dataset repo identifier passed to ``MsDataset.load``.
            field: Key that must map to a truthy value in the first sample.

        Raises:
            AssertionError: If ``sample[field]`` is falsy.
            KeyError: If the first sample has no ``field`` key.
            StopIteration: If the stream yields no samples at all.
        """
        ds = MsDataset.load(repo_id, split='train', use_streaming=True)
        # Only the first element is consumed, so the stream is never
        # iterated beyond a single sample.
        sample = next(iter(ds))
        logger.info(sample)
        # Use the TestCase assertion instead of a bare ``assert``: bare
        # asserts are removed under ``python -O`` and the test would then
        # pass without checking anything.
        self.assertTrue(sample[field], f'Failed to load sample from {repo_id}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_read_zstd(self):
        """First streamed sample of 'swift/chinese-c4' has a truthy 'url'."""
        self._check_first_sample('swift/chinese-c4', 'url')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_imagefolder(self):
        """First streamed sample of the test image dataset has a truthy 'image'."""
        self._check_first_sample('wangxingjun778/test_new_dataset', 'image')

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_stream_parquet(self):
        """First streamed sample of 'swift/A-OKVQA' has a truthy 'question'."""
        self._check_first_sample('swift/A-OKVQA', 'question')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_swift_jsonl(self):
        """First streamed sample of 'iic/MSAgent-MultiRole' has a truthy 'id'."""
        self._check_first_sample('iic/MSAgent-MultiRole', 'id')
# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()