# Mirror of https://github.com/modelscope/modelscope.git
# (synced 2025-12-16 16:27:45 +01:00).
# Last commit summary:
#   * update ds==3.0
#   * update
#   * add http_get_ms func
#   * del unused code
#   * fix pr issue and update requirements
# File info: 59 lines, 1.8 KiB, Python.
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
|
|
import unittest
|
|
|
|
from modelscope import MsDataset
|
|
from modelscope.utils.logger import get_logger
|
|
from modelscope.utils.test_utils import test_level
|
|
|
|
# Module-level logger from modelscope's ``get_logger``; the tests below use it
# to log the first sample they read from each dataset.
logger = get_logger()
|
|
|
|
|
|
class TestStreamLoad(unittest.TestCase):
    """Smoke tests for loading datasets in streaming mode.

    Every test loads the ``train`` split of one dataset repo through
    ``MsDataset.load(..., use_streaming=True)``, reads the first sample,
    logs it, and checks that one expected field of that sample is truthy.
    Each test is gated on ``test_level()`` via ``unittest.skipUnless``.
    """

    def setUp(self):
        ...

    def tearDown(self):
        ...

    def _check_first_sample(self, repo_id: str, field: str) -> None:
        """Stream ``repo_id`` and assert its first sample has ``field`` set.

        Args:
            repo_id: Dataset identifier passed to ``MsDataset.load``.
            field: Key looked up in the first sample; its value must be
                truthy for the check to pass.

        Raises:
            AssertionError: If ``sample[field]`` is falsy.
        """
        ds = MsDataset.load(repo_id, split='train', use_streaming=True)
        sample = next(iter(ds))
        logger.info(sample)

        # Use the TestCase assertion instead of a bare ``assert``: bare
        # asserts are removed when Python runs with -O, which would let the
        # test pass without checking anything.
        self.assertTrue(
            sample[field], f'Failed to load sample from {repo_id}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_read_zstd(self):
        # Presumably a zstd-compressed dataset, per the test name -- confirm.
        self._check_first_sample('swift/chinese-c4', 'url')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_imagefolder(self):
        # Presumably an image-folder style dataset, per the test name.
        self._check_first_sample('wangxingjun778/test_new_dataset', 'image')

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_stream_parquet(self):
        # Only runs at test level 2 or higher, unlike the other tests here.
        self._check_first_sample('swift/A-OKVQA', 'question')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_stream_swift_jsonl(self):
        # Presumably a JSON-lines dataset, per the test name -- confirm.
        self._check_first_sample('iic/MSAgent-MultiRole', 'id')
|
|
|
|
|
|
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|