Files
modelscope/tests/msdatasets/test_ms_dataset.py

133 lines
4.7 KiB
Python
Raw Normal View History

import unittest
from modelscope.models import Model
from modelscope.msdatasets import MsDataset
from modelscope.preprocessors import SequenceClassificationPreprocessor
from modelscope.preprocessors.base import Preprocessor
from modelscope.utils.constant import DownloadMode
from modelscope.utils.test_utils import require_tf, require_torch, test_level
class ImgPreprocessor(Preprocessor):
    """Test-only preprocessor that reads an image from disk and resizes it.

    The constructor options name the *keys* to look up in each sample dict,
    not the values themselves.

    Keyword Args:
        image_path: Key of the sample field holding the image file path.
            Defaults to ``'image_path'``.
        width: Key of the sample field holding the target width.
            Defaults to ``'width'``.
        height: Key of the sample field holding the target height.
            Defaults to ``'height'``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.path_field = kwargs.pop('image_path', 'image_path')
        self.width = kwargs.pop('width', 'width')
        # The fallback key used to be 'width' (copy-paste slip), which made
        # the default height lookup read the sample's width field.
        self.height = kwargs.pop('height', 'height')

    def __call__(self, data):
        """Load and resize the image referenced by ``data``.

        Args:
            data: Sample mapping; must support ``.get``.

        Returns:
            ``{'image': <resized image>}``, or ``None`` when the sample has
            no (or an empty) value under the configured path field.
        """
        import cv2

        image_path = data.get(self.path_field)
        if not image_path:
            return None

        img = cv2.imread(image_path)
        # Both dimensions fall back to 128 when the sample does not carry them.
        target_width = data.get(self.width, 128)
        target_height = data.get(self.height, 128)
        # cv2.resize expects dsize as (width, height); the previous code
        # passed (height, width), which only worked for square targets.
        return {'image': cv2.resize(img, (target_width, target_height))}
class MsDatasetTest(unittest.TestCase):
    """Smoke tests for ``MsDataset`` loading and framework conversion.

    Every test loads a dataset and prints one sample (or one batch); no
    assertions are made beyond the calls completing without raising.
    """

    @staticmethod
    def _load_xcopa_test_split():
        """Load the ``test`` split of the ``xcopa`` / ``translation-et`` set."""
        return MsDataset.load(
            'xcopa',
            subset_name='translation-et',
            namespace='damotest',
            split='test')

    @staticmethod
    def _load_image_fixtures():
        """Load the ``test`` split of the ``fixtures_image_utils`` set."""
        return MsDataset.load(
            'fixtures_image_utils', namespace='damotest', split='test')

    @staticmethod
    def _build_sst2_preprocessor():
        """Build a sequence-classification preprocessor from the sst2 model."""
        model_id = 'damo/bert-base-sst2'
        nlp_model = Model.from_pretrained(model_id)
        return SequenceClassificationPreprocessor(
            nlp_model.model_dir,
            first_sequence='premise',
            second_sequence=None)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_coco(self):
        """Force a re-download of ``pets_small`` and print its config kwargs."""
        load_options = dict(
            namespace='modelscope',
            split='train',
            download_mode=DownloadMode.FORCE_REDOWNLOAD,
            classes=('1', '2'))
        ms_ds_train = MsDataset.load('pets_small', **load_options)
        print(ms_ds_train._hf_ds.config_kwargs)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ms_csv_basic(self):
        """Print the first sample of the ``afqmc_small`` train split."""
        ms_ds_train = MsDataset.load(
            'afqmc_small', namespace='userxiaoming', split='train')
        first_sample = next(iter(ms_ds_train))
        print(first_sample)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ds_basic(self):
        """Load ``xcopa`` with and without ``split`` and print one sample each."""
        ms_ds_full = MsDataset.load(
            'xcopa', subset_name='translation-et', namespace='damotest')
        ms_ds = self._load_xcopa_test_split()
        for dataset in (ms_ds_full['test'], ms_ds):
            print(next(iter(dataset)))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @require_torch
    def test_to_torch_dataset_text(self):
        """Convert the text dataset to a torch dataset and print one batch."""
        preprocessor = self._build_sst2_preprocessor()
        ms_ds_train = self._load_xcopa_test_split()
        pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor)
        import torch
        dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
        print(next(iter(dataloader)))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @require_tf
    def test_to_tf_dataset_text(self):
        """Convert the text dataset to a tf dataset and print one batch."""
        import tensorflow as tf
        tf.compat.v1.enable_eager_execution()
        preprocessor = self._build_sst2_preprocessor()
        ms_ds_train = self._load_xcopa_test_split()
        conversion_options = dict(
            batch_size=5,
            shuffle=True,
            preprocessors=preprocessor,
            drop_remainder=True)
        tf_dataset = ms_ds_train.to_tf_dataset(**conversion_options)
        print(next(iter(tf_dataset)))

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    @require_torch
    def test_to_torch_dataset_img(self):
        """Convert the image dataset to a torch dataset and print one batch."""
        ms_image_train = self._load_image_fixtures()
        image_preprocessor = ImgPreprocessor(image_path='file')
        pt_dataset = ms_image_train.to_torch_dataset(
            preprocessors=image_preprocessor)
        import torch
        dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
        print(next(iter(dataloader)))

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    @require_tf
    def test_to_tf_dataset_img(self):
        """Convert the image dataset to a tf dataset and print one batch."""
        import tensorflow as tf
        tf.compat.v1.enable_eager_execution()
        ms_image_train = self._load_image_fixtures()
        tf_dataset = ms_image_train.to_tf_dataset(
            batch_size=5,
            shuffle=True,
            preprocessors=ImgPreprocessor(image_path='file'),
            drop_remainder=True,
        )
        print(next(iter(tf_dataset)))
# Run the test suite only when this file is executed as a script,
# not when it is imported by a test collector.
if __name__ == '__main__':
    unittest.main()