# Copyright (c) Alibaba, Inc. and its affiliates.
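# Unit tests for MsDataset: loading datasets from the ModelScope hub and
# converting them to PyTorch / TensorFlow datasets for batching.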
import unittest

from modelscope.models import Model
from modelscope.msdatasets import MsDataset
from modelscope.preprocessors import SequenceClassificationPreprocessor
from modelscope.preprocessors.base import Preprocessor
from modelscope.utils.constant import DEFAULT_DATASET_NAMESPACE, DownloadMode
from modelscope.utils.test_utils import require_tf, require_torch, test_level


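# A minimal image preprocessor used by the *_img tests below: it reads each
# sample's image with OpenCV and resizes it to the sample's width/height
# fields, falling back to 128x128 when those fields are missing.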
class ImgPreprocessor(Preprocessor):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Field names used to look up the image path and target size in each
        # data sample.
        self.path_field = kwargs.pop('image_path', 'image_path')
        self.width = kwargs.pop('width', 'width')
        self.height = kwargs.pop('height', 'height')

    def __call__(self, data):
        import cv2
        image_path = data.get(self.path_field)
        if not image_path:
            return None
        img = cv2.imread(image_path)
        # cv2.resize expects the target size as (width, height).
        return {
            'image':
            cv2.resize(img,
                       (data.get(self.width, 128), data.get(self.height, 128)))
        }


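# Tests are gated by test_level() so that the slower cases, which download
# models and datasets from the hub, only run at higher test levels; see
# modelscope.utils.test_utils for how the level is configured.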
class MsDatasetTest(unittest.TestCase):

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_movie_scene_seg_toydata(self):
        ms_ds_train = MsDataset.load('movie_scene_seg_toydata', split='train')
        print(ms_ds_train._hf_ds.config_kwargs)
        assert next(iter(ms_ds_train.config_kwargs['split_config'].values()))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_coco(self):
        ms_ds_train = MsDataset.load(
            'pets_small',
            namespace=DEFAULT_DATASET_NAMESPACE,
            download_mode=DownloadMode.FORCE_REDOWNLOAD,
            split='train')
        print(ms_ds_train.config_kwargs)
        assert next(iter(ms_ds_train.config_kwargs['split_config'].values()))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ms_csv_basic(self):
        ms_ds_train = MsDataset.load(
            'afqmc_small', namespace='userxiaoming', split='train')
        print(next(iter(ms_ds_train)))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ds_basic(self):
        ms_ds_full = MsDataset.load(
            'xcopa', subset_name='translation-et', namespace='damotest')
        ms_ds = MsDataset.load(
            'xcopa',
            subset_name='translation-et',
            namespace='damotest',
            split='test')
        print(next(iter(ms_ds_full['test'])))
        print(next(iter(ms_ds)))
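
    # The two *_text tests below share the same pattern: load a hub dataset,
    # build a SequenceClassificationPreprocessor from a pretrained model's
    # directory, then convert the dataset to a framework-native one for
    # batching.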
    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @require_torch
    def test_to_torch_dataset_text(self):
        model_id = 'damo/bert-base-sst2'
        nlp_model = Model.from_pretrained(model_id)
        preprocessor = SequenceClassificationPreprocessor(
            nlp_model.model_dir,
            first_sequence='premise',
            second_sequence=None,
            padding='max_length')
        ms_ds_train = MsDataset.load(
            'xcopa',
            subset_name='translation-et',
            namespace='damotest',
            split='test')
        pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor)
        import torch
        dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
        print(next(iter(dataloader)))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @require_tf
    def test_to_tf_dataset_text(self):
        import tensorflow as tf
        tf.compat.v1.enable_eager_execution()
        model_id = 'damo/bert-base-sst2'
        nlp_model = Model.from_pretrained(model_id)
        preprocessor = SequenceClassificationPreprocessor(
            nlp_model.model_dir,
            first_sequence='premise',
            second_sequence=None)
        ms_ds_train = MsDataset.load(
            'xcopa',
            subset_name='translation-et',
            namespace='damotest',
            split='test')
        tf_dataset = ms_ds_train.to_tf_dataset(
            batch_size=5,
            shuffle=True,
            preprocessors=preprocessor,
            drop_remainder=True)
        print(next(iter(tf_dataset)))
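
    # The *_img tests exercise the ImgPreprocessor defined above on a small
    # image fixture dataset from the hub.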
    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    @require_torch
    def test_to_torch_dataset_img(self):
        ms_image_train = MsDataset.load(
            'fixtures_image_utils', namespace='damotest', split='test')
        pt_dataset = ms_image_train.to_torch_dataset(
            preprocessors=ImgPreprocessor(image_path='file'))
        import torch
        dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
        print(next(iter(dataloader)))

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    @require_tf
    def test_to_tf_dataset_img(self):
        import tensorflow as tf
        tf.compat.v1.enable_eager_execution()
        ms_image_train = MsDataset.load(
            'fixtures_image_utils', namespace='damotest', split='test')
        tf_dataset = ms_image_train.to_tf_dataset(
            batch_size=5,
            shuffle=True,
            preprocessors=ImgPreprocessor(image_path='file'),
            drop_remainder=True)
        print(next(iter(tf_dataset)))


if __name__ == '__main__':
    unittest.main()