mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-17 08:47:43 +01:00
1. Fix bugs in daily test
2. Fix a bug where the lr was updated before the optimizer's first update
TODO this will still cause warnings when GA is above 1
3. Remove the mode check in the text-classification preprocessor so that it fits the base trainer (bug)
Update some regression bins to fit the preprocessor
4. Update the regression tool to let outer code modify atol and rtol
5. Add the default metric for text-classification task
6. Remove the unused checkpoint conversion method in bert so that loading modeling_bert no longer requires TensorFlow
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10430764
143 lines
5.2 KiB
Python
143 lines
5.2 KiB
Python
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
|
|
import unittest
|
|
|
|
from modelscope.models import Model
|
|
from modelscope.msdatasets import MsDataset
|
|
from modelscope.preprocessors import SequenceClassificationPreprocessor
|
|
from modelscope.preprocessors.base import Preprocessor
|
|
from modelscope.utils.constant import DEFAULT_DATASET_NAMESPACE, DownloadMode
|
|
from modelscope.utils.test_utils import require_tf, require_torch, test_level
|
|
|
|
|
|
class ImgPreprocessor(Preprocessor):
    """Test preprocessor that reads an image from disk and resizes it.

    Keyword Args:
        image_path: Name of the sample field that holds the image file path.
            Defaults to ``'image_path'``.
        width: Name of the sample field that holds the target width.
            Defaults to ``'width'``.
        height: Name of the sample field that holds the target height.
            Defaults to ``'height'``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.path_field = kwargs.pop('image_path', 'image_path')
        self.width = kwargs.pop('width', 'width')
        # Default to the 'height' field so that width and height are looked
        # up independently in each sample.
        self.height = kwargs.pop('height', 'height')

    def __call__(self, data):
        """Load and resize the image referenced by ``data``.

        Args:
            data: Mapping-like sample; it is read with ``.get`` for the
                path field and for the width/height fields.

        Returns:
            ``None`` when the path field is missing or empty, otherwise a
            dict ``{'image': <resized image>}``. Width and height each fall
            back to 128 when the sample does not provide them.
        """
        import cv2
        image_path = data.get(self.path_field)
        if not image_path:
            return None
        img = cv2.imread(image_path)
        # cv2.resize expects dsize as (width, height).
        target_size = (data.get(self.width, 128), data.get(self.height, 128))
        return {'image': cv2.resize(img, target_size)}
|
|
|
|
|
|
class MsDatasetTest(unittest.TestCase):
    """Smoke tests for ``MsDataset.load`` and its torch / tf conversions.

    Most tests only print the first sample or batch. The two level-0 tests
    additionally check that the first entry of the dataset's
    ``split_config`` is truthy.
    """

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_movie_scene_seg_toydata(self):
        ms_ds_train = MsDataset.load('movie_scene_seg_toydata', split='train')
        # Use the public ``config_kwargs`` accessor for both the print and
        # the check, matching test_coco.
        print(ms_ds_train.config_kwargs)
        split_config = ms_ds_train.config_kwargs['split_config']
        # assertTrue (unlike a bare assert) is not stripped under ``-O``.
        self.assertTrue(next(iter(split_config.values())))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_coco(self):
        # NOTE: despite the test name, the dataset loaded here is
        # 'pets_small'.
        ms_ds_train = MsDataset.load(
            'pets_small',
            namespace=DEFAULT_DATASET_NAMESPACE,
            download_mode=DownloadMode.FORCE_REDOWNLOAD,
            split='train')
        print(ms_ds_train.config_kwargs)
        split_config = ms_ds_train.config_kwargs['split_config']
        # assertTrue (unlike a bare assert) is not stripped under ``-O``.
        self.assertTrue(next(iter(split_config.values())))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ms_csv_basic(self):
        # Only the first five rows are selected before printing a sample.
        ms_ds_train = MsDataset.load(
            'clue', subset_name='afqmc',
            split='train').to_hf_dataset().select(range(5))
        print(next(iter(ms_ds_train)))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ds_basic(self):
        # Load once without a split (indexed by split name below) and once
        # with split='test' (iterated directly).
        ms_ds_full = MsDataset.load(
            'xcopa', subset_name='translation-et', namespace='damotest')
        ms_ds = MsDataset.load(
            'xcopa',
            subset_name='translation-et',
            namespace='damotest',
            split='test')
        print(next(iter(ms_ds_full['test'])))
        print(next(iter(ms_ds)))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @require_torch
    def test_to_torch_dataset_text(self):
        model_id = 'damo/bert-base-sst2'
        nlp_model = Model.from_pretrained(model_id)
        preprocessor = SequenceClassificationPreprocessor(
            nlp_model.model_dir,
            first_sequence='premise',
            second_sequence=None,
            padding='max_length')
        ms_ds_train = MsDataset.load(
            'xcopa',
            subset_name='translation-et',
            namespace='damotest',
            split='test')
        pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor)
        # torch is imported lazily so the module still imports without it.
        import torch
        dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
        print(next(iter(dataloader)))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    @require_tf
    def test_to_tf_dataset_text(self):
        # tensorflow is imported lazily so the module still imports
        # without it.
        import tensorflow as tf
        tf.compat.v1.enable_eager_execution()
        model_id = 'damo/bert-base-sst2'
        nlp_model = Model.from_pretrained(model_id)
        preprocessor = SequenceClassificationPreprocessor(
            nlp_model.model_dir,
            first_sequence='premise',
            second_sequence=None)
        ms_ds_train = MsDataset.load(
            'xcopa',
            subset_name='translation-et',
            namespace='damotest',
            split='test')
        tf_dataset = ms_ds_train.to_tf_dataset(
            batch_size=5,
            shuffle=True,
            preprocessors=preprocessor,
            drop_remainder=True)
        print(next(iter(tf_dataset)))

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    @require_torch
    def test_to_torch_dataset_img(self):
        ms_image_train = MsDataset.load(
            'fixtures_image_utils', namespace='damotest', split='test')
        # The image path is read from the 'file' field of each sample.
        pt_dataset = ms_image_train.to_torch_dataset(
            preprocessors=ImgPreprocessor(image_path='file'))
        # torch is imported lazily so the module still imports without it.
        import torch
        dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
        print(next(iter(dataloader)))

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    @require_tf
    def test_to_tf_dataset_img(self):
        # tensorflow is imported lazily so the module still imports
        # without it.
        import tensorflow as tf
        tf.compat.v1.enable_eager_execution()
        ms_image_train = MsDataset.load(
            'fixtures_image_utils', namespace='damotest', split='test')
        # The image path is read from the 'file' field of each sample.
        tf_dataset = ms_image_train.to_tf_dataset(
            batch_size=5,
            shuffle=True,
            preprocessors=ImgPreprocessor(image_path='file'),
            drop_remainder=True,
        )
        print(next(iter(tf_dataset)))
|
|
|
|
|
|
if __name__ == '__main__':
    # Run the test cases of this module when it is executed as a script.
    unittest.main()
|