mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-17 16:57:42 +01:00
1. refactor maaslib to modelscope 2. fix UT error 3. support pipeline which does not register default model Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8988388
45 lines
1.2 KiB
Python
45 lines
1.2 KiB
Python
import unittest
|
|
|
|
import datasets as hfdata
|
|
|
|
from modelscope.pydatasets import PyDataset
|
|
|
|
|
|
class PyDatasetTest(unittest.TestCase):
    """Checks conversion between ``PyDataset`` and ``datasets.Dataset``."""

    def setUp(self):
        """Create the two ``PyDataset`` fixtures used by the tests.

        ``self.ds1`` wraps a dataset built from the in-memory dict stored in
        ``self.json_data``; ``self.ds2`` wraps the ``train`` split of
        ``glue``/``mrpc`` obtained through ``hfdata.load_dataset``.
        """
        # ds1: ten in-memory records, all held under the single 'dummy' column.
        records = []
        for idx in range(1, 11):
            records.append({'a': idx, 'x': idx * 10, 'c': idx * 100})
        self.json_data = {'dummy': records}
        in_memory_ds = hfdata.Dataset.from_dict(self.json_data)
        self.ds1 = PyDataset.from_hf_dataset(in_memory_ds)

        # ds2: loaded by name via load_dataset, pinned to revision 2.0.0.
        mrpc_train = hfdata.load_dataset(
            'glue', 'mrpc', revision='2.0.0', split='train')
        self.ds2 = PyDataset.from_hf_dataset(mrpc_train)

    def tearDown(self):
        """Intentionally a no-op."""

    def test_to_hf_dataset(self):
        """``to_hf_dataset`` yields datasets usable through the HF API."""
        converted = self.ds1.to_hf_dataset()
        expected = hfdata.Dataset.from_dict(self.json_data)
        self.assertEqual(converted.data, expected.data)

        # A simple map: copy field 'a' of each 'dummy' record into a new
        # 'new_feature' column, then check one value exists per record.
        def add_feature(row):
            return {'new_feature': row['dummy']['a']}

        mapped = converted.map(add_feature)
        self.assertEqual(len(mapped['new_feature']), 10)

        # The first row of the loaded dataset must carry the expected text.
        mrpc = self.ds2.to_hf_dataset()
        first_sentence = mrpc[0]['sentence1']
        self.assertTrue(first_sentence.startswith('Amrozi'))
|
|
|
|
|
|
# Run the test suite only when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|