modelscope/tests/pydatasets/test_py_dataset.py

import unittest

import datasets as hfdata

from modelscope.pydatasets import PyDataset


class PyDatasetTest(unittest.TestCase):

    def setUp(self):
        # ds1 is initialized from an in-memory JSON-like dict
        self.json_data = {
            'dummy': [{
                'a': i,
                'x': i * 10,
                'c': i * 100
            } for i in range(1, 11)]
        }
        hfds1 = hfdata.Dataset.from_dict(self.json_data)
        self.ds1 = PyDataset.from_hf_dataset(hfds1)
        # ds2 is initialized from the HF Hub
        hfds2 = hfdata.load_dataset(
            'glue', 'mrpc', revision='2.0.0', split='train')
        self.ds2 = PyDataset.from_hf_dataset(hfds2)

    def tearDown(self):
        pass

    def test_to_hf_dataset(self):
        hfds = self.ds1.to_hf_dataset()
        hfds1 = hfdata.Dataset.from_dict(self.json_data)
        self.assertEqual(hfds.data, hfds1.data)
        # apply a simple map function
        hfds = hfds.map(lambda e: {'new_feature': e['dummy']['a']})
        self.assertEqual(len(hfds['new_feature']), 10)
        hfds2 = self.ds2.to_hf_dataset()
        self.assertTrue(hfds2[0]['sentence1'].startswith('Amrozi'))


if __name__ == '__main__':
    unittest.main()