mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-25 04:29:22 +01:00
add basic remap column wrapper
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10539917 * add basic remap column wrapper
This commit is contained in:
@@ -563,6 +563,18 @@ class MsDataset:
|
||||
self._hf_ds.reset_format()
|
||||
return self._hf_ds
|
||||
|
||||
def remap_columns(self, column_mapping: Dict[str, str]) -> Dataset:
    """
    Apply a column-name mapping to the wrapped hf dataset.

    The wrapped hf dataset has its format reset first, then the rename is
    delegated to its ``rename_columns`` method. The result of that call is
    returned as-is; it is not wrapped in an MsDataset.
    TODO: support native MsDataset column rename.

    Args:
        column_mapping: maps each original column name to its new name

    Returns:
        the hf dataset returned by ``rename_columns``
    """
    # Reset the format before renaming, as the original implementation does.
    self._hf_ds.reset_format()
    renamed_ds = self._hf_ds.rename_columns(column_mapping)
    return renamed_ds
|
||||
|
||||
@staticmethod
|
||||
def upload(object_name: str,
|
||||
local_file_path: str,
|
||||
|
||||
@@ -24,17 +24,16 @@ class TestFinetuneMPlug(unittest.TestCase):
|
||||
datadict = MsDataset.load(
|
||||
'coco_captions_small_slice',
|
||||
download_mode=DownloadMode.FORCE_REDOWNLOAD)
|
||||
self.train_dataset = MsDataset(datadict['train'].to_hf_dataset().map(
|
||||
lambda _: {
|
||||
'question': 'what the picture describes?'
|
||||
}).rename_column('image:FILE',
|
||||
'image').rename_column('answer:Value', 'answer'))
|
||||
self.test_dataset = MsDataset(datadict['test'].to_hf_dataset().map(
|
||||
lambda _: {
|
||||
'question': 'what the picture describes?'
|
||||
}).rename_column('image:FILE',
|
||||
'image').rename_column('answer:Value', 'answer'))
|
||||
|
||||
self.train_dataset = MsDataset(
|
||||
datadict['train'].remap_columns({
|
||||
'image:FILE': 'image',
|
||||
'answer:Value': 'answer'
|
||||
}).map(lambda _: {'question': 'what the picture describes?'}))
|
||||
self.test_dataset = MsDataset(
|
||||
datadict['test'].remap_columns({
|
||||
'image:FILE': 'image',
|
||||
'answer:Value': 'answer'
|
||||
}).map(lambda _: {'question': 'what the picture describes?'}))
|
||||
self.max_epochs = 2
|
||||
|
||||
def tearDown(self):
|
||||
|
||||
@@ -130,10 +130,16 @@ class TestFinetuneTextGeneration(unittest.TestCase):
|
||||
def test_finetune_cnndm(self):
|
||||
from modelscope.msdatasets import MsDataset
|
||||
dataset_dict = MsDataset.load('DuReader_robust-QG')
|
||||
train_dataset = dataset_dict['train'].to_hf_dataset() \
|
||||
.rename_columns({'text1': 'src_txt', 'text2': 'tgt_txt'})
|
||||
eval_dataset = dataset_dict['validation'].to_hf_dataset() \
|
||||
.rename_columns({'text1': 'src_txt', 'text2': 'tgt_txt'})
|
||||
train_dataset = dataset_dict['train'].remap_columns({
|
||||
'text1': 'src_txt',
|
||||
'text2': 'tgt_txt'
|
||||
})
|
||||
eval_dataset = dataset_dict['validation'].remap_columns({
|
||||
'text1':
|
||||
'src_txt',
|
||||
'text2':
|
||||
'tgt_txt'
|
||||
})
|
||||
num_warmup_steps = 200
|
||||
os.environ['LOCAL_RANK'] = '0'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user