mirror of
https://github.com/modelscope/modelscope.git
synced 2026-02-24 20:19:51 +01:00
1. 增加fine-tuning流程
2. 增加evaluation流程
3. 关联数据集nlp_convai_text2sql_pretrain_cn_trainset
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11276053
* add space-t trainer
* revise for trainer
* Merge branch 'master' into dev/tableqa_finetune
* revise for trainer
* Merge remote-tracking branch 'origin' into dev/tableqa_finetune
47 lines
1.6 KiB
Python
47 lines
1.6 KiB
Python
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
import os
|
|
import unittest
|
|
|
|
import json
|
|
|
|
from modelscope.msdatasets import MsDataset
|
|
from modelscope.trainers.nlp.table_question_answering_trainer import \
|
|
TableQuestionAnsweringTrainer
|
|
from modelscope.utils.constant import DownloadMode, ModelFile
|
|
from modelscope.utils.test_utils import test_level
|
|
|
|
|
|
class TableQuestionAnsweringTest(unittest.TestCase):
    """Fine-tune and evaluate ``TableQuestionAnsweringTrainer`` end to end."""

    @staticmethod
    def _load_json_files(paths):
        """Parse every file named in ``paths`` as JSON.

        Args:
            paths: Iterable of path-like values; each one is converted with
                ``str()`` before being opened.

        Returns:
            A list with one parsed JSON object per entry of ``paths``, in
            iteration order.
        """
        records = []
        for path in paths:
            # Use a context manager so each handle is closed as soon as it
            # has been parsed instead of leaking one descriptor per sample.
            # JSON text is read as UTF-8 rather than the locale default.
            with open(str(path), 'r', encoding='utf-8') as f:
                records.append(json.load(f))
        return records

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_trainer_with_model_name(self):
        """Train for two epochs from a model id, then run evaluation."""
        # load data
        input_dataset = MsDataset.load(
            'ChineseText2SQL', download_mode=DownloadMode.FORCE_REDOWNLOAD)
        # NOTE(review): this reaches into the private `_hf_ds` attribute;
        # column index 1 presumably holds the local paths of the JSON
        # samples -- confirm against the dataset schema.
        train_dataset = self._load_json_files(
            input_dataset['train']._hf_ds.data[1])
        eval_dataset = self._load_json_files(
            input_dataset['test']._hf_ds.data[1])
        print('size of training set', len(train_dataset))
        print('size of evaluation set', len(eval_dataset))

        model_id = 'damo/nlp_convai_text2sql_pretrain_cn'
        trainer = TableQuestionAnsweringTrainer(
            model=model_id,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
        )
        trainer.train(
            batch_size=8,
            total_epoches=2,
        )
        # Evaluate from the checkpoint expected inside the model directory
        # (presumably written there by `train()` -- confirm in the trainer).
        trainer.evaluate(
            checkpoint_path=os.path.join(trainer.model.model_dir,
                                         'finetuned_model.bin'))
|
|
|
|
|
|
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|