#!/bin/bash
# Fine-tune the 1.3B Chinese GPT-3 model for text generation with Megatron
# tensor model parallelism, using the ModelScope example trainer.

DATA_PARALLEL_SIZE=2
TENSOR_MODEL_PARALLEL_SIZE=2
# One process per GPU: world size = data parallel size x tensor parallel size.
WORLD_SIZE=$(($DATA_PARALLEL_SIZE * $TENSOR_MODEL_PARALLEL_SIZE))

# Single-node launch: torchrun starts $WORLD_SIZE processes on this machine.
PYTHONPATH=. torchrun --nproc_per_node $WORLD_SIZE examples/pytorch/text_generation/finetune_text_generation.py \
    --trainer 'nlp-gpt3-trainer' \
    --work_dir './tmp' \
    --model 'damo/nlp_gpt3_text-generation_1.3B' \
    --train_dataset_name 'chinese-poetry-collection' \
    --val_dataset_name 'chinese-poetry-collection' \
    --train_split 'train' \
    --val_split 'test' \
    --preprocessor 'text-gen-jieba-tokenizer' \
    --src_txt 'text1' \
    --tgt_txt 'text2' \
    --max_epochs 3 \
    --per_device_train_batch_size 16 \
    --lr 3e-4 \
    --lr_scheduler 'noam' \
    --eval_metrics 'ppl' \
    --world_size $WORLD_SIZE \
    --tensor_model_parallel_size $TENSOR_MODEL_PARALLEL_SIZE \
    --use_megatron true \
    --use_model_config true

# Alternative dataset for question generation (input & output columns);
# see the sketch below.
# --train_dataset_name 'DuReader_robust-QG' \
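# A minimal sketch of that swap against the command above. Assumptions: the
# DuReader_robust-QG dataset's source/target text columns are named 'input'
# and 'output' (inferred from the "input & output" note); verify against the
# dataset card before running. All other flags stay unchanged.
#   --train_dataset_name 'DuReader_robust-QG' \
#   --val_dataset_name 'DuReader_robust-QG' \
#   --src_txt 'input' \
#   --tgt_txt 'output' \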