DATA_PARALLEL_SIZE=2
TENSOR_MODEL_PARALLEL_SIZE=2
WORLD_SIZE=$(($DATA_PARALLEL_SIZE * $TENSOR_MODEL_PARALLEL_SIZE))

PYTHONPATH=. torchrun --nproc_per_node $WORLD_SIZE examples/pytorch/text_generation/finetune_text_generation.py \
    --trainer 'nlp-gpt3-trainer' \
    --work_dir './tmp' \
    --model 'damo/nlp_gpt3_text-generation_1.3B' \
    --train_dataset_name 'chinese-poetry-collection' \
    --val_dataset_name 'chinese-poetry-collection' \
    --train_split 'train' \
    --val_split 'test' \
    --preprocessor 'text-gen-jieba-tokenizer' \
    --src_txt 'text1' \
    --tgt_txt 'text2' \
    --max_epochs 3 \
    --per_device_train_batch_size 16 \
    --lr 3e-4 \
    --lr_scheduler 'noam' \
    --eval_metrics 'ppl' \
    --world_size $WORLD_SIZE \
    --tensor_model_parallel_size $TENSOR_MODEL_PARALLEL_SIZE \
    --use_megatron true \
    --use_model_config true
    # Alternative dataset: --train_dataset_name 'DuReader_robust-QG'  # input & output
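
# WORLD_SIZE = DATA_PARALLEL_SIZE * TENSOR_MODEL_PARALLEL_SIZE (2 * 2 = 4), so
# torchrun launches 4 processes, one per GPU: Megatron tensor parallelism splits
# each layer across 2 GPUs, and data parallelism runs 2 such model replicas.
#
# Minimal inference sketch for the finetuned model (a hedged example, not part
# of the original recipe; it assumes the trainer exports the checkpoint under
# '{work_dir}/output', i.e. './tmp/output' here; adjust the path to your run):
python -c "
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Build a text-generation pipeline from the local finetuned checkpoint.
text_generation = pipeline(Tasks.text_generation, model='./tmp/output')

# Prompt with the opening line of a classical poem; the model continues it.
print(text_generation('窗前明月光'))
"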