# In the current version, many arguments are not used in pipelines,
# so a `[being used]` tag marks the arguments that are actually consumed.
version: v0.1
framework: pytorch
task: text-classification

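# Model definition: the `backbone` block follows Hugging Face-style encoder
# hyperparameters. Note that `type: bert` names the implementation, while the
# token ids and sizes below (bos 0, eos 2, pad 1, vocab 50265, 514 positions)
# match a RoBERTa-base checkpoint, consistent with `model_type: roberta`.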
model:
  path: bert-base-sst2
  backbone:
    type: bert
    prefix: bert
    attention_probs_dropout_prob: 0.1
    bos_token_id: 0
    eos_token_id: 2
    hidden_act: elu
    hidden_dropout_prob: 0.1
    hidden_size: 768
    initializer_range: 0.02
    intermediate_size: 3072
    layer_norm_eps: 1e-05
    max_position_embeddings: 514
    model_type: roberta
    num_attention_heads: 12
    num_hidden_layers: 12
    pad_token_id: 1
    type_vocab_size: 1
    vocab_size: 50265
  num_classes: 5

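# `&col_indexs` declares a YAML anchor so the text/label column indices can be
# shared across the dataset splits via the merge key `<<: *col_indexs` below.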
col_index: &col_indexs
  text_col: 0
  label_col: 1

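# Dataset splits: `~` is YAML null, i.e. no file configured for that split.
# Only the valid split is read here, as its `[being used]` tag indicates.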
dataset:
  train:
    <<: *col_indexs
    file: ~
  valid:
    <<: *col_indexs
    file: glue/sst2  # [being used]
  test:
    <<: *col_indexs
    file: ~

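# Preprocessing: a Tokenize step that (presumably) loads its tokenizer from
# the local `tokenizer_name` path rather than fetching one from a hub.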
preprocessor:
  type: Tokenize
  tokenizer_name: /workspace/bert-base-sst2

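# Training hyperparameters. None carry the `[being used]` tag, so the
# evaluation pipeline below ignores this block.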
train:
  batch_size: 256
  learning_rate: 0.00001
  lr_scheduler_type: cosine
  num_steps: 100000

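# Evaluation settings, tagged `[being used]`: accuracy and F1 are computed on
# batches of 32, with inputs truncated to 128 tokens.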
evaluation:  # [being used]
  model_path: .cache/easynlp/
  max_sequence_length: 128
  batch_size: 32
  metrics:
    - accuracy
    - f1