# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import pickle
import shutil
import time
from collections.abc import Mapping

import torch
from torch import distributed as dist
from tqdm import tqdm

from modelscope.utils.data_utils import to_device
from modelscope.utils.file_utils import func_receive_dict_inputs
from modelscope.utils.torch_utils import (broadcast, get_dist_info, is_master,
                                          make_tmp_dir)


def single_gpu_test(model, data_loader, device, metric_classes=None):
    """Test model with a single gpu.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (torch.utils.data.DataLoader): PyTorch data loader.
        device (str | torch.device): The target device for the data.
        metric_classes (List): List of metric classes used to collect metrics.

    Returns:
        dict: The metric values computed by ``metric_classes``.
    """
    model.eval()
    dataset = data_loader.dataset
    with tqdm(total=len(dataset), desc='test samples') as pbar:
        for data in data_loader:
            data = to_device(data, device)
            with torch.no_grad():
                # Unpack the batch into keyword arguments unless the model's
                # ``forward`` expects a single dict input.
                if isinstance(data, Mapping) and not func_receive_dict_inputs(
                        model.forward):
                    result = model.forward(**data)
                else:
                    result = model.forward(data)
            if metric_classes is not None:
                for metric_cls in metric_classes:
                    metric_cls.add(result, data)
            if isinstance(data, dict):
                batch_size = len(next(iter(data.values())))
            else:
                batch_size = len(data)
            for _ in range(batch_size):
                pbar.update()

    metric_values = {}
    if metric_classes is not None:  # mirror the ``None`` check used above
        for metric_cls in metric_classes:
            metric_values.update(metric_cls.evaluate())

    return metric_values
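# Example usage of ``single_gpu_test`` (an illustrative sketch: ``my_model``,
# ``my_dataset`` and ``Accuracy`` are hypothetical stand-ins, not modelscope
# APIs; any object exposing ``add(outputs, inputs)`` and ``evaluate()``
# methods can serve as a metric):
#
#     loader = torch.utils.data.DataLoader(my_dataset, batch_size=8)
#     metrics = single_gpu_test(my_model, loader, device='cuda:0',
#                               metric_classes=[Accuracy()])
#     print(metrics)  # e.g. {'accuracy': 0.91}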


def multi_gpu_test(model,
                   data_loader,
                   device,
                   tmpdir=None,
                   gpu_collect=False,
                   metric_classes=None):
    """Test model with multiple gpus.

    This method tests the model with multiple gpus and collects the results
    under two different modes: gpu and cpu. By setting ``gpu_collect=True``,
    it encodes results to gpu tensors and uses gpu communication for results
    collection. In cpu mode it saves the results on different gpus to
    ``tmpdir`` and collects them by the rank 0 worker.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (torch.utils.data.DataLoader): PyTorch data loader.
        device (str | torch.device): The target device for the data.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect results.
        metric_classes (List): List of metric classes used to collect metrics.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    data_list = []
    dataset = data_loader.dataset

    time.sleep(2)  # This line can prevent deadlock problem in some cases.

    rank, world_size = get_dist_info()

    count = 0
    with tqdm(total=len(dataset), desc='test samples with multi gpus') as pbar:
        for _, data in enumerate(data_loader):
            data = to_device(data, device)
            data_list.append(data)
            with torch.no_grad():
                # If the batch is a mapping and ``forward`` does not take a
                # single dict argument, unpack the batch into keyword
                # arguments; otherwise pass the batch through as-is.
                if isinstance(data, Mapping) and not func_receive_dict_inputs(
                        model.forward):
                    result = model.forward(**data)
                else:
                    result = model.forward(data)
            results.append(result)

            if rank == 0:
                # Only rank 0 drives the progress bar, so each local batch
                # represents ``world_size`` batches processed across ranks.
                if isinstance(data, dict):
                    batch_size = len(next(iter(data.values())))
                else:
                    batch_size = len(data)
                batch_size_all = batch_size * world_size
                count += batch_size_all
                if count > len(dataset):
                    # The last step may overshoot the dataset size; clamp the
                    # final update so the bar ends exactly at len(dataset).
                    batch_size_all = len(dataset) - (count - batch_size_all)
                for _ in range(batch_size_all):
                    pbar.update()

    # TODO: all-gathering the data list may cost a lot of memory and needs to
    # be redesigned.
    # collect results and data from all ranks
    if gpu_collect:
        results = collect_results_gpu(results, len(dataset))
        data_list = collect_results_gpu(data_list, len(dataset))
    else:
        if tmpdir is None:
            tmpdir = make_tmp_dir()
        results = collect_results_cpu(results, len(dataset),
                                      os.path.join(tmpdir, 'predict'))
        data_list = collect_results_cpu(data_list, len(dataset),
                                        os.path.join(tmpdir, 'groundtruth'))
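    # Design note: the cpu path writes one pickle file per rank and scales to
    # large outputs at the cost of filesystem traffic, while the gpu path
    # keeps every rank's serialized results in gpu memory at once (hence the
    # TODO above about memory cost).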

    if is_master():
        assert len(data_list) == len(
            results), f'size mismatch {len(data_list)} and {len(results)}'
        if metric_classes is not None:
            for i in range(len(data_list)):
                for metric_cls in metric_classes:
                    metric_cls.add(results[i], data_list[i])
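    # Each ``metric_cls`` is assumed (from its usage here) to buffer batches
    # via ``add(outputs, inputs)`` and to reduce the buffer to final values
    # via ``evaluate()`` below.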

    metric_values = {}
    if rank == 0:
        for metric_cls in metric_classes:
            metric_values.update(metric_cls.evaluate())
    if world_size > 1:
        metric_values = broadcast(metric_values, 0)

    return metric_values
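
# A minimal usage sketch of the multi-gpu evaluation loop above (assumptions:
# the enclosing function takes ``model``, ``data_loader``, ``device``,
# ``tmpdir``, ``gpu_collect`` and ``metric_classes``; the name and the metric
# object below are illustrative, not part of this file):
#
#     metric_values = multi_gpu_test(
#         model,
#         data_loader,
#         device='cuda',
#         gpu_collect=False,
#         metric_classes=[accuracy_metric])
#
# With ``gpu_collect=False`` results are gathered through the filesystem via
# ``collect_results_cpu`` below; with ``gpu_collect=True`` they stay on the
# gpus and are gathered via ``collect_results_gpu``.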


def collect_results_cpu(result_part, size, tmpdir=None):
    """Collect results under cpu mode.

    In cpu mode, this function saves the result parts produced on the
    different gpus to ``tmpdir`` and collects them on the rank 0 worker.

    Args:
        result_part (list): Result list containing the result parts
            to be collected.
        size (int): Expected size of the collected results, commonly equal
            to the length of the dataset.
        tmpdir (str | None): Temporary directory in which to store the
            collected results. If set to None, a random temporary directory
            will be created.

    Returns:
        list: The collected results.
    """
    rank, world_size = get_dist_info()
    if tmpdir is None:
        tmpdir = make_tmp_dir()
    if not os.path.exists(tmpdir) and is_master():
        os.makedirs(tmpdir)
    dist.barrier()

    # dump the part result to the tmp dir
    with open(os.path.join(tmpdir, f'part_{rank}.pkl'), 'wb') as f:
        pickle.dump(result_part, f)
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load the results of all parts from the tmp dir
        part_list = []
        for i in range(world_size):
            part_file = os.path.join(tmpdir, f'part_{i}.pkl')
            with open(part_file, 'rb') as f:
                part_result = pickle.load(f)
            # When data is severely insufficient, an empty part_result
            # on a certain gpu could make the overall outputs empty.
            if part_result:
                part_list.append(part_result)
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove the tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results
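
# Ordering note: ``zip(*part_list)`` interleaves one sample from each rank in
# turn, which restores the original dataset order under the usual round-robin
# sharding of a DistributedSampler. For example, with world_size=2 the parts
# [r0_0, r0_1] and [r1_0, r1_1] are merged as [r0_0, r1_0, r0_1, r1_1].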


def collect_results_gpu(result_part, size):
    """Collect results under gpu mode.

    In gpu mode, this function encodes the result parts as gpu tensors and
    uses gpu communication (``all_gather``) to collect them.

    Args:
        result_part (list): Result list containing the result parts
            to be collected.
        size (int): Expected size of the collected results, commonly equal
            to the length of the dataset.

    Returns:
        list: The collected results.
    """
    rank, world_size = get_dist_info()
    # dump the result part to a tensor with pickle
    part_tensor = torch.tensor(
        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
    # gather the shape of every rank's result part tensor
    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
    shape_list = [shape_tensor.clone() for _ in range(world_size)]
    dist.all_gather(shape_list, shape_tensor)
    # pad the result part tensor to the max length
    shape_max = torch.tensor(shape_list).max()
    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
    part_send[:shape_tensor[0]] = part_tensor
    part_recv_list = [
        part_tensor.new_zeros(shape_max) for _ in range(world_size)
    ]
    # gather all result parts
    dist.all_gather(part_recv_list, part_send)

    if rank == 0:
        part_list = []
        for recv, shape in zip(part_recv_list, shape_list):
            part_result = pickle.loads(
                recv[:shape[0]].cpu().numpy().tobytes())
            # When data is severely insufficient, an empty part_result
            # on a certain gpu could make the overall outputs empty.
            if part_result:
                part_list.append(part_result)
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        return ordered_results
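
# A minimal single-process sketch of the pickle <-> uint8-tensor round trip
# used above (illustration only; real use requires an initialized process
# group and cuda devices):
#
#     part = ['hello', 42]
#     t = torch.tensor(bytearray(pickle.dumps(part)), dtype=torch.uint8)
#     assert pickle.loads(t.numpy().tobytes()) == part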