diff --git a/README.md b/README.md index 4a4ce792..d3d92865 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,8 @@

English | - 中文 + 中文 | + 日本語

diff --git a/README_ja.md b/README_ja.md new file mode 100644 index 00000000..073b0c48 --- /dev/null +++ b/README_ja.md @@ -0,0 +1,300 @@ + +

+
+ +
+

+ +

+ +[![PyPI](https://img.shields.io/pypi/v/modelscope)](https://pypi.org/project/modelscope/) + +[![license](https://img.shields.io/github/license/modelscope/modelscope.svg)](https://github.com/modelscope/modelscope/blob/master/LICENSE) +[![open issues](https://isitmaintained.com/badge/open/modelscope/modelscope.svg)](https://github.com/modelscope/modelscope/issues) +[![GitHub pull-requests](https://img.shields.io/github/issues-pr/modelscope/modelscope.svg)](https://GitHub.com/modelscope/modelscope/pull/) +[![GitHub latest commit](https://badgen.net/github/last-commit/modelscope/modelscope)](https://GitHub.com/modelscope/modelscope/commit/) +[![Leaderboard](https://img.shields.io/badge/ModelScope-Check%20Your%20Contribution-orange)](https://opensource.alibaba.com/contribution_leaderboard/details?projectValue=modelscope) + + + + +

+

+ English | + 中文 | + 日本語 +

+

+ + +
+ +# はじめに + +[ModelScope](https://www.modelscope.cn) は、"Model-as-a-Service"(MaaS) の概念に基づいて構築されています。AI コミュニティから最も先進的な機械学習モデルを集め、実世界のアプリケーションで AI モデルを活用するプロセスを合理化することを目指しています。このリポジトリでオープンソース化されている中核となる ModelScope ライブラリは、開発者がモデルの推論、トレーニング、評価を実行するためのインターフェースと実装を提供します。 + + +特に、API 抽象化の豊富なレイヤーにより、ModelScope ライブラリは、CV、NLP、音声、マルチモダリティ、科学計算などのドメインにまたがる最先端のモデルを探索するための統一された体験を提供します。様々な分野のモデル貢献者は、レイヤー化された API を通じてモデルを ModelScope エコシステムに統合することができ、モデルへの容易で統一されたアクセスを可能にします。一旦統合されると、モデルの推論、微調整、および評価は、わずか数行のコードで行うことができます。一方、モデルアプリケーションの様々なコンポーネントを必要に応じてカスタマイズできるように、柔軟性も提供されています。 + +ModelScope ライブラリは、様々なモデルの実装を保持するだけでなく、ModelScope のバックエンドサービス、特に Model-Hub と Dataset-Hub との必要な相互作用も可能にします。このような相互作用により、エンティティの検索、バージョン管理、キャッシュ管理など、様々なエンティティ(モデルやデータセット)の管理をアンダーザフードでシームレスに実行することができます。 + +# モデルとオンラインアクセシビリティ + +[ModelScope](https://www.modelscope.cn) では、NLP、CV、オーディオ、マルチモダリティ、科学のための AI などの分野の最新開発を網羅した、何百ものモデルが一般公開されています(700 以上、カウント中)。これらのモデルの多くは、特定の分野における SOTA を代表するものであり、ModelScope でオープンソースとしてデビューしました。ユーザーは、ModelScope([modelscope.cn](http://www.modelscope.cn)) にアクセスし、数回クリックするだけで、オンライン体験を通じて、これらのモデルがどのように機能するかを直接体験することができます。また、[ModelScope](https://www.modelscope.cn) をワンクリックするだけで、クラウド上のすぐに使える CPU/GPU 開発環境に支えられた ModelScope ノートブックを通じて、すぐに開発者体験が可能です。 + + +

+
+ +
+

+ +代表的な例をいくつか挙げると: + +NLP: + +* [nlp_gpt3_text-generation_2.7B](https://modelscope.cn/models/damo/nlp_gpt3_text-generation_2.7B) + +* [ChatYuan-large](https://modelscope.cn/models/ClueAI/ChatYuan-large) + +* [mengzi-t5-base](https://modelscope.cn/models/langboat/mengzi-t5-base) + +* [nlp_csanmt_translation_en2zh](https://modelscope.cn/models/damo/nlp_csanmt_translation_en2zh) + +* [nlp_raner_named-entity-recognition_chinese-base-news](https://modelscope.cn/models/damo/nlp_raner_named-entity-recognition_chinese-base-news) + +* [nlp_structbert_word-segmentation_chinese-base](https://modelscope.cn/models/damo/nlp_structbert_word-segmentation_chinese-base) + +* [Erlangshen-RoBERTa-330M-Sentiment](https://modelscope.cn/models/fengshenbang/Erlangshen-RoBERTa-330M-Sentiment) + +* [nlp_convai_text2sql_pretrain_cn](https://modelscope.cn/models/damo/nlp_convai_text2sql_pretrain_cn) + +マルチモーダル: + +* [multi-modal_clip-vit-base-patch16_zh](https://modelscope.cn/models/damo/multi-modal_clip-vit-base-patch16_zh) + +* [ofa_pretrain_base_zh](https://modelscope.cn/models/damo/ofa_pretrain_base_zh) + +* [Taiyi-Stable-Diffusion-1B-Chinese-v0.1](https://modelscope.cn/models/fengshenbang/Taiyi-Stable-Diffusion-1B-Chinese-v0.1) + +* [mplug_visual-question-answering_coco_large_en](https://modelscope.cn/models/damo/mplug_visual-question-answering_coco_large_en) + +CV: + +* [cv_controlnet_controllable-image-generation_nine-annotators](https://modelscope.cn/models/dienstag/cv_controlnet_controllable-image-generation_nine-annotators/summary) + +* [cv_tinynas_object-detection_damoyolo](https://modelscope.cn/models/damo/cv_tinynas_object-detection_damoyolo) + +* [cv_unet_person-image-cartoon_compound-models](https://modelscope.cn/models/damo/cv_unet_person-image-cartoon_compound-models) + +* [cv_convnextTiny_ocr-recognition-general_damo](https://modelscope.cn/models/damo/cv_convnextTiny_ocr-recognition-general_damo) + +* 
[cv_resnet18_human-detection](https://modelscope.cn/models/damo/cv_resnet18_human-detection) + +* [cv_resnet50_face-detection_retinaface](https://modelscope.cn/models/damo/cv_resnet50_face-detection_retinaface) + +* [cv_unet_image-matting](https://modelscope.cn/models/damo/cv_unet_image-matting) + +* [cv_F3Net_product-segmentation](https://modelscope.cn/models/damo/cv_F3Net_product-segmentation) + +* [cv_resnest101_general_recognition](https://modelscope.cn/models/damo/cv_resnest101_general_recognition) + + +音声: + +* [speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch) + +* [speech_sambert-hifigan_tts_zh-cn_16k](https://modelscope.cn/models/damo/speech_sambert-hifigan_tts_zh-cn_16k) + +* [speech_charctc_kws_phone-xiaoyun](https://modelscope.cn/models/damo/speech_charctc_kws_phone-xiaoyun) + +* [u2pp_conformer-asr-cn-16k-online](https://modelscope.cn/models/wenet/u2pp_conformer-asr-cn-16k-online) + +* [speech_fsmn_vad_zh-cn-16k-common-pytorch](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) + +* [punc_ct-transformer_zh-cn-common-vocab272727-pytorch](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary) + +* [speech_frcrn_ans_cirm_16k](https://modelscope.cn/models/damo/speech_frcrn_ans_cirm_16k) + +* [speech_dfsmn_aec_psm_16k](https://modelscope.cn/models/damo/speech_dfsmn_aec_psm_16k) + + + +科学用 AI: + +* [uni-fold-monomer](https://modelscope.cn/models/DPTech/uni-fold-monomer/summary) + +* [uni-fold-multimer](https://modelscope.cn/models/DPTech/uni-fold-multimer/summary) + +**注:** ModelScope のほとんどのモデルは公開されており、アカウント登録なしで modelscope のウェブサイト([www.modelscope.cn](https://www.modelscope.cn))からダウンロードすることができます。modelscope のライブラリや git が提供する API を使用してモデルをダウンロードするには、[モデルのダウンロード](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E4%B8%8B%E8%BD%BD)の説明を参照してください。 + +# クイックツアー + 
+様々なタスクに対して、`pipeline` による推論、`Trainer` による微調整と評価のための統一されたインターフェースを提供します。 + +入力の種類(画像、テキスト、音声、動画...)を問わず、推論パイプラインはわずか数行のコードで実装することができます: + +```python +>>> from modelscope.pipelines import pipeline +>>> word_segmentation = pipeline('word-segmentation',model='damo/nlp_structbert_word-segmentation_chinese-base') +>>> word_segmentation('今天天气不错,适合出去游玩') +{'output': '今天 天气 不错 , 适合 出去 游玩'} +``` + +画像があれば、ポートレート・マット(別名、背景除去)は次のコード・スニペットで実現できます: + +![image](data/resource/portrait_input.png) + +```python +>>> import cv2 +>>> from modelscope.pipelines import pipeline + +>>> portrait_matting = pipeline('portrait-matting') +>>> result = portrait_matting('https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_matting.png') +>>> cv2.imwrite('result.png', result['output_img']) +``` + +背景を除去した出力画像は次のようになります: +![image](data/resource/portrait_output.png) + + +ファインチューニングと評価も、トレーニングデータセットとトレーナーをセットアップする数行のコードで行うことができ、モデルのトレーニングと評価の重い作業は `trainer.train()` と `trainer.evaluate()` インターフェースの実装に +カプセル化されています。 + +例えば、gpt3 の基本モデル(1.3B)を中国語詩のデータセットでファインチューニングすることで、中国語詩の生成に使用できるモデルを得ることができます。 + +```python +>>> from modelscope.metainfo import Trainers +>>> from modelscope.msdatasets import MsDataset +>>> from modelscope.trainers import build_trainer + +>>> train_dataset = MsDataset.load('chinese-poetry-collection', split='train').remap_columns({'text1': 'src_txt'}) +>>> eval_dataset = MsDataset.load('chinese-poetry-collection', split='test').remap_columns({'text1': 'src_txt'}) +>>> max_epochs = 10 +>>> tmp_dir = './gpt3_poetry' + +>>> kwargs = dict( + model='damo/nlp_gpt3_text-generation_1.3B', + train_dataset=train_dataset, + eval_dataset=eval_dataset, + max_epochs=max_epochs, + work_dir=tmp_dir) + +>>> trainer = build_trainer(name=Trainers.gpt3_trainer, default_args=kwargs) +>>> trainer.train() +``` + +# ModelScope ライブラリを使用する理由 + +1. 
統一された簡潔なユーザーインターフェースは、異なるタスクや異なるモデル用に抽象化されています。モデルの推論とトレーニングは、それぞれわずか 3 行と 10 行のコードで実装できます。ModelScope コミュニティで異なる分野のモデルを探索するのに便利です。ModelScope に統合されたモデルはすべてすぐに使用できるため、教育現場でも産業現場でも、AI を簡単に使い始めることができます。 + +2. ModelScope は、モデル中心の開発とアプリケーション体験を提供します。モデルのトレーニング、推論、エクスポート、デプロイメントのサポートを合理化し、ユーザーが ModelScope エコシステムに基づいて独自の MLOps を構築することを容易にします。 + +3. モデルの推論とトレーニングのプロセスでは、モジュール設計が導入され、豊富な機能モジュールの実装が提供され、ユーザーが独自のモデルの推論、トレーニング、その他のプロセスをカスタマイズするのに便利です。 + +4. 分散モデル学習、特に大規模モデルに対しては、データ並列、モデル並列、ハイブリッド並列など、豊富な学習ストラテジーサポートを提供します。 + +# インストール + +## Docker + +ModelScope ライブラリは現在、PyTorch、TensorFlow、ONNX を含む、モデルの学習と推論のための一般的なディープラーニングフレームワークをサポートしています。すべてのリリースは、Python 3.7+、Pytorch 1.8+、Tensorflow1.15、または Tensorflow2.0+ でテストされ、実行されます。 + +ModelScope のすべてのモデルをすぐに使えるようにするため、すべてのリリースで公式の docker イメージが提供されています。開発者はこの docker イメージをベースに、環境のインストールや設定をすべて省略して直接使用することができます。現在、CPU イメージと GPU イメージの最新バージョンは以下から入手できます: + +CPU docker イメージ +```shell +# py37 +registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-py37-torch1.11.0-tf1.15.5-1.6.1 + +# py38 +registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-py38-torch1.11.0-tf1.15.5-1.6.1 +``` + +GPU docker イメージ +```shell +# py37 +registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.3.0-py37-torch1.11.0-tf1.15.5-1.6.1 + +# py38 +registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.3.0-py38-torch1.11.0-tf1.15.5-1.6.1 +``` + +## ローカル Python 環境のセットアップ + +pip と conda を使って、ModelScope のローカル環境を構築することもできます。 ローカルの Python 環境を構築するには [anaconda](https://docs.anaconda.com/anaconda/install/) をお勧めします: + +```shell +conda create -n modelscope python=3.7 +conda activate modelscope +``` + +PyTorch または TensorFlow は、それぞれのモデルの要件に応じて個別にインストールすることができます。 +* pytorch のインストール [doc](https://pytorch.org/get-started/locally/) +* Tensorflow のインストール [doc](https://www.tensorflow.org/install/pip) + +必要な機械学習フレームワークをインストールした後、以下のように modelscope ライブラリをインストールします: + +モデル/データセットのダウンロードを試したり、modelscope 
フレームワークで遊びたいだけなら、modelscope のコア・コンポーネントをインストールすることができます: +```shell +pip install modelscope +``` + +マルチモーダルモデルを使いたい場合: +```shell +pip install modelscope[multi-modal] +``` + +nlp モデルを使いたい場合: +```shell +pip install modelscope[nlp] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +CV モデルを使いたい場合: +```shell +pip install modelscope[cv] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +オーディオモデルを使用したい場合: +```shell +pip install modelscope[audio] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +科学モデルを使いたい場合: +```shell +pip install modelscope[science] -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html +``` + +`備考`: +1. 現在、一部のオーディオタスクモデルは python3.7、tensorflow1.15.4 の Linux 環境のみに対応しています。他のほとんどのモデルは Windows と Mac(x86) にインストールして使うことができます。 + +2. オーディオ分野では、wav ファイルの処理にサードパーティ製のライブラリ SoundFile を使用しているモデルがあります。Linux では、SoundFile の libsndfile([doc link](https://github.com/bastibe/python-soundfile#installation)) を手動でインストールする必要があります。Windows や MacOS では、ユーザーが操作しなくても自動的にインストールされます。例えば、Ubuntu の場合、以下のコマンドでインストールできます: + ```shell + sudo apt-get update + sudo apt-get install libsndfile1 + ``` + +3. 
コンピュータビジョンのモデルによっては mmcv-full が必要です。mmcv [インストールガイド](https://github.com/open-mmlab/mmcv#installation)を参照してください。最小限のインストールは以下の通りです: + + ```shell + pip uninstall mmcv # mmcv をインストールしている場合は、アンインストールしてください + pip install -U openmim + mim install mmcv-full + ``` + + + +# 詳細 + +私たちは、以下のような追加書類を提供します: +* [より詳細なインストールガイド](https://modelscope.cn/docs/%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85) +* [タスクの紹介](https://modelscope.cn/docs/%E4%BB%BB%E5%8A%A1%E7%9A%84%E4%BB%8B%E7%BB%8D) +* [モデル推論にパイプラインを使う](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E6%8E%A8%E7%90%86Pipeline) +* [ファインチューニング例](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AE%AD%E7%BB%83Train) +* [データの前処理](https://modelscope.cn/docs/%E6%95%B0%E6%8D%AE%E7%9A%84%E9%A2%84%E5%A4%84%E7%90%86) +* [評価](https://modelscope.cn/docs/%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AF%84%E4%BC%B0) +* [ModelScope に自分のモデルを投稿する](https://modelscope.cn/docs/ModelScope%E6%A8%A1%E5%9E%8B%E6%8E%A5%E5%85%A5%E6%B5%81%E7%A8%8B%E6%A6%82%E8%A7%88) + +# ライセンス + +このプロジェクトのライセンスは [Apache License (Version 2.0)](https://github.com/modelscope/modelscope/blob/master/LICENSE) です。 diff --git a/README_zh.md b/README_zh.md index f5401f33..7cac99fb 100644 --- a/README_zh.md +++ b/README_zh.md @@ -21,7 +21,8 @@

English | - 中文 + 中文 | + 日本語

diff --git a/examples/pytorch/auto_speech_recognition/finetune_speech_recognition.py b/examples/pytorch/auto_speech_recognition/finetune_speech_recognition.py index 4d62f66f..47af0b90 100644 --- a/examples/pytorch/auto_speech_recognition/finetune_speech_recognition.py +++ b/examples/pytorch/auto_speech_recognition/finetune_speech_recognition.py @@ -1,15 +1,19 @@ import os from modelscope.metainfo import Trainers -from modelscope.msdatasets.audio.asr_dataset import ASRDataset +from modelscope.msdatasets.dataset_cls.custom_datasets import ASRDataset from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode def modelscope_finetune(params): if not os.path.exists(params.output_dir): os.makedirs(params.output_dir, exist_ok=True) # dataset split ["train", "validation"] - ds_dict = ASRDataset.load(params.data_path, namespace='speech_asr') + ds_dict = ASRDataset.load( + params.data_path, + namespace='speech_asr', + download_mode=params.download_mode) kwargs = dict( model=params.model, data_dir=ds_dict, @@ -36,5 +40,6 @@ if __name__ == '__main__': # 如果dataset_type="large",batch_bins单位为毫秒, params.max_epoch = 50 # 最大训练轮数 params.lr = 0.00005 # 设置学习率 + params.download_mode = DownloadMode.FORCE_REDOWNLOAD # 重新下载数据,否则设置为默认值DownloadMode.REUSE_DATASET_IF_EXISTS modelscope_finetune(params) diff --git a/examples/pytorch/baichuan/finetune_baichuan.py b/examples/pytorch/baichuan/finetune_baichuan.py index 075ebc31..353f5023 100644 --- a/examples/pytorch/baichuan/finetune_baichuan.py +++ b/examples/pytorch/baichuan/finetune_baichuan.py @@ -219,9 +219,7 @@ kwargs = dict( train_dataset=train_dataset, eval_dataset=validation_dataset, seed=args.seed, - cfg_modify_fn=cfg_modify_fn, - # No placement for model, leave the model to `device_map` - device='cpu' if args.device_map else 'gpu') + cfg_modify_fn=cfg_modify_fn) trainer: EpochBasedTrainer = build_trainer( name=args.trainer, default_args=kwargs) diff --git 
a/examples/pytorch/chatglm6b/chatglm_trainer.py b/examples/pytorch/chatglm6b/chatglm_trainer.py index b34563bd..84167713 100644 --- a/examples/pytorch/chatglm6b/chatglm_trainer.py +++ b/examples/pytorch/chatglm6b/chatglm_trainer.py @@ -6,7 +6,7 @@ from transformers.deepspeed import is_deepspeed_zero3_enabled from modelscope import EpochBasedTrainer, get_logger -logger = get_logger(__name__) +logger = get_logger() class Seq2SeqTrainer(EpochBasedTrainer): @@ -16,6 +16,8 @@ class Seq2SeqTrainer(EpochBasedTrainer): if ignore_pad_token_for_loss: tokens = np.where(tokens != -100, tokens, self.tokenizer.pad_token_id) + tokens = np.where(tokens < self.tokenizer.vocab_size, tokens, + self.tokenizer.pad_token_id) return [ t for t in self.tokenizer.batch_decode( tokens, skip_special_tokens=True) if t != '' @@ -59,7 +61,9 @@ class Seq2SeqTrainer(EpochBasedTrainer): gen_kwargs['input_ids'] = generation_inputs gen_kwargs['pad_token_id'] = self.tokenizer.pad_token_id - generated_tokens = self.model.generate(**gen_kwargs) + self.model.eval() + with torch.no_grad(): + generated_tokens = self.model.generate(**gen_kwargs) generated_tokens = generated_tokens[:, generation_inputs.size()[-1]:] # in case the batch is shorter than max length, the output should be padded diff --git a/examples/pytorch/chatglm6b/finetune.py b/examples/pytorch/chatglm6b/finetune.py index 40eb8720..0e31ce28 100644 --- a/examples/pytorch/chatglm6b/finetune.py +++ b/examples/pytorch/chatglm6b/finetune.py @@ -143,6 +143,14 @@ class Chatglm6bArguments(TrainingArgs): metadata={'help': 'The lora alpha'}, ) + use_amp: int = field( + default=0, + metadata={ + 'help': + 'Whether to use amp(automatic mixed precision) to train the model.' 
+ }, + ) + args = Chatglm6bArguments(eval_metrics='chatglm').parse_cli() print(args) @@ -160,6 +168,13 @@ def cfg_modify_fn(cfg): cfg.merge_from_dict(config) else: cfg = config + if args.use_amp: + if not getattr(cfg.train, 'hooks', None): + cfg.train.hooks = [] + cfg.train.hooks.append({ + 'type': 'TorchAMPOptimizerHook', + # Optional loss_scale parameter here. + }) if cfg.train.lr_scheduler.type == 'LinearLR': cfg.train.lr_scheduler['total_iters'] = \ int(len(train_dataset) / cfg.train.dataloader.batch_size_per_gpu) * cfg.train.max_epochs @@ -193,13 +208,15 @@ model_config['model'] = ConfigDict({ 'type': config['model']['type'], }) -if config['model']['type'] == 'chatglm6b': - model_config['model']['pre_seq_len'] = args.pre_seq_len - model_config['model']['prefix_projection'] = args.prefix_projection - +model_config['model']['pre_seq_len'] = args.pre_seq_len +model_config['model']['prefix_projection'] = args.prefix_projection tokenizer = ChatGLMTokenizer.from_pretrained(model_dir, trust_remote_code=True) + +device_map_kwargs = {} +if args.use_lora != 0 and torch.cuda.device_count() > 1: + device_map_kwargs['device_map'] = 'auto' model = Model.from_pretrained( - model_dir, cfg_dict=model_config, device_map='auto') + model_dir, cfg_dict=model_config, **device_map_kwargs) if args.ptuning_checkpoint is not None: # Evaluation @@ -230,7 +247,10 @@ if args.use_lora != 0: rank=args.lora_rank, lora_alpha=args.lora_alpha, lora_dropout=args.lora_dropout) - model = model.bfloat16() + if args.use_amp: + model = model.float() + else: + model = model.bfloat16() Swift.prepare_model(model, lora_config) prefix = args.source_prefix if args.source_prefix is not None else '' @@ -333,13 +353,10 @@ def preprocess_function_train(examples): pad_len = max_seq_length - len(input_ids) input_ids = input_ids + [tokenizer.pad_token_id] * pad_len - if config['model']['type'] == 'chatglm6b': - labels = labels + [tokenizer.pad_token_id] * pad_len - if args.ignore_pad_token_for_loss: - labels = 
[(lb if lb != tokenizer.pad_token_id else -100) - for lb in labels] - else: - labels = labels + [-100] * pad_len + labels = labels + [tokenizer.pad_token_id] * pad_len + if args.ignore_pad_token_for_loss: + labels = [(lb if lb != tokenizer.pad_token_id else -100) + for lb in labels] model_inputs['input_ids'].append(input_ids) model_inputs['labels'].append(labels) @@ -371,8 +388,7 @@ data_collator = DataCollatorForSeq2Seq( padding=False) model.gradient_checkpointing_enable() -if config['model']['type'] == 'chatglm6b': - model.enable_input_require_grads() +model.enable_input_require_grads() # import torch # model = torch.nn.DataParallel(model).cuda() @@ -384,8 +400,6 @@ trainer = Seq2SeqTrainer( seed=args.seed, data_collator=data_collator, remove_unused_data=True, - # No placement for model, leave the model to `device_map` - device='cpu', cfg_modify_fn=cfg_modify_fn) trainer.tokenizer = tokenizer trainer.train() diff --git a/examples/pytorch/chatglm6b/run_train_chatglm2_ptuning_adv_v2.sh b/examples/pytorch/chatglm6b/run_train_chatglm2_ptuning_adv_v2.sh new file mode 100644 index 00000000..582c464c --- /dev/null +++ b/examples/pytorch/chatglm6b/run_train_chatglm2_ptuning_adv_v2.sh @@ -0,0 +1,26 @@ +PRE_SEQ_LEN=128 +LR=2e-2 + +PYTHONPATH=. 
python examples/pytorch/chatglm6b/finetune.py \ + --train_dataset_name AdvertiseGen/train.json \ + --val_dataset_name AdvertiseGen/dev.json \ + --prompt_column content \ + --response_column summary \ + --model "ZhipuAI/chatglm2-6b" \ + --max_source_length 64 \ + --max_target_length 128 \ + --per_device_train_batch_size 16 \ + --per_device_eval_batch_size 1 \ + --train.optimizer.options.cumulative_iters 1 \ + --max_epochs 1 \ + --save_strategy 'by_step' \ + --save_interval 1000 \ + --lr $LR \ + --eval_strategy "by_step" \ + --eval_interval 1000 \ + --lr_strategy 'by_step' \ + --task 'chat' \ + --model.type 'chatglm2-6b' \ + --pre_seq_len $PRE_SEQ_LEN \ + --quantization_bit 4 \ + --work_dir ptuning_adv_target \ diff --git a/examples/pytorch/chatglm6b/text_generation_metric.py b/examples/pytorch/chatglm6b/text_generation_metric.py index 2083453a..536bbe06 100644 --- a/examples/pytorch/chatglm6b/text_generation_metric.py +++ b/examples/pytorch/chatglm6b/text_generation_metric.py @@ -53,7 +53,7 @@ class TextGenerationMetric(Metric): } for pred, label in zip(preds, labels): hypothesis = list(jieba.cut(pred)) - if len(hypothesis) == 0: + if len(hypothesis) == 0 or ''.join(hypothesis) == '.': hypothesis = [''] reference = list(jieba.cut(label)) rouge = Rouge() diff --git a/examples/pytorch/llm/_parser.py b/examples/pytorch/llm/_parser.py new file mode 100644 index 00000000..480cfdce --- /dev/null +++ b/examples/pytorch/llm/_parser.py @@ -0,0 +1,69 @@ +import os +from dataclasses import dataclass, field +from typing import List, Optional, Tuple, Type, TypeVar, Union + +import torch +from torch import device as Device +from transformers import HfArgumentParser + +from modelscope import get_logger + +logger = get_logger() + + +def _format_device(device: Union[List[int], str]) -> Tuple[List[int], str]: + if isinstance(device, list): + device_ids = device + device_str = ','.join([str(d) for d in device]) + else: + device_ids = [int(d) for d in device.split(',') if d != '-1'] + 
device_str = device + device_str = device_str.replace(' ', '') + return device_ids, device_str + + +def select_device(device: Union[List[int], str]) -> Device: + """Call this function before cuda is initialized. + device: e.g. []: 'cpu', [0], [0, 1, 2] + e.g. '-1': 'cpu', '0', '0,1,2' + """ + if torch.cuda.is_initialized(): + logger.warning('CUDA has been initialized! Device selection fails!') + return torch.device('cuda:0') + + device_ids, device_str = _format_device(device) + os.environ['CUDA_VISIBLE_DEVICES'] = device_str + log_s = 'Using device: ' + if len(device_ids) == 0: + master_device: str = 'cpu' + log_s += 'cpu' + else: + assert torch.cuda.is_available( + ) and torch.cuda.device_count() >= len(device_ids) + master_device = 'cuda:0' + log_s += f'cuda:{device_str}' + logger.info(log_s) + return torch.device(master_device) + + +_T = TypeVar('_T') + + +def parse_args(class_type: Type[_T], + argv: Optional[List[str]] = None) -> Tuple[_T, List[str]]: + parser = HfArgumentParser([class_type]) + args, remaining_args = parser.parse_args_into_dataclasses( + argv, return_remaining_strings=True) + logger.info(f'args: {args}') + return args, remaining_args + + +@dataclass +class DeviceArguments: + device: str = '0' # e.g. '-1'; '0'; '0,1' + + +def parse_device(argv: Optional[List[str]] = None) -> List[str]: + args, remaining_args = parse_args(DeviceArguments, argv) + select_device(args.device) + return remaining_args diff --git a/examples/pytorch/llm/llm_infer.py b/examples/pytorch/llm/llm_infer.py new file mode 100644 index 00000000..614e3d36 --- /dev/null +++ b/examples/pytorch/llm/llm_infer.py @@ -0,0 +1,123 @@ +# ### Setting up experimental environment. + +if __name__ == '__main__': + # Avoid cuda initialization caused by library import (e.g. 
peft, accelerate) + from _parser import * + # argv = parse_device(['--device', '1']) + argv = parse_device() + +from utils import * + + +@dataclass +class InferArguments: + model_type: str = field( + default='baichuan-7b', metadata={'choices': list(MODEL_MAPPER.keys())}) + sft_type: str = field( + default='lora', metadata={'choices': ['lora', 'full']}) + ckpt_path: str = '/path/to/your/iter_xxx.pth' + eval_human: bool = False # False: eval test_dataset + ignore_args_error: bool = True # False: notebook compatibility + + dataset: str = field( + default='alpaca-en,alpaca-zh', + metadata={'help': f'dataset choices: {list(DATASET_MAPPER.keys())}'}) + dataset_seed: int = 42 + dataset_sample: Optional[int] = None + dataset_test_size: float = 0.01 + prompt: str = DEFAULT_PROMPT + max_length: Optional[int] = 2048 + + lora_target_modules: Optional[List[str]] = None + lora_rank: int = 8 + lora_alpha: int = 32 + lora_dropout_p: float = 0.1 + + max_new_tokens: int = 512 + temperature: float = 0.9 + top_k: int = 50 + top_p: float = 0.9 + + def __post_init__(self): + if self.lora_target_modules is None: + self.lora_target_modules = MODEL_MAPPER[self.model_type]['lora_TM'] + + if not os.path.isfile(self.ckpt_path): + raise ValueError( + f'Please enter a valid ckpt_path: {self.ckpt_path}') + + +def llm_infer(args: InferArguments) -> None: + # ### Loading Model and Tokenizer + support_bf16 = torch.cuda.is_bf16_supported() + if not support_bf16: + logger.warning(f'support_bf16: {support_bf16}') + model, tokenizer, _ = get_model_tokenizer( + args.model_type, torch_dtype=torch.bfloat16) + + # ### Preparing lora + if args.sft_type == 'lora': + lora_config = LoRAConfig( + replace_modules=args.lora_target_modules, + rank=args.lora_rank, + lora_alpha=args.lora_alpha, + lora_dropout=args.lora_dropout_p, + pretrained_weights=args.ckpt_path) + logger.info(f'lora_config: {lora_config}') + model = Swift.prepare_model(model, lora_config) + elif args.sft_type == 'full': + state_dict = 
torch.load(args.ckpt_path, map_location='cpu') + model.load_state_dict(state_dict) + else: + raise ValueError(f'args.sft_type: {args.sft_type}') + + # ### Inference + tokenize_func = partial( + tokenize_function, + tokenizer=tokenizer, + prompt=args.prompt, + max_length=args.max_length) + streamer = TextStreamer( + tokenizer, skip_prompt=True, skip_special_tokens=True) + generation_config = GenerationConfig( + max_new_tokens=args.max_new_tokens, + temperature=args.temperature, + top_k=args.top_k, + top_p=args.top_p, + do_sample=True, + pad_token_id=tokenizer.eos_token_id) + logger.info(f'generation_config: {generation_config}') + + if args.eval_human: + while True: + instruction = input('<<< ') + data = {'instruction': instruction} + input_ids = tokenize_func(data)['input_ids'] + inference(input_ids, model, tokenizer, streamer, generation_config) + print('-' * 80) + else: + dataset = get_dataset(args.dataset) + _, test_dataset = process_dataset(dataset, args.dataset_test_size, + args.dataset_sample, + args.dataset_seed) + mini_test_dataset = test_dataset.select(range(10)) + del dataset + for data in mini_test_dataset: + output = data['output'] + data['output'] = None + input_ids = tokenize_func(data)['input_ids'] + inference(input_ids, model, tokenizer, streamer, generation_config) + print() + print(f'[LABELS]{output}') + print('-' * 80) + # input('next[ENTER]') + + +if __name__ == '__main__': + args, remaining_argv = parse_args(InferArguments, argv) + if len(remaining_argv) > 0: + if args.ignore_args_error: + logger.warning(f'remaining_argv: {remaining_argv}') + else: + raise ValueError(f'remaining_argv: {remaining_argv}') + llm_infer(args) diff --git a/examples/pytorch/llm/llm_sft.py b/examples/pytorch/llm/llm_sft.py new file mode 100644 index 00000000..a7dabf77 --- /dev/null +++ b/examples/pytorch/llm/llm_sft.py @@ -0,0 +1,266 @@ +# ### Setting up experimental environment. 
+""" +# Install the latest version of modelscope from source +git clone https://github.com/modelscope/modelscope.git +cd modelscope +pip install . + +conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia +pip install numpy pandas -U # Resolve torchmetrics dependencies and update numpy +pip install matplotlib scikit-learn -U +pip install transformers datasets -U +pip install tqdm tensorboard torchmetrics sentencepiece charset_normalizer -U +pip install accelerate transformers_stream_generator -U +""" + +if __name__ == '__main__': + # Avoid cuda initialization caused by library import (e.g. peft, accelerate) + from _parser import * + # argv = parse_device(['--device', '1']) + argv = parse_device() + +from utils import * + + +@dataclass +class SftArguments: + seed: int = 42 + model_type: str = field( + default='baichuan-7b', metadata={'choices': list(MODEL_MAPPER.keys())}) + # baichuan-7b: 'lora': 16G; 'full': 80G + sft_type: str = field( + default='lora', metadata={'choices': ['lora', 'full']}) + ignore_args_error: bool = True # False: notebook compatibility + + dataset: str = field( + default='alpaca-en,alpaca-zh', + metadata={'help': f'dataset choices: {list(DATASET_MAPPER.keys())}'}) + dataset_seed: int = 42 + dataset_sample: Optional[int] = None + dataset_test_size: float = 0.01 + prompt: str = DEFAULT_PROMPT + max_length: Optional[int] = 2048 + + lora_target_modules: Optional[List[str]] = None + lora_rank: int = 8 + lora_alpha: int = 32 + lora_dropout_p: float = 0.1 + + gradient_checkpoint: bool = True + batch_size: int = 1 + max_epochs: int = 1 + learning_rate: Optional[float] = None + weight_decay: float = 0.01 + n_accumulate_grad: int = 16 + grad_clip_norm: float = 1. 
def llm_sft(args: SftArguments) -> None:
    """Run one supervised fine-tuning job described by `args`.

    Steps, in order: seed the RNGs, load the model and tokenizer, optionally
    enable gradient checkpointing, optionally wrap the model with LoRA, load
    and tokenize the dataset, build the trainer configuration, train with
    `EpochBasedTrainer`, and finally plot the tensorboard `loss` curve.

    Args:
        args: Parsed `SftArguments`; every hyper-parameter used below is read
            from it.
    """
    seed_everything(args.seed)

    # ### Loading Model and Tokenizer
    support_bf16 = torch.cuda.is_bf16_supported()
    if not support_bf16:
        logger.warning(f'support_bf16: {support_bf16}')
    # NOTE(review): the model is requested in bfloat16 even when the check
    # above reports no bf16 support; only a warning is logged.
    model, tokenizer, model_dir = get_model_tokenizer(
        args.model_type, torch_dtype=torch.bfloat16)

    if args.gradient_checkpoint:
        # baichuan-13b does not implement the `get_input_embeddings` function
        if args.model_type == 'baichuan-13b':
            model.get_input_embeddings = MethodType(
                lambda self: self.model.embed_tokens, model)
        model.gradient_checkpointing_enable()
        model.enable_input_require_grads()

    # ### Preparing lora
    if args.sft_type == 'lora':
        lora_config = LoRAConfig(
            replace_modules=args.lora_target_modules,
            rank=args.lora_rank,
            lora_alpha=args.lora_alpha,
            lora_dropout=args.lora_dropout_p)
        logger.info(f'lora_config: {lora_config}')
        model = Swift.prepare_model(model, lora_config)

    show_freeze_layers(model)
    print_model_info(model)
    # check the device and dtype of the model
    _p: Tensor = list(model.parameters())[-1]
    logger.info(f'device: {_p.device}, dtype: {_p.dtype}')

    # ### Loading Dataset
    dataset = get_dataset(args.dataset)
    train_dataset, val_dataset = process_dataset(dataset,
                                                 args.dataset_test_size,
                                                 args.dataset_sample,
                                                 args.dataset_seed)
    # Bind the tokenizer and prompt settings so `map` can call it per example.
    tokenize_func = partial(
        tokenize_function,
        tokenizer=tokenizer,
        prompt=args.prompt,
        max_length=args.max_length)
    train_dataset = train_dataset.map(tokenize_func)
    val_dataset = val_dataset.map(tokenize_func)
    del dataset
    # Data analysis
    stat_dataset(train_dataset)
    stat_dataset(val_dataset)
    data_collator = partial(data_collate_fn, tokenizer=tokenizer)
    print_example(train_dataset[0], tokenizer)

    # ### Setting Config
    cfg_file = os.path.join(model_dir, 'configuration.json')

    # `True` is the drop_last flag; it matches 'drop_last': True in the train
    # dataloader config below.
    # NOTE(review): T_max counts dataloader iterations and does not divide by
    # args.n_accumulate_grad -- confirm this is what the scheduler expects.
    T_max = get_T_max(
        len(train_dataset), args.batch_size, args.max_epochs, True)
    work_dir = get_work_dir(f'runs/{args.model_type}')
    config = Config({
        'train': {
            'dataloader': {
                'batch_size_per_gpu': args.batch_size,
                'workers_per_gpu': 1,
                'shuffle': True,
                'drop_last': True,
                'pin_memory': True
            },
            'max_epochs':
            args.max_epochs,
            'work_dir':
            work_dir,
            'optimizer': {
                'type': 'AdamW',
                'lr': args.learning_rate,
                'weight_decay': args.weight_decay,
                'options': {
                    'cumulative_iters': args.n_accumulate_grad,
                    'grad_clip': {
                        'norm_type': 2,
                        'max_norm': args.grad_clip_norm
                    }
                }
            },
            'lr_scheduler': {
                'type': 'CosineAnnealingLR',
                'T_max': T_max,
                'eta_min': args.learning_rate * 0.1,
                'options': {
                    'by_epoch': False,
                    'warmup': {
                        'type': 'LinearWarmup',
                        'warmup_ratio': 0.1,
                        'warmup_iters': args.warmup_iters
                    }
                }
            },
            'hooks': [
                {
                    'type': 'CheckpointHook',
                    'by_epoch': False,
                    'interval': args.last_save_interval,
                    'max_checkpoint_num': args.last_max_checkpoint_num,
                    'save_trainer_state': args.save_trainer_state
                },
                {
                    'type': 'EvaluationHook',
                    'by_epoch': False,
                    'interval': args.eval_interval
                },
                {
                    'type': 'BestCkptSaverHook',
                    'metric_key': 'loss',
                    'save_best': True,
                    'rule': 'min',
                    'max_checkpoint_num': args.best_max_checkpoint_num,
                    'save_trainer_state': args.save_trainer_state
                },
                {
                    'type': 'TextLoggerHook',
                    'by_epoch': True,  # Whether EpochBasedTrainer is used
                    'interval': args.logging_interval
                },
                {
                    'type': 'TensorboardHook',
                    'by_epoch': False,
                    'interval': args.tb_interval
                }
            ]
        },
        'evaluation': {
            'dataloader': {
                'batch_size_per_gpu': args.batch_size,
                'workers_per_gpu': 1,
                'shuffle': False,
                'drop_last': False,
                'pin_memory': True
            },
            # 'my_metric' is the name a MyMetric class is registered under
            # in the utils module of this example.
            'metrics': [{
                'type': 'my_metric',
                'vocab_size': tokenizer.vocab_size
            }]
        }
    })

    # ### Finetuning

    def cfg_modify_fn(cfg: Config) -> Config:
        # Overlay the settings built above onto the config loaded from
        # `cfg_file`.
        cfg.update(config)
        return cfg

    # NOTE(review): the trainer receives a fixed seed=42 rather than
    # args.seed -- confirm whether it re-seeds and overrides
    # seed_everything(args.seed) above.
    trainer = EpochBasedTrainer(
        model=model,
        cfg_file=cfg_file,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        remove_unused_data=True,
        seed=42,
        cfg_modify_fn=cfg_modify_fn,
    )

    trainer.train()

    # ### Visualization
    tb_dir = os.path.join(work_dir, 'tensorboard_output')
    plot_images(tb_dir, ['loss'], 0.9)
def _processing_alpaca(dataset: HfDataset) -> HfDataset:
    """Fold the alpaca `input` column into the `instruction` column.

    Rows whose `input` is `None` or empty keep their instruction unchanged.
    Otherwise the input (minus a leading '输入:' marker, when present) is
    appended to the instruction on a new line. The returned dataset holds
    only the `instruction` and `output` columns.
    """
    merged = []
    for prompt, extra in zip(dataset['instruction'], dataset['input']):
        if extra is None or extra == '':
            merged.append(prompt)
            continue
        if extra.startswith('输入:'):
            # Drop the three-character '输入:' prefix.
            extra = extra[3:]
        merged.append(f'{prompt}\n{extra}')
    return HfDataset.from_dict({
        'instruction': merged,
        'output': dataset['output']
    })
def get_dataset(dataset_names: str) -> HfDataset:
    """Load and concatenate the datasets named in a comma-separated string.

    Args:
        dataset_names: Comma-separated keys of `DATASET_MAPPER`, e.g.
            'alpaca-en,alpaca-zh'. Whitespace around each name is ignored.

    Returns:
        The concatenation of all requested datasets, in the order given.

    Raises:
        KeyError: If a name is not a key of `DATASET_MAPPER`.
    """
    dataset_list = []
    for raw_name in dataset_names.split(','):
        # Tolerate 'a, b' style lists instead of failing on ' b'.
        dataset_name = raw_name.strip()
        get_function = DATASET_MAPPER.get(dataset_name)
        if get_function is None:
            # Same exception type as the plain dict lookup raised before,
            # but with a message that says what is supported.
            raise KeyError(f'dataset_name: {dataset_name!r}, '
                           f'supported: {list(DATASET_MAPPER)}')
        dataset_list.append(get_function())
    return concatenate_datasets(dataset_list)
def get_model_tokenizer_chatglm2(model_dir: str,
                                 load_model: bool = True,
                                 add_special_token: bool = True,
                                 torch_dtype: Dtype = torch.float16):
    """Build the ChatGLM2 model and tokenizer from the ms library classes.

    Args:
        model_dir: Local directory holding the downloaded model files.
        load_model: When False, only the tokenizer is created and the
            returned model is `None`.
        add_special_token: When True, `_add_special_token` is applied to the
            tokenizer before returning.
        torch_dtype: Dtype written into the model config and passed to
            `Model.from_pretrained`.

    Returns:
        A `(model, tokenizer)` tuple.
    """
    ms_config = read_config(model_dir)
    logger.info(ms_config)

    glm_config = ChatGLM2Config.from_pretrained(model_dir)
    glm_config.torch_dtype = torch_dtype
    logger.info(glm_config)

    tokenizer = ChatGLM2Tokenizer.from_pretrained(model_dir)

    if load_model:
        load_kwargs = {
            'cfg_dict': ms_config,
            'config': glm_config,
            'device_map': 'auto',
            'torch_dtype': torch_dtype,
        }
        model = Model.from_pretrained(model_dir, **load_kwargs)
    else:
        model = None

    if add_special_token:
        _add_special_token(tokenizer)
    return model, tokenizer
def get_model_tokenizer(model_type: str,
                        load_model: bool = True,
                        add_special_token: bool = True,
                        torch_dtype: Dtype = torch.float16):
    """Download a registered model and build its model/tokenizer pair.

    The `MODEL_MAPPER` entry for `model_type` supplies the model id and,
    optionally, a revision (default 'master'), a loader function (default
    `get_model_tokenizer_default`) and file patterns to skip on download.

    Returns:
        A `(model, tokenizer, model_dir)` tuple, where `model_dir` is the
        directory returned by `snapshot_download`.

    Raises:
        ValueError: If `model_type` has no entry in `MODEL_MAPPER`.
    """
    entry = MODEL_MAPPER.get(model_type)
    if entry is None:
        raise ValueError(f'model_type: {model_type}')

    model_id = entry['model_id']
    revision = entry.get('revision', 'master')
    loader = entry.get('get_function', get_model_tokenizer_default)
    skip_patterns = entry.get('ignore_file_pattern', [])

    model_dir = snapshot_download(
        model_id, revision, ignore_file_pattern=skip_patterns)
    loaded = loader(model_dir, load_model, add_special_token, torch_dtype)
    model, tokenizer = loaded
    return model, tokenizer, model_dir
def _get_version(work_dir: str) -> int:
    """Return the next free run version number inside `work_dir`.

    Entries whose name starts with 'v<digits>' count as existing versions.
    The result is one more than the largest such number, or 0 when
    `work_dir` is not a directory or has no versioned entry.
    """
    entries = os.listdir(work_dir) if os.path.isdir(work_dir) else []
    matches = (re.match(r'v(\d+)', entry) for entry in entries)
    latest = max((int(m.group(1)) for m in matches if m is not None),
                 default=-1)
    return latest + 1
def print_example(example: Dict[str, Any], tokenizer) -> None:
    """Print one tokenized example as raw ids and as decoded text.

    Args:
        example: Mapping with 'input_ids' and 'labels'. 'labels' may be
            `None`, which is what `tokenize_function` produces for an example
            without an output; in that case only the raw `None` is printed
            and the decoded-labels line is skipped.
        tokenizer: Object providing `decode(list_of_ids) -> str`.
    """
    input_ids, labels = example['input_ids'], example['labels']
    print(f'[INPUT_IDS] {input_ids}')
    print(f'[INPUT] {tokenizer.decode(input_ids)}')
    print()
    print(f'[LABLES_IDS] {labels}')
    if labels is None:
        # Nothing to decode; iterating None would raise TypeError.
        return
    # -100 marks positions ignored by the loss; it is not a valid token id,
    # so replace it with 0 before decoding.
    decodable = [lb if lb != -100 else 0 for lb in labels]
    print(f'[LABLES] {tokenizer.decode(decodable)}')
def print_model_info(model: Module, name: Optional[str] = None) -> None:
    """Log the parameter, trainable-parameter and buffer counts of `model`.

    Counts are reported in millions with four decimals. `name` defaults to
    the model's class name.
    """
    label = model.__class__.__name__ if name is None else name

    total = 0
    trainable = 0
    for param in model.parameters():
        count = param.numel()
        total += count
        if param.requires_grad:
            trainable += count
    buffered = sum(buf.numel() for buf in model.buffers())

    summary = (f'{label}: '
               f'{total / 1e6:.4f}M Params ({trainable / 1e6:.4f}M Trainable), '
               f'{buffered / 1e6:.4f}M Buffers'
               '.')
    logger.info(summary)
def plot_images(tb_dir: str,
                smooth_key: List[str],
                smooth_val: float = 0.9,
                figsize: Tuple[int, int] = (8, 5),
                dpi: int = 100) -> None:
    """Render every scalar tag of a tensorboard log to an image file.

    Images are written to an `images` directory next to `tb_dir`, one file
    per tag, with '/' in the tag replaced by '_'.

    Args:
        tb_dir: Directory containing the tensorboard event file. The first
            entry returned by `os.listdir` is the one that is read.
        smooth_key: Tags that additionally get a smoothed curve drawn on top
            of the raw one.
        smooth_val: Smoothing factor passed to `tensorboard_smoothing`.
        figsize: Figure size passed to `plt.subplots`.
        dpi: Resolution used for both figure creation and saving.
    """
    images_dir = os.path.join(os.path.dirname(tb_dir), 'images')
    os.makedirs(images_dir, exist_ok=True)

    # NOTE(review): os.listdir order is arbitrary; with several event files
    # in tb_dir the one plotted is not well defined.
    fname = os.listdir(tb_dir)[0]
    tb_path = os.path.join(tb_dir, fname)
    data = read_tensorboard_file(tb_path)

    for k, _data in data.items():
        steps = [d['step'] for d in _data]
        values = [d['value'] for d in _data]
        if not values:
            continue
        fig, ax = plt.subplots(1, 1, squeeze=True, figsize=figsize, dpi=dpi)
        try:
            ax.set_title(k)
            if len(values) == 1:
                # A single point would be invisible as a line.
                ax.scatter(steps, values, color=COLOR_S)
            elif k in smooth_key:
                ax.plot(steps, values, color=COLOR)
                values_s = tensorboard_smoothing(values, smooth_val)
                ax.plot(steps, values_s, color=COLOR_S)
            else:
                ax.plot(steps, values, color=COLOR_S)
            fpath = os.path.join(images_dir, k.replace('/', '_'))
            # Save this figure explicitly instead of the implicit "current"
            # one.
            fig.savefig(fpath, dpi=dpi, bbox_inches='tight')
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this, one figure per tag accumulates in memory.
            plt.close(fig)
torch.tensor(input_ids)[None].cuda() + attention_mask = torch.ones_like(input_ids) + generate_ids = model.generate( + input_ids=input_ids, + attention_mask=attention_mask, + streamer=streamer, + generation_config=generation_config) + output_text = tokenizer.decode(generate_ids[0]) + return output_text diff --git a/examples/pytorch/llm_agent/_common.py b/examples/pytorch/llm_agent/_common.py new file mode 100644 index 00000000..dd07ef31 --- /dev/null +++ b/examples/pytorch/llm_agent/_common.py @@ -0,0 +1,426 @@ +import ast +import datetime as dt +import math +import os +import random +import re +import sys +from functools import partial +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import json +import matplotlib.pyplot as plt +import numpy as np +# +import torch +import torch.nn as nn +import torch.optim as optim +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from numpy import ndarray +from tensorboard.backend.event_processing.event_accumulator import \ + EventAccumulator +from torch import Tensor +from torch import device as Device +from torch import dtype as Dtype +from torch.nn import Module +from torch.nn.parameter import Parameter +from torch.nn.utils.rnn import pad_sequence +from torch.optim import Optimizer +from torch.optim import lr_scheduler as lrs +from torch.optim.lr_scheduler import _LRScheduler as LRScheduler +from torch.utils.data import Dataset +# +from torchmetrics import Accuracy, MeanMetric +# +from tqdm import tqdm + +# +from modelscope import (Model, MsDataset, get_logger, read_config, + snapshot_download) +from modelscope.metrics.base import Metric +from modelscope.metrics.builder import METRICS +from modelscope.models.nlp.chatglm2 import ChatGLM2Tokenizer +from modelscope.msdatasets.dataset_cls.custom_datasets import \ + TorchCustomDataset +from modelscope.swift import LoRAConfig, Swift +from modelscope.trainers import EpochBasedTrainer +from modelscope.utils.config import Config, ConfigDict 
def _format_device(device: Union[List[int], str]) -> Tuple[List[int], str]:
    """Normalise a device spec into `(device_ids, device_str)`.

    Args:
        device: Either a list of GPU indices (e.g. `[0, 1]`, `[]` for CPU) or
            a comma-separated string (e.g. '0,1', '-1' for CPU). Spaces in
            the string form are ignored.

    Returns:
        `device_ids`: the GPU indices as ints; '-1' entries and empty entries
        of the string form are left out.
        `device_str`: the comma-separated form with all spaces removed.

    Raises:
        ValueError: If a string entry is not an integer.
    """
    if isinstance(device, list):
        device_ids = device
        device_str = ','.join([str(d) for d in device])
        device_str = device_str.replace(' ', '')
        return device_ids, device_str

    # Remove spaces *before* filtering so that ' -1' is recognised as the
    # CPU marker, and skip empty entries so '' does not reach int().
    device_str = device.replace(' ', '')
    device_ids = [
        int(d) for d in device_str.split(',') if d not in ('', '-1')
    ]
    return device_ids, device_str
def get_T_max(dataset_len: int, batch_size: int, max_epochs: int,
              drop_last: bool) -> int:
    """Return the total iteration count used as T_max in CosineAnnealingLR.

    Iterations per epoch are `dataset_len / batch_size`, rounded down when
    `drop_last` is set and up otherwise; the result is that count times
    `max_epochs`.
    """
    iters_per_epoch = (
        dataset_len // batch_size
        if drop_last else math.ceil(dataset_len / batch_size))
    return iters_per_epoch * max_epochs
def print_examples(examples: Dict[str, Any], tokenizer) -> None:
    """Print the decoded input and decoded labels of one tokenized example.

    Args:
        examples: Mapping with 'input_ids' and 'labels'. 'labels' may be
            `None`, which is what `tokenize_function` produces when no
            assistant reply is given; then only the input is printed.
        tokenizer: Object providing `decode(list_of_ids) -> str`.
    """
    input_ids, labels = examples['input_ids'], examples['labels']
    print(f'[INPUT_IDS] {tokenizer.decode(input_ids)}')
    if labels is None:
        # Nothing to decode; iterating None would raise TypeError.
        return
    print()
    # -100 marks positions ignored by the loss; it is not a valid token id,
    # so replace it with 0 before decoding.
    decodable = [lb if lb != -100 else 0 for lb in labels]
    print(f'[LABLES] {tokenizer.decode(decodable)}')
List[Dict[str, Any]], tokenizer) -> Dict[str, Any]: + input_ids = [torch.tensor(b['input_ids']) for b in batch] + labels = [torch.tensor(b['labels']) for b in batch] + attention_mask = [ + torch.ones(len(input_ids[i]), dtype=torch.int64) + for i in range(len(input_ids)) + ] + # + input_ids = pad_sequence( + input_ids, batch_first=True, padding_value=tokenizer.pad_token_id) + attention_mask = pad_sequence( + attention_mask, batch_first=True, padding_value=0) + labels = pad_sequence(labels, batch_first=True, padding_value=-100) + return { + 'input_ids': input_ids, + 'attention_mask': attention_mask, + 'labels': labels + } + + +def print_model_info(model: Module, name: Optional[str] = None) -> None: + if name is None: + name = model.__class__.__name__ + # + n_params = sum(p.numel() for p in model.parameters()) + n_grads = sum(p.numel() for p in model.parameters() if p.requires_grad) + n_buffers = sum(p.numel() for p in model.buffers()) + # + n_params /= 1e6 + n_grads /= 1e6 + n_buffers /= 1e6 + s = [ + f'{name}: ', + f'{n_params:.4f}M Params ({n_grads:.4f}M Trainable), ', + f'{n_buffers:.4f}M Buffers', + ] + s += '.' 
@METRICS.register_module(group_key=default_group, module_name='my_metric')
class MyMetric(Metric):
    """Evaluation metric reporting next-token accuracy and mean loss."""

    def __init__(self, vocab_size: int):
        # Both accumulators are created here and never moved to another
        # device, so `add` hands them CPU tensors.
        self.acc = Accuracy('multiclass', num_classes=vocab_size)
        self.loss = MeanMetric()

    def add(self, outputs: Dict[str, Any], inputs: Dict[str, Any]) -> None:
        """Accumulate one batch.

        Args:
            outputs: Model output exposing `.loss` and `.logits`.
            inputs: Batch mapping containing 'labels', where -100 marks
                positions excluded from the accuracy.
        """
        loss: Tensor = outputs.loss
        self.loss.update(loss.cpu())
        #
        labels: Tensor = inputs['labels']
        # Shift by one: the logits at position t are scored against the
        # label at position t + 1.
        labels = labels[:, 1:]
        labels_mask = labels != -100
        logits: Tensor = outputs.logits[:, :-1]
        logits = logits[labels_mask].contiguous().view(-1, logits.shape[-1])
        pred = logits.argmax(dim=-1)
        labels = labels[labels_mask].to(logits.device)
        # Move to CPU before updating, as the sibling MyMetric in
        # llm/utils/utils.py does.
        self.acc.update(pred.cpu(), labels.cpu())

    def evaluate(self):
        """Return the accumulated accuracy and mean loss as floats."""
        return {
            'acc': self.acc.compute().item(),
            'loss': self.loss.compute().item()
        }

    def merge(self, other: 'MyMetric') -> None:
        """This script does not support ddp"""
        raise NotImplementedError
def tensorboard_smoothing(values: List[float],
                          smooth: float = 0.9) -> List[float]:
    """Smooth `values` with a normalised exponential moving average.

    Each output is the exponentially decayed running sum of the inputs seen
    so far divided by the matching sum of decay weights, so a constant input
    stays constant from the first element onwards.

    Args:
        values: Raw series.
        smooth: Decay applied to the running sum at every step.

    Returns:
        A new list with one smoothed value per input value.
    """
    weight_sum = 1
    running = 0
    smoothed = []
    for value in values:
        running = running * smooth + value  # Exponential decay
        smoothed.append(running / weight_sum)
        # Total weight carried by `running` after the next step.
        weight_sum = weight_sum * smooth + 1
    return smoothed
Current index file version is 1.6.2, with md5 ddf811ee982377c1357284a2bfda3dec and a total number of 861 components indexed\n", + "2023-07-02 22:28:01,367 - modelscope - INFO - [0, 1]\n", + "2023-07-02 22:28:01,512 - modelscope - INFO - Using device: cuda:0,1\n" + ] + }, + { + "data": { + "text/plain": [ + "device(type='cuda', index=0)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from _common import *\n", + "from transformers import TextStreamer\n", + "device_ids = [0, 1]\n", + "select_device(device_ids)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 导入Model, Tokenizer\n", + "Note: 你需要设置CKPT_FPATH的内容, 指向`.bin`文件, 或`.pth`文件" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 22:28:03,375 - modelscope - INFO - Model revision not specified, use default: master in development mode\n", + "2023-07-02 22:28:03,375 - modelscope - INFO - Development mode use revision: master\n", + "2023-07-02 22:28:03,695 - modelscope - INFO - model_config: BaiChuanConfig {\n", + " \"architectures\": [\n", + " \"BaiChuanForCausalLM\"\n", + " ],\n", + " \"auto_map\": {\n", + " \"AutoConfig\": \"configuration_baichuan.BaiChuanConfig\",\n", + " \"AutoModelForCausalLM\": \"modeling_baichuan.BaiChuanForCausalLM\"\n", + " },\n", + " \"bos_token_id\": 1,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 4096,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 11008,\n", + " \"max_position_embeddings\": 4096,\n", + " \"model_type\": \"baichuan\",\n", + " \"num_attention_heads\": 32,\n", + " \"num_hidden_layers\": 32,\n", + " \"pad_token_id\": 0,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"tie_word_embeddings\": false,\n", + " \"torch_dtype\": \"float16\",\n", + " \"transformers_version\": \"4.30.2\",\n", + " 
\"use_cache\": true,\n", + " \"vocab_size\": 64000\n", + "}\n", + "\n", + "The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\n" + ] + }, + { + "data": { + "text/plain": [ + "BaiChuanForCausalLM(\n", + " (model): Model(\n", + " (embed_tokens): Embedding(64000, 4096, padding_idx=0)\n", + " (layers): ModuleList(\n", + " (0-31): 32 x DecoderLayer(\n", + " (self_attn): Attention(\n", + " (W_pack): Linear(in_features=4096, out_features=12288, bias=False)\n", + " (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n", + " (rotary_emb): RotaryEmbedding()\n", + " )\n", + " (mlp): MLP(\n", + " (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n", + " (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n", + " (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n", + " (act_fn): SiLUActivation()\n", + " )\n", + " (input_layernorm): RMSNorm()\n", + " (post_attention_layernorm): RMSNorm()\n", + " )\n", + " )\n", + " (norm): RMSNorm()\n", + " )\n", + " (lm_head): Linear(in_features=4096, out_features=64000, bias=False)\n", + ")" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "CKPT_FAPTH = '/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin'\n", + "LORA_TARGET_MODULES = ['W_pack']\n", + "\n", + "model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')\n", + "model, tokenizer = get_baichuan7B_model_tokenizer(model_dir)\n", + "model.bfloat16() # Consistent with training" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 导入Lora" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 22:28:14,108 - modelscope - INFO - lora_config: LoRAConfig(rank=8, replace_modules=['W_pack'], 
lora_alpha=32, lora_dropout=0, merge_weights=True, use_merged_linear=False, enable_lora=None, fan_in_fan_out=False, bias='none', only_lora_trainable=True, pretrained_weights='/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/output_best/pytorch_model.bin')\n" + ] + }, + { + "data": { + "text/plain": [ + "BaiChuanForCausalLM(\n", + " (model): Model(\n", + " (embed_tokens): Embedding(64000, 4096, padding_idx=0)\n", + " (layers): ModuleList(\n", + " (0-31): 32 x DecoderLayer(\n", + " (self_attn): Attention(\n", + " (W_pack): Linear(in_features=4096, out_features=12288, bias=False)\n", + " (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n", + " (rotary_emb): RotaryEmbedding()\n", + " )\n", + " (mlp): MLP(\n", + " (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n", + " (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n", + " (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n", + " (act_fn): SiLUActivation()\n", + " )\n", + " (input_layernorm): RMSNorm()\n", + " (post_attention_layernorm): RMSNorm()\n", + " )\n", + " )\n", + " (norm): RMSNorm()\n", + " )\n", + " (lm_head): Linear(in_features=4096, out_features=64000, bias=False)\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "LORA_RANK = 8\n", + "LORA_ALPHA = 32\n", + "LORA_DROPOUT_P = 0 # Arbitrary value\n", + "lora_config = LoRAConfig(\n", + " replace_modules=LORA_TARGET_MODULES,\n", + " rank=LORA_RANK,\n", + " lora_alpha=LORA_ALPHA,\n", + " lora_dropout=LORA_DROPOUT_P,\n", + " pretrained_weights=CKPT_FAPTH)\n", + "logger.info(f'lora_config: {lora_config}')\n", + "Swift.prepare_model(model, lora_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 导入Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + 
"text": [ + "2023-07-02 22:28:28,832 - modelscope - INFO - No subset_name specified, defaulting to the default\n", + "2023-07-02 22:28:29,317 - modelscope - WARNING - Reusing dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n", + "2023-07-02 22:28:29,318 - modelscope - INFO - Generating dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n", + "2023-07-02 22:28:29,318 - modelscope - INFO - Reusing cached meta-data file: /home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files/941b733ec0354c2172a3386d8788bb37\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "682dc9eedfce4092a25fcadc977c794a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading data files: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8e53d79d8e4845618231f3afb5bc096f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Extracting data files: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 285/285 [00:00<00:00, 1566679.74it/s]\n" + ] + } + ], + "source": [ + "test_dataset = make_dataset('validation', lambda system, user, assistant:\n", + " {'system': system, 'user': user, 'assistant': assistant})" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 推理" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 
是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://90.49.118.175:2603/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n", + "\n", + "2. {\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://132.94.116.115:5983/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}}\n", + "\n", + "3. 
{\"plugin_name\": \"modelscope_speech-generation\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://94.43.176.75:1062/\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}} \n", + "\n", + "### 用户\n", + "生成一首诗歌,主题为“秋天的美景”,读出来这段话 \n", + "\n", + "### 助手\n", + "秋天,是一个美丽的季节,是一个收获的季节,是一个充满诗意的季节。秋天的天空,湛蓝湛蓝的,像一块蓝宝石;秋天的田野,金黄色的稻谷,像一片金色的海洋;秋天的果园,硕果累累,像一幅美丽的画卷。秋天的山林,层林尽染,像一幅色彩斑斓的油画;秋天的河流,清澈见底,像一条银色的丝带。秋天的天空,湛蓝湛蓝的,像一块蓝宝石;秋天的田野,金黄色的稻谷,像一片金色的海洋;秋天的果园,硕果累累,像一幅美丽的画卷。秋天的山林,层林尽染,像一幅色彩斑斓的油画;秋天的河流,清澈见底,像一条银色的丝带。\n", + "\n", + "[LABELS]秋树红叶舞飘零,\n", + "山间小溪水潺潺。\n", + "微风拂面感清凉,\n", + "散步赏景心旷神怡。\n", + "<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_speech-generation\", \"url\": \"http://90.49.118.175:2603/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"parameters\": {\"text\": \"秋树红叶舞飘零,\n", + "山间小溪水潺潺。\n", + "微风拂面感清凉,\n", + "散步赏景心旷神怡。\", \"gender\": \"woman\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"result\": \"\"}\n", + "```<|endofexec|>\n", + "\n", + "-----------------------------------------------------------------------------------\n", + "[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. 
{\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://159.1.4.174:3210/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n", + "\n", + "2. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://172.163.158.154:5325/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n", + "\n", + "3. 
{\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://133.94.12.37:3160/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}} \n", + "\n", + "### 用户\n", + "现在我给你另一条地址,请识别出里面的元素。输入地址:广东省深圳市南山区科技园北区 \n", + "\n", + "### 助手\n", + "<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-address\", \"url\": \"http://133.94.12.37:3160/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"community\": \"科技园北区\"}\n", + "```<|endofexec|>\n", + "地址识别json表示:{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"community\": \"科技园北区\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n", + "\n", + "[LABELS]<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-address\", \"url\": \"http://159.1.4.174:3210/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}\n", + "```<|endofexec|>\n", + "地址识别json表示:{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": 
\"\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n", + "-----------------------------------------------------------------------------------\n", + "[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。目前支持的插件信息如下,请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件,则需要将插件调用请求按照json格式给出,必须包含api_name、url、parameters字段,并在其前后使用<|startofthink|>和<|endofthink|>作为标志。然后你需要根据插件API调用结果生成合理的答复;若无需调用插件,则直接给出对应回复即可:\n", + "\n", + "1. {\"name\": \"modelscope_text-translation-zh2en\", \"description\": \"将输入的中文文本翻译成英文\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_text-translation-zh2en\", \"model_id\": \"/damo/nlp_csanmt_translation_zh2en\", \"method\": \"post\", \"description\": \"将输入的中文文本翻译成英文\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的中文文本\", \"required\": \"True\"}]}]}\n", + "\n", + "2. {\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}\n", + "\n", + "3. 
{\"name\": \"modelscope_image-generation\", \"description\": \"针对文本输入,生成对应的图片\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_image-generation\", \"model_id\": \"/damo/image_generation\", \"method\": \"post\", \"description\": \"针对文本输入,生成对应的图片\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本信息\", \"required\": \"True\"}]}]} \n", + "\n", + "### 用户\n", + "歌手:古巨基\n", + "歌曲名:爱情马戏班\n", + "经典歌词:情是何等诡秘能令人使出看家把戏;恋爱就像走纲线般惊险;为你献技 像马戏班\n", + "请结合以上信息,编写一个智能音响的播放导语,需要有文采,字数30字以内,凸显一下即将播放该歌曲 \n", + "\n", + "### 助手\n", + "古巨基的《爱情马戏班》,是一首经典的情歌,歌词中充满了对爱情的向往和对爱情的渴望,让人不禁沉醉其中。这首歌的旋律优美动听,歌词朗朗上口,让人听了就忍不住跟着哼唱。\n", + "\n", + "[LABELS]亲爱的主人,今天我为您带来的是古巨基的经典之作——《爱情马戏班》。这首歌曲描绘了情与爱的神秘和惊险,让人们为之倾倒。让我们一起享受这场爱情的马戏表演吧!\n", + "-----------------------------------------------------------------------------------\n", + "[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://114.42.178.183:8005/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "2. 
{\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://93.82.87.89:6631/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://4.105.93.165:8143/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n", + "\n", + "### 用户\n", + "按照给定的schema抽取出下面文本对应的信息\n", + "schema:{\"动物\": null, \"食物\": null, \"颜色\": null}\n", + "这只棕色的狗狗很喜欢吃狗粮。 \n", + "\n", + "### 助手\n", + "<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"动物\": [\"棕色的狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"动物\": [\"棕色的狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": 
[\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n", + "\n", + "[LABELS]<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://114.42.178.183:8005/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n", + "-----------------------------------------------------------------------------------\n", + "[TEST] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://28.179.171.5:6428/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "2. 
{\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://100.111.18.38:6408/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://144.67.18.142:6381/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n", + "\n", + "### 用户\n", + "按照给定的schema抽取出下面文本对应的信息\n", + "schema:{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n", + "谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。 \n", + "\n", + "### 助手\n", + "<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://144.67.18.142:6381/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"人物\": [\"谷歌公司\"], \"地理位置\": [\"美国\"], \"组织机构\": [\"科技公司\"]}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"人物\": 
[\"谷歌公司\"], \"地理位置\": [\"美国\"], \"组织机构\": [\"科技公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n", + "\n", + "[LABELS]<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://100.111.18.38:6408/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n", + "-----------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n", + "for d in test_dataset[:5]:\n", + " system = d['system']\n", + " user = d['user']\n", + " assistant = d['assistant']\n", + " input_ids = tokenize_function(system, user, None, tokenizer)['input_ids']\n", + " print(f'[TEST]{tokenizer.decode(input_ids)}', end='')\n", + " input_ids = torch.tensor(input_ids)[None].cuda()\n", + " attention_mask = torch.ones_like(input_ids)\n", + " generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n", + " attention_mask=attention_mask,\n", + " streamer=streamer, pad_token_id=tokenizer.eos_token_id, \n", + " temperature=0.7, top_k=50, top_p=0.7, do_sample=True)\n", + " print()\n", + " print(f'[LABELS]{assistant}')\n", + " print('-----------------------------------------------------------------------------------')\n", + " # input('next[ENTER]')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { 
+ "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/pytorch/llm_agent/baichuan_sft.ipynb b/examples/pytorch/llm_agent/baichuan_sft.ipynb new file mode 100644 index 00000000..6c41ff25 --- /dev/null +++ b/examples/pytorch/llm_agent/baichuan_sft.ipynb @@ -0,0 +1,1814 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Baichuan + Lora + Agent\n", + "baichuan-7B是由百川智能开发的一个开源的大规模预训练模型。基于Transformer结构,在大约1.2万亿tokens上训练的70亿参数模型,支持中英双语,上下文窗口长度为4096。在标准的中文和英文权威benchmark(C-EVAL/MMLU)上均取得同尺寸最好的效果。" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. Ref: https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary\n", + "2. 以下脚本可以在2*A10环境下正常运行, 大概占用40G显存\n", + "3. 
python>=3.8" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 配置实验环境" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install modelscope\n", + "# !pip install numpy pandas matplotlib scikit-learn\n", + "# !pip install transformers datasets\n", + "# !conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia\n", + "# !pip install tqdm tensorboard torchmetrics sentencepiece charset_normalizer accelerate\n", + "\n", + "# !pip install numpy -U # Resolve torchmetrics dependencies and update numpy" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-07-02 17:24:09,391] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/hackathon/miniconda3/envs/hackathon/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-07-02 17:24:09,870 - modelscope - INFO - PyTorch version 2.0.1 Found.\n", + "2023-07-02 17:24:09,871 - modelscope - INFO - Loading ast index from /home/hackathon/.cache/modelscope/ast_indexer\n", + "2023-07-02 17:24:09,895 - modelscope - INFO - Loading done! 
Current index file version is 1.6.2, with md5 ddf811ee982377c1357284a2bfda3dec and a total number of 861 components indexed\n", + "2023-07-02 17:24:10,570 - modelscope - INFO - [0, 1]\n", + "2023-07-02 17:24:10,719 - modelscope - INFO - Using device: cuda:0,1\n", + "2023-07-02 17:24:10,720 - modelscope - INFO - Global seed set to 42\n" + ] + } + ], + "source": [ + "from _common import *\n", + "device_ids = [0, 1]\n", + "select_device(device_ids)\n", + "_ = seed_everything(42)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 导入Model, Tokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 17:24:11,036 - modelscope - INFO - Model revision not specified, use default: master in development mode\n", + "2023-07-02 17:24:11,037 - modelscope - INFO - Development mode use revision: master\n", + "2023-07-02 17:24:11,364 - modelscope - INFO - model_config: BaiChuanConfig {\n", + " \"architectures\": [\n", + " \"BaiChuanForCausalLM\"\n", + " ],\n", + " \"auto_map\": {\n", + " \"AutoConfig\": \"configuration_baichuan.BaiChuanConfig\",\n", + " \"AutoModelForCausalLM\": \"modeling_baichuan.BaiChuanForCausalLM\"\n", + " },\n", + " \"bos_token_id\": 1,\n", + " \"eos_token_id\": 2,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 4096,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 11008,\n", + " \"max_position_embeddings\": 4096,\n", + " \"model_type\": \"baichuan\",\n", + " \"num_attention_heads\": 32,\n", + " \"num_hidden_layers\": 32,\n", + " \"pad_token_id\": 0,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"tie_word_embeddings\": false,\n", + " \"torch_dtype\": \"float16\",\n", + " \"transformers_version\": \"4.30.2\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 64000\n", + "}\n", + "\n", + "The model weights are not tied. 
Please use the `tie_weights` method before using the `infer_auto_device` function.\n" + ] + } + ], + "source": [ + "WORK_DIR = 'runs/baichuan'\n", + "LORA_TARGET_MODULES = ['W_pack']\n", + "#\n", + "model_dir = snapshot_download('baichuan-inc/baichuan-7B', 'v1.0.5')\n", + "model, tokenizer = get_baichuan7B_model_tokenizer(model_dir)\n", + "#\n", + "GRADIENT_CHECKPOINTING = True\n", + "if GRADIENT_CHECKPOINTING:\n", + " model.gradient_checkpointing_enable()\n", + " model.enable_input_require_grads()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 准备Lora" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 17:24:21,741 - modelscope - INFO - lora_config: LoRAConfig(rank=8, replace_modules=['W_pack'], lora_alpha=32, lora_dropout=0.1, merge_weights=True, use_merged_linear=False, enable_lora=None, fan_in_fan_out=False, bias='none', only_lora_trainable=True, pretrained_weights=None)\n", + "2023-07-02 17:24:36,360 - modelscope - INFO - model.embed_tokens.weight: requires_grad=False\n", + "2023-07-02 17:24:36,360 - modelscope - INFO - model.layers.0.self_attn.W_pack.weight: requires_grad=False\n", + "2023-07-02 17:24:36,361 - modelscope - INFO - model.layers.0.self_attn.W_pack.lora_A: requires_grad=True\n", + "2023-07-02 17:24:36,361 - modelscope - INFO - model.layers.0.self_attn.W_pack.lora_B: requires_grad=True\n", + "2023-07-02 17:24:36,361 - modelscope - INFO - model.layers.0.self_attn.o_proj.weight: requires_grad=False\n", + "2023-07-02 17:24:36,362 - modelscope - INFO - model.layers.0.mlp.gate_proj.weight: requires_grad=False\n", + "2023-07-02 17:24:36,362 - modelscope - INFO - model.layers.0.mlp.down_proj.weight: requires_grad=False\n", + "2023-07-02 17:24:36,363 - modelscope - INFO - model.layers.0.mlp.up_proj.weight: requires_grad=False\n", + "2023-07-02 17:24:36,363 - modelscope - INFO - 
model.layers.0.input_layernorm.weight: requires_grad=False\n", + "2023-07-02 17:24:36,363 - modelscope - INFO - model.layers.0.post_attention_layernorm.weight: requires_grad=False\n", + "2023-07-02 17:24:36,363 - modelscope - INFO - model.layers.1.self_attn.W_pack.weight: requires_grad=False\n", + "2023-07-02 17:24:36,364 - modelscope - INFO - model.layers.1.self_attn.W_pack.lora_A: requires_grad=True\n", + "2023-07-02 17:24:36,364 - modelscope - INFO - model.layers.1.self_attn.W_pack.lora_B: requires_grad=True\n", + "2023-07-02 17:24:36,364 - modelscope - INFO - model.layers.1.self_attn.o_proj.weight: requires_grad=False\n", + "2023-07-02 17:24:36,364 - modelscope - INFO - model.layers.1.mlp.gate_proj.weight: requires_grad=False\n", + "2023-07-02 17:24:36,365 - modelscope - INFO - model.layers.1.mlp.down_proj.weight: requires_grad=False\n", + "2023-07-02 17:24:36,365 - modelscope - INFO - model.layers.1.mlp.up_proj.weight: requires_grad=False\n", + "2023-07-02 17:24:36,365 - modelscope - INFO - model.layers.1.input_layernorm.weight: requires_grad=False\n", + "2023-07-02 17:24:36,365 - modelscope - INFO - model.layers.1.post_attention_layernorm.weight: requires_grad=False\n", + "2023-07-02 17:24:36,365 - modelscope - INFO - model.layers.2.self_attn.W_pack.weight: requires_grad=False\n", + "2023-07-02 17:24:36,366 - modelscope - INFO - ...\n", + "2023-07-02 17:24:36,368 - modelscope - INFO - BaiChuanForCausalLM: 7004.7539M Params (4.1943M Trainable), 33.5565M Buffers.\n", + "2023-07-02 17:24:36,370 - modelscope - INFO - device: cuda:0, dtype: torch.float16\n" + ] + }, + { + "data": { + "text/plain": [ + "BaiChuanForCausalLM(\n", + " (model): Model(\n", + " (embed_tokens): Embedding(64000, 4096, padding_idx=0)\n", + " (layers): ModuleList(\n", + " (0-31): 32 x DecoderLayer(\n", + " (self_attn): Attention(\n", + " (W_pack): Linear(\n", + " in_features=4096, out_features=12288, bias=False\n", + " (lora_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (o_proj): 
Linear(in_features=4096, out_features=4096, bias=False)\n", + " (rotary_emb): RotaryEmbedding()\n", + " )\n", + " (mlp): MLP(\n", + " (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n", + " (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n", + " (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n", + " (act_fn): SiLUActivation()\n", + " )\n", + " (input_layernorm): RMSNorm()\n", + " (post_attention_layernorm): RMSNorm()\n", + " )\n", + " )\n", + " (norm): RMSNorm()\n", + " )\n", + " (lm_head): Linear(in_features=4096, out_features=64000, bias=False)\n", + ")" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "LORA_RANK = 8\n", + "LORA_ALPHA = 32\n", + "LORA_DROPOUT_P = 0.1\n", + "lora_config = LoRAConfig(\n", + " replace_modules=LORA_TARGET_MODULES,\n", + " rank=LORA_RANK,\n", + " lora_alpha=LORA_ALPHA,\n", + " lora_dropout=LORA_DROPOUT_P)\n", + "logger.info(f'lora_config: {lora_config}')\n", + "Swift.prepare_model(model, lora_config)\n", + "#\n", + "show_freeze_layers(model)\n", + "print_model_info(model)\n", + "_p = list(model.parameters())[100]\n", + "logger.info(f'device: {_p.device}, dtype: {_p.dtype}')\n", + "model.bfloat16()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 导入Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 5036/5036 [00:12<00:00, 398.82it/s]\n", + "100%|██████████| 285/285 [00:00<00:00, 383.15it/s]\n", + "2023-07-02 17:24:49,863 - modelscope - INFO - Dataset Token Length: 958.649707±371.357483, min=44.000000, max=2045.000000, size=4953\n", + "2023-07-02 17:24:49,864 - modelscope - INFO - Dataset Token Length: 993.447653±337.821458, min=75.000000, max=1946.000000, size=277\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "[INPUT_IDS] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://109.199.101.10:1485/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://9.32.64.200:5873/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "3. 
{\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://54.149.78.185:3979/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n", + "\n", + "### 用户\n", + "按照给定的schema抽取出下面文本对应的信息\n", + "schema:{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n", + "近日,美国政府宣布将对中国1000多种商品加征关税,并威胁进一步加征关税。 \n", + "\n", + "### 助手\n", + " <|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://9.32.64.200:5873/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"近日,美国政府宣布将对中国1000多种商品加征关税,并威胁进一步加征关税。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"人物\": [], \"地理位置\": [\"中国\", \"美国\"], \"组织机构\": []}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"人物\": [], \"地理位置\": [\"中国\", \"美国\"], \"组织机构\": []}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n", + "\n", + "[LABLES] <|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://9.32.64.200:5873/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"近日,美国政府宣布将对中国1000多种商品加征关税,并威胁进一步加征关税。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"人物\": [], \"地理位置\": [\"中国\", \"美国\"], \"组织机构\": []}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"人物\": [], \"地理位置\": [\"中国\", \"美国\"], 
\"组织机构\": []}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n" + ] + } + ], + "source": [ + "tokenize_function = partial(tokenize_function, tokenizer=tokenizer)\n", + "train_dataset = make_dataset('train', tokenize_function)\n", + "val_dataset = make_dataset('validation', tokenize_function)\n", + "# Data analysis\n", + "stat_dataset(train_dataset)\n", + "stat_dataset(val_dataset)\n", + "data_collate_fn = partial(data_collate_fn, tokenizer=tokenizer)\n", + "print_examples(train_dataset[0], tokenizer)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 配置Config" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 17:24:49,892 - modelscope - INFO - work_dir: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449\n" + ] + } + ], + "source": [ + "cfg_file = os.path.join(model_dir, 'configuration.json')\n", + "#\n", + "BATCH_SIZE = 1\n", + "MAX_EPOCHS = 1\n", + "T_max = get_T_max(len(train_dataset), BATCH_SIZE, MAX_EPOCHS, True)\n", + "WORK_DIR = get_work_dir(WORK_DIR)\n", + "EVAL_INTERVAL = 200\n", + "CONFIG = Config({\n", + " 'train': {\n", + " 'dataloader': {\n", + " 'batch_size_per_gpu': BATCH_SIZE,\n", + " 'workers_per_gpu': 1,\n", + " 'shuffle': True,\n", + " 'drop_last': True,\n", + " 'pin_memory': True\n", + " },\n", + " 'max_epochs': MAX_EPOCHS,\n", + " 'work_dir': WORK_DIR,\n", + " 'optimizer': {\n", + " 'type': 'AdamW',\n", + " 'lr': 1e-4,\n", + " 'weight_decay': 0.01,\n", + " 'options': {\n", + " 'cumulative_iters': 16, 'grad_clip': {\n", + " 'norm_type': 2,\n", + " 'max_norm': 2.0\n", + " }\n", + " }\n", + " },\n", + " 'lr_scheduler': {\n", + " 'type': 'CosineAnnealingLR',\n", + " 'T_max': T_max,\n", + " 'eta_min': 1e-5,\n", + " 'options': {\n", + " 'by_epoch': False,\n", + " 'warmup': {\n", + " 'type': 'LinearWarmup',\n", + " 
'warmup_ratio': 0.1,\n", + " 'warmup_iters': 200\n", + " }\n", + " }\n", + " },\n", + " 'hooks': [\n", + " {'type': 'CheckpointHook', 'by_epoch': False, 'interval': EVAL_INTERVAL, 'max_checkpoint_num': 1},\n", + " {'type': 'EvaluationHook', 'by_epoch': False, 'interval': EVAL_INTERVAL},\n", + " {'type': 'BestCkptSaverHook',\n", + " 'metric_key': 'acc',\n", + " 'save_best': True, 'rule': 'max', 'max_checkpoint_num': 1},\n", + " {'type': 'TextLoggerHook',\n", + " 'by_epoch': True, # Whether EpochBasedTrainer is used\n", + " 'interval': 5},\n", + " {'type': 'TensorboardHook', 'by_epoch': False, 'interval': 5}\n", + " ]\n", + " },\n", + " 'evaluation': {\n", + " 'dataloader': {\n", + " 'batch_size_per_gpu': BATCH_SIZE,\n", + " 'workers_per_gpu': 1,\n", + " 'shuffle': False,\n", + " 'drop_last': False,\n", + " 'pin_memory': True\n", + " },\n", + " 'metrics': [\n", + " {'type': 'my_metric', 'vocab_size': tokenizer.vocab_size}\n", + " ]\n", + " }\n", + "})" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 微调" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 17:24:49,903 - modelscope - INFO - ==========================Training Config Start==========================\n", + "2023-07-02 17:24:49,904 - modelscope - INFO - {\n", + " \"framework\": \"pytorch\",\n", + " \"task\": \"text-generation\",\n", + " \"model\": {\n", + " \"type\": \"Baichuan-7B\"\n", + " },\n", + " \"pipeline\": {\n", + " \"type\": \"Baichuan-7B-text-generation-pipe\"\n", + " },\n", + " \"allow_remote\": true,\n", + " \"train\": {\n", + " \"hooks\": [\n", + " {\n", + " \"type\": \"TensorboardHook\",\n", + " \"by_epoch\": false,\n", + " \"interval\": 5\n", + " }\n", + " ],\n", + " \"dataloader\": {\n", + " \"batch_size_per_gpu\": 1,\n", + " \"workers_per_gpu\": 1,\n", + " \"shuffle\": true,\n", + " \"drop_last\": true,\n", + " 
\"pin_memory\": true\n", + " },\n", + " \"max_epochs\": 1,\n", + " \"work_dir\": \"/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449\",\n", + " \"optimizer\": {\n", + " \"type\": \"AdamW\",\n", + " \"lr\": 0.0001,\n", + " \"weight_decay\": 0.01,\n", + " \"options\": {\n", + " \"cumulative_iters\": 16,\n", + " \"grad_clip\": {\n", + " \"norm_type\": 2,\n", + " \"max_norm\": 2.0\n", + " }\n", + " }\n", + " },\n", + " \"lr_scheduler\": {\n", + " \"type\": \"CosineAnnealingLR\",\n", + " \"T_max\": 4953,\n", + " \"eta_min\": 1e-05,\n", + " \"options\": {\n", + " \"by_epoch\": false,\n", + " \"warmup\": {\n", + " \"type\": \"LinearWarmup\",\n", + " \"warmup_ratio\": 0.1,\n", + " \"warmup_iters\": 200\n", + " }\n", + " }\n", + " },\n", + " \"checkpoint\": {\n", + " \"period\": {\n", + " \"by_epoch\": false,\n", + " \"interval\": 200,\n", + " \"max_checkpoint_num\": 1\n", + " },\n", + " \"best\": {\n", + " \"metric_key\": \"acc\",\n", + " \"save_best\": true,\n", + " \"rule\": \"max\",\n", + " \"max_checkpoint_num\": 1\n", + " }\n", + " },\n", + " \"logging\": {\n", + " \"by_epoch\": true,\n", + " \"interval\": 5\n", + " }\n", + " },\n", + " \"evaluation\": {\n", + " \"dataloader\": {\n", + " \"batch_size_per_gpu\": 1,\n", + " \"workers_per_gpu\": 1,\n", + " \"shuffle\": false,\n", + " \"drop_last\": false,\n", + " \"pin_memory\": true\n", + " },\n", + " \"metrics\": [\n", + " {\n", + " \"type\": \"my_metric\",\n", + " \"vocab_size\": 64000\n", + " }\n", + " ],\n", + " \"period\": {\n", + " \"by_epoch\": false,\n", + " \"interval\": 200\n", + " }\n", + " }\n", + "}\n", + "2023-07-02 17:24:49,904 - modelscope - INFO - ===========================Training Config End===========================\n", + "2023-07-02 17:24:49,905 - modelscope - WARNING - ('OPTIMIZER', 'default', 'AdamW') not found in ast index file\n", + "2023-07-02 17:24:49,906 - modelscope - WARNING - ('LR_SCHEDULER', 'default', 'CosineAnnealingLR') not found in ast index file\n", + "2023-07-02 
17:24:49,907 - modelscope - INFO - Stage: before_run:\n", + " (ABOVE_NORMAL) OptimizerHook \n", + " (LOW ) LrSchedulerHook \n", + " (LOW ) BestCkptSaverHook \n", + " (LOW ) CheckpointHook \n", + " (VERY_LOW ) TextLoggerHook \n", + " (VERY_LOW ) TensorboardHook \n", + " -------------------- \n", + "Stage: before_train_epoch:\n", + " (LOW ) LrSchedulerHook \n", + " -------------------- \n", + "Stage: before_train_iter:\n", + " (ABOVE_NORMAL) OptimizerHook \n", + " -------------------- \n", + "Stage: after_train_iter:\n", + " (ABOVE_NORMAL) OptimizerHook \n", + " (NORMAL ) EvaluationHook \n", + " (LOW ) LrSchedulerHook \n", + " (LOW ) BestCkptSaverHook \n", + " (LOW ) CheckpointHook \n", + " (VERY_LOW ) TextLoggerHook \n", + " (VERY_LOW ) TensorboardHook \n", + " -------------------- \n", + "Stage: after_train_epoch:\n", + " (NORMAL ) EvaluationHook \n", + " (LOW ) LrSchedulerHook \n", + " (LOW ) BestCkptSaverHook \n", + " (LOW ) CheckpointHook \n", + " (VERY_LOW ) TextLoggerHook \n", + " (VERY_LOW ) TensorboardHook \n", + " -------------------- \n", + "Stage: after_val_epoch:\n", + " (VERY_LOW ) TextLoggerHook \n", + " (VERY_LOW ) TensorboardHook \n", + " -------------------- \n", + "Stage: after_run:\n", + " (LOW ) BestCkptSaverHook \n", + " (LOW ) CheckpointHook \n", + " (VERY_LOW ) TensorboardHook \n", + " -------------------- \n", + "2023-07-02 17:24:49,913 - modelscope - INFO - Checkpoints will be saved to /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449\n", + "2023-07-02 17:24:49,916 - modelscope - INFO - Checkpoints will be saved to /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449\n", + "2023-07-02 17:24:49,917 - modelscope - INFO - Text logs will be saved to /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449\n", + "2023-07-02 17:24:49,917 - modelscope - INFO - tensorboard files will be saved to /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/tensorboard_output\n", + "`use_cache=True` is incompatible with 
gradient checkpointing. Setting `use_cache=False`...\n", + "2023-07-02 17:24:55,315 - modelscope - INFO - epoch [1][5/4953]\tlr: 1.000e-05, memory: 7084, loss: 5.2094\n", + "2023-07-02 17:24:59,926 - modelscope - INFO - epoch [1][10/4953]\tlr: 1.000e-05, memory: 7084, loss: 1.9516\n", + "2023-07-02 17:25:05,112 - modelscope - INFO - epoch [1][15/4953]\tlr: 1.000e-05, memory: 7504, loss: 1.8344\n", + "2023-07-02 17:25:13,131 - modelscope - INFO - epoch [1][20/4953]\tlr: 1.225e-05, memory: 8075, loss: 3.3937\n", + "2023-07-02 17:25:19,098 - modelscope - INFO - epoch [1][25/4953]\tlr: 1.450e-05, memory: 8102, loss: 1.8047\n", + "2023-07-02 17:25:25,763 - modelscope - INFO - epoch [1][30/4953]\tlr: 1.675e-05, memory: 8102, loss: 1.5594\n", + "2023-07-02 17:25:33,888 - modelscope - INFO - epoch [1][35/4953]\tlr: 1.900e-05, memory: 8293, loss: 1.5852\n", + "2023-07-02 17:25:39,548 - modelscope - INFO - epoch [1][40/4953]\tlr: 2.125e-05, memory: 8293, loss: 1.7828\n", + "2023-07-02 17:25:44,599 - modelscope - INFO - epoch [1][45/4953]\tlr: 2.350e-05, memory: 8293, loss: 5.5922\n", + "2023-07-02 17:25:49,692 - modelscope - INFO - epoch [1][50/4953]\tlr: 2.575e-05, memory: 8293, loss: 2.6641\n", + "2023-07-02 17:25:56,104 - modelscope - INFO - epoch [1][55/4953]\tlr: 2.800e-05, memory: 8742, loss: 2.2344\n", + "2023-07-02 17:26:04,765 - modelscope - INFO - epoch [1][60/4953]\tlr: 3.025e-05, memory: 8742, loss: 1.7320\n", + "2023-07-02 17:26:10,288 - modelscope - INFO - epoch [1][65/4953]\tlr: 3.250e-05, memory: 8742, loss: 5.0578\n", + "2023-07-02 17:26:14,998 - modelscope - INFO - epoch [1][70/4953]\tlr: 3.475e-05, memory: 8742, loss: 4.0109\n", + "2023-07-02 17:26:21,600 - modelscope - INFO - epoch [1][75/4953]\tlr: 3.700e-05, memory: 8742, loss: 1.7266\n", + "2023-07-02 17:26:26,920 - modelscope - INFO - epoch [1][80/4953]\tlr: 3.925e-05, memory: 8742, loss: 2.9578\n", + "2023-07-02 17:26:32,447 - modelscope - INFO - epoch [1][85/4953]\tlr: 4.150e-05, memory: 8742, loss: 
5.8422\n", + "2023-07-02 17:26:38,768 - modelscope - INFO - epoch [1][90/4953]\tlr: 4.375e-05, memory: 8742, loss: 1.8719\n", + "2023-07-02 17:26:45,955 - modelscope - INFO - epoch [1][95/4953]\tlr: 4.600e-05, memory: 8742, loss: 1.4359\n", + "2023-07-02 17:26:50,324 - modelscope - INFO - epoch [1][100/4953]\tlr: 4.825e-05, memory: 8742, loss: 5.6125\n", + "2023-07-02 17:26:58,123 - modelscope - INFO - epoch [1][105/4953]\tlr: 5.050e-05, memory: 8742, loss: 2.9656\n", + "2023-07-02 17:27:04,523 - modelscope - INFO - epoch [1][110/4953]\tlr: 5.275e-05, memory: 8742, loss: 1.7484\n", + "2023-07-02 17:27:09,550 - modelscope - INFO - epoch [1][115/4953]\tlr: 5.500e-05, memory: 8742, loss: 2.7133\n", + "2023-07-02 17:27:17,037 - modelscope - INFO - epoch [1][120/4953]\tlr: 5.725e-05, memory: 8742, loss: 1.9953\n", + "2023-07-02 17:27:22,364 - modelscope - INFO - epoch [1][125/4953]\tlr: 5.950e-05, memory: 8742, loss: 4.4578\n", + "2023-07-02 17:27:26,915 - modelscope - INFO - epoch [1][130/4953]\tlr: 6.175e-05, memory: 8742, loss: 4.4344\n", + "2023-07-02 17:27:34,586 - modelscope - INFO - epoch [1][135/4953]\tlr: 6.400e-05, memory: 8742, loss: 1.6328\n", + "2023-07-02 17:27:41,580 - modelscope - INFO - epoch [1][140/4953]\tlr: 6.625e-05, memory: 8742, loss: 3.9422\n", + "2023-07-02 17:27:47,073 - modelscope - INFO - epoch [1][145/4953]\tlr: 6.850e-05, memory: 8742, loss: 2.0562\n", + "2023-07-02 17:27:53,069 - modelscope - INFO - epoch [1][150/4953]\tlr: 7.075e-05, memory: 8742, loss: 1.8477\n", + "2023-07-02 17:27:58,364 - modelscope - INFO - epoch [1][155/4953]\tlr: 7.300e-05, memory: 8742, loss: 4.5445\n", + "2023-07-02 17:28:05,747 - modelscope - INFO - epoch [1][160/4953]\tlr: 7.525e-05, memory: 8742, loss: 4.0109\n", + "2023-07-02 17:28:12,108 - modelscope - INFO - epoch [1][165/4953]\tlr: 7.750e-05, memory: 8742, loss: 2.0578\n", + "2023-07-02 17:28:17,145 - modelscope - INFO - epoch [1][170/4953]\tlr: 7.975e-05, memory: 8742, loss: 1.9109\n", + "2023-07-02 
17:28:23,027 - modelscope - INFO - epoch [1][175/4953]\tlr: 8.200e-05, memory: 8742, loss: 3.2410\n", + "2023-07-02 17:28:27,778 - modelscope - INFO - epoch [1][180/4953]\tlr: 8.425e-05, memory: 8742, loss: 2.9000\n", + "2023-07-02 17:28:34,508 - modelscope - INFO - epoch [1][185/4953]\tlr: 8.650e-05, memory: 8742, loss: 1.6062\n", + "2023-07-02 17:28:40,560 - modelscope - INFO - epoch [1][190/4953]\tlr: 8.875e-05, memory: 8742, loss: 1.5594\n", + "2023-07-02 17:28:46,479 - modelscope - INFO - epoch [1][195/4953]\tlr: 9.100e-05, memory: 8742, loss: 1.9875\n", + "2023-07-02 17:28:53,324 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 17:31:08,796 - modelscope - INFO - Saving checkpoint at 200 iter\n", + "2023-07-02 17:31:08,837 - modelscope - INFO - Saving checkpoint at 200 iter\n", + "2023-07-02 17:31:08,875 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 8742, evaluation/acc: 0.7108, evaluation/loss: 2.4241, loss: 1.8062\n", + "2023-07-02 17:31:15,472 - modelscope - INFO - epoch [1][205/4953]\tlr: 9.550e-05, memory: 8742, loss: 1.9172\n", + "2023-07-02 17:31:21,195 - modelscope - INFO - epoch [1][210/4953]\tlr: 9.775e-05, memory: 8742, loss: 2.5586\n", + "2023-07-02 17:31:26,642 - modelscope - INFO - epoch [1][215/4953]\tlr: 1.000e-04, memory: 8742, loss: 2.1422\n", + "2023-07-02 17:31:32,941 - modelscope - INFO - epoch [1][220/4953]\tlr: 9.998e-05, memory: 8742, loss: 2.8609\n", + "2023-07-02 17:31:37,465 - modelscope - INFO - epoch [1][225/4953]\tlr: 9.996e-05, memory: 8742, loss: 1.9953\n", + "2023-07-02 17:31:42,190 - modelscope - INFO - epoch [1][230/4953]\tlr: 9.994e-05, memory: 8742, loss: 1.8422\n", + "2023-07-02 17:31:49,617 - modelscope - INFO - epoch [1][235/4953]\tlr: 9.992e-05, memory: 8742, loss: 1.8328\n", + "2023-07-02 17:31:54,582 - modelscope - INFO - epoch [1][240/4953]\tlr: 9.990e-05, memory: 8742, loss: 
2.5031\n", + "2023-07-02 17:32:03,094 - modelscope - INFO - epoch [1][245/4953]\tlr: 9.988e-05, memory: 8742, loss: 3.4578\n", + "2023-07-02 17:32:09,110 - modelscope - INFO - epoch [1][250/4953]\tlr: 9.986e-05, memory: 8742, loss: 3.1359\n", + "2023-07-02 17:32:14,901 - modelscope - INFO - epoch [1][255/4953]\tlr: 9.984e-05, memory: 8742, loss: 3.4672\n", + "2023-07-02 17:32:21,012 - modelscope - INFO - epoch [1][260/4953]\tlr: 9.982e-05, memory: 8742, loss: 1.3734\n", + "2023-07-02 17:32:26,921 - modelscope - INFO - epoch [1][265/4953]\tlr: 9.979e-05, memory: 8742, loss: 1.7055\n", + "2023-07-02 17:32:33,958 - modelscope - INFO - epoch [1][270/4953]\tlr: 9.977e-05, memory: 8933, loss: 4.9609\n", + "2023-07-02 17:32:39,555 - modelscope - INFO - epoch [1][275/4953]\tlr: 9.975e-05, memory: 8933, loss: 3.0906\n", + "2023-07-02 17:32:45,339 - modelscope - INFO - epoch [1][280/4953]\tlr: 9.972e-05, memory: 8933, loss: 3.2016\n", + "2023-07-02 17:32:51,159 - modelscope - INFO - epoch [1][285/4953]\tlr: 9.970e-05, memory: 8933, loss: 3.4461\n", + "2023-07-02 17:32:57,166 - modelscope - INFO - epoch [1][290/4953]\tlr: 9.967e-05, memory: 8933, loss: 1.9609\n", + "2023-07-02 17:33:06,217 - modelscope - INFO - epoch [1][295/4953]\tlr: 9.965e-05, memory: 8933, loss: 1.9680\n", + "2023-07-02 17:33:12,393 - modelscope - INFO - epoch [1][300/4953]\tlr: 9.962e-05, memory: 8933, loss: 1.5422\n", + "2023-07-02 17:33:17,688 - modelscope - INFO - epoch [1][305/4953]\tlr: 9.960e-05, memory: 8933, loss: 2.6953\n", + "2023-07-02 17:33:21,863 - modelscope - INFO - epoch [1][310/4953]\tlr: 9.957e-05, memory: 8933, loss: 3.0094\n", + "2023-07-02 17:33:27,411 - modelscope - INFO - epoch [1][315/4953]\tlr: 9.954e-05, memory: 8933, loss: 1.9156\n", + "2023-07-02 17:33:33,136 - modelscope - INFO - epoch [1][320/4953]\tlr: 9.952e-05, memory: 8933, loss: 1.9672\n", + "2023-07-02 17:33:38,217 - modelscope - INFO - epoch [1][325/4953]\tlr: 9.949e-05, memory: 8933, loss: 4.3375\n", + "2023-07-02 
17:33:44,012 - modelscope - INFO - epoch [1][330/4953]\tlr: 9.946e-05, memory: 8933, loss: 1.8797\n", + "2023-07-02 17:33:49,670 - modelscope - INFO - epoch [1][335/4953]\tlr: 9.943e-05, memory: 8933, loss: 3.0969\n", + "2023-07-02 17:33:55,428 - modelscope - INFO - epoch [1][340/4953]\tlr: 9.940e-05, memory: 8933, loss: 3.2477\n", + "2023-07-02 17:34:02,117 - modelscope - INFO - epoch [1][345/4953]\tlr: 9.937e-05, memory: 8933, loss: 2.7969\n", + "2023-07-02 17:34:08,037 - modelscope - INFO - epoch [1][350/4953]\tlr: 9.934e-05, memory: 8933, loss: 2.3578\n", + "2023-07-02 17:34:13,172 - modelscope - INFO - epoch [1][355/4953]\tlr: 9.931e-05, memory: 8933, loss: 2.0656\n", + "2023-07-02 17:34:19,283 - modelscope - INFO - epoch [1][360/4953]\tlr: 9.928e-05, memory: 8933, loss: 1.8438\n", + "2023-07-02 17:34:25,323 - modelscope - INFO - epoch [1][365/4953]\tlr: 9.925e-05, memory: 8933, loss: 2.1828\n", + "2023-07-02 17:34:31,845 - modelscope - INFO - epoch [1][370/4953]\tlr: 9.922e-05, memory: 8933, loss: 2.0234\n", + "2023-07-02 17:34:40,587 - modelscope - INFO - epoch [1][375/4953]\tlr: 9.919e-05, memory: 8933, loss: 2.3086\n", + "2023-07-02 17:34:45,650 - modelscope - INFO - epoch [1][380/4953]\tlr: 9.915e-05, memory: 8933, loss: 3.6734\n", + "2023-07-02 17:34:51,009 - modelscope - INFO - epoch [1][385/4953]\tlr: 9.912e-05, memory: 8933, loss: 1.3594\n", + "2023-07-02 17:34:57,229 - modelscope - INFO - epoch [1][390/4953]\tlr: 9.909e-05, memory: 8933, loss: 2.3117\n", + "2023-07-02 17:35:03,231 - modelscope - INFO - epoch [1][395/4953]\tlr: 9.905e-05, memory: 8933, loss: 1.4961\n", + "2023-07-02 17:35:08,373 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.05it/s]\n", + "2023-07-02 17:37:23,763 - modelscope - INFO - Saving checkpoint at 400 iter\n", + "2023-07-02 17:37:23,803 - modelscope - INFO - deleting checkpoint: 
/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_200\n", + "2023-07-02 17:37:23,807 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 8933, evaluation/acc: 0.7079, evaluation/loss: 2.1381, loss: 1.9438\n", + "2023-07-02 17:37:28,880 - modelscope - INFO - epoch [1][405/4953]\tlr: 9.898e-05, memory: 8933, loss: 3.1016\n", + "2023-07-02 17:37:35,463 - modelscope - INFO - epoch [1][410/4953]\tlr: 9.895e-05, memory: 8933, loss: 2.5531\n", + "2023-07-02 17:37:41,349 - modelscope - INFO - epoch [1][415/4953]\tlr: 9.891e-05, memory: 8933, loss: 2.2984\n", + "2023-07-02 17:37:47,522 - modelscope - INFO - epoch [1][420/4953]\tlr: 9.888e-05, memory: 8933, loss: 1.5930\n", + "2023-07-02 17:37:54,150 - modelscope - INFO - epoch [1][425/4953]\tlr: 9.884e-05, memory: 8933, loss: 2.2938\n", + "2023-07-02 17:37:59,915 - modelscope - INFO - epoch [1][430/4953]\tlr: 9.880e-05, memory: 8933, loss: 2.5562\n", + "2023-07-02 17:38:07,433 - modelscope - INFO - epoch [1][435/4953]\tlr: 9.877e-05, memory: 8933, loss: 1.5555\n", + "2023-07-02 17:38:14,761 - modelscope - INFO - epoch [1][440/4953]\tlr: 9.873e-05, memory: 8933, loss: 2.9109\n", + "2023-07-02 17:38:19,100 - modelscope - INFO - epoch [1][445/4953]\tlr: 9.869e-05, memory: 8933, loss: 1.6234\n", + "2023-07-02 17:38:24,534 - modelscope - INFO - epoch [1][450/4953]\tlr: 9.865e-05, memory: 8933, loss: 2.2734\n", + "2023-07-02 17:38:31,059 - modelscope - INFO - epoch [1][455/4953]\tlr: 9.861e-05, memory: 8933, loss: 1.3438\n", + "2023-07-02 17:38:37,366 - modelscope - INFO - epoch [1][460/4953]\tlr: 9.857e-05, memory: 8933, loss: 1.8469\n", + "2023-07-02 17:38:43,640 - modelscope - INFO - epoch [1][465/4953]\tlr: 9.853e-05, memory: 8933, loss: 1.7102\n", + "2023-07-02 17:38:48,102 - modelscope - INFO - epoch [1][470/4953]\tlr: 9.849e-05, memory: 8933, loss: 2.1500\n", + "2023-07-02 17:38:52,751 - modelscope - INFO - epoch [1][475/4953]\tlr: 9.845e-05, memory: 8933, loss: 2.4086\n", + "2023-07-02 17:38:59,938 - 
modelscope - INFO - epoch [1][480/4953]\tlr: 9.841e-05, memory: 8933, loss: 1.1828\n", + "2023-07-02 17:39:06,061 - modelscope - INFO - epoch [1][485/4953]\tlr: 9.837e-05, memory: 8933, loss: 1.0625\n", + "2023-07-02 17:39:13,230 - modelscope - INFO - epoch [1][490/4953]\tlr: 9.832e-05, memory: 8933, loss: 1.5750\n", + "2023-07-02 17:39:19,107 - modelscope - INFO - epoch [1][495/4953]\tlr: 9.828e-05, memory: 8933, loss: 1.9844\n", + "2023-07-02 17:39:27,177 - modelscope - INFO - epoch [1][500/4953]\tlr: 9.824e-05, memory: 8933, loss: 1.7211\n", + "2023-07-02 17:39:31,312 - modelscope - INFO - epoch [1][505/4953]\tlr: 9.819e-05, memory: 8933, loss: 2.9953\n", + "2023-07-02 17:39:37,871 - modelscope - INFO - epoch [1][510/4953]\tlr: 9.815e-05, memory: 8933, loss: 1.7234\n", + "2023-07-02 17:39:42,983 - modelscope - INFO - epoch [1][515/4953]\tlr: 9.811e-05, memory: 8933, loss: 3.3328\n", + "2023-07-02 17:39:50,299 - modelscope - INFO - epoch [1][520/4953]\tlr: 9.806e-05, memory: 8933, loss: 1.1523\n", + "2023-07-02 17:39:57,449 - modelscope - INFO - epoch [1][525/4953]\tlr: 9.802e-05, memory: 8933, loss: 2.2969\n", + "2023-07-02 17:40:03,936 - modelscope - INFO - epoch [1][530/4953]\tlr: 9.797e-05, memory: 8933, loss: 2.0359\n", + "2023-07-02 17:40:10,017 - modelscope - INFO - epoch [1][535/4953]\tlr: 9.792e-05, memory: 8933, loss: 2.2484\n", + "2023-07-02 17:40:15,110 - modelscope - INFO - epoch [1][540/4953]\tlr: 9.788e-05, memory: 8933, loss: 2.5000\n", + "2023-07-02 17:40:22,837 - modelscope - INFO - epoch [1][545/4953]\tlr: 9.783e-05, memory: 8933, loss: 1.6344\n", + "2023-07-02 17:40:27,326 - modelscope - INFO - epoch [1][550/4953]\tlr: 9.778e-05, memory: 8933, loss: 1.9516\n", + "2023-07-02 17:40:32,836 - modelscope - INFO - epoch [1][555/4953]\tlr: 9.774e-05, memory: 8933, loss: 2.7078\n", + "2023-07-02 17:40:38,900 - modelscope - INFO - epoch [1][560/4953]\tlr: 9.769e-05, memory: 8933, loss: 2.9023\n", + "2023-07-02 17:40:44,092 - modelscope - INFO - epoch 
[1][565/4953]\tlr: 9.764e-05, memory: 8933, loss: 3.7687\n", + "2023-07-02 17:40:51,182 - modelscope - INFO - epoch [1][570/4953]\tlr: 9.759e-05, memory: 8933, loss: 2.8531\n", + "2023-07-02 17:40:56,580 - modelscope - INFO - epoch [1][575/4953]\tlr: 9.754e-05, memory: 8933, loss: 1.8938\n", + "2023-07-02 17:41:04,432 - modelscope - INFO - epoch [1][580/4953]\tlr: 9.749e-05, memory: 8933, loss: 1.4187\n", + "2023-07-02 17:41:11,299 - modelscope - INFO - epoch [1][585/4953]\tlr: 9.744e-05, memory: 8933, loss: 2.2406\n", + "2023-07-02 17:41:17,405 - modelscope - INFO - epoch [1][590/4953]\tlr: 9.739e-05, memory: 8933, loss: 3.2250\n", + "2023-07-02 17:41:23,093 - modelscope - INFO - epoch [1][595/4953]\tlr: 9.734e-05, memory: 8933, loss: 1.5625\n", + "2023-07-02 17:41:29,552 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.05it/s]\n", + "2023-07-02 17:43:44,919 - modelscope - INFO - Saving checkpoint at 600 iter\n", + "2023-07-02 17:43:44,959 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter200_acc0.7107985615730286\n", + "2023-07-02 17:43:44,963 - modelscope - INFO - Saving checkpoint at 600 iter\n", + "2023-07-02 17:43:45,002 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_400\n", + "2023-07-02 17:43:45,006 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 8933, evaluation/acc: 0.7199, evaluation/loss: 1.9766, loss: 1.2516\n", + "2023-07-02 17:43:50,488 - modelscope - INFO - epoch [1][605/4953]\tlr: 9.723e-05, memory: 8933, loss: 1.8469\n", + "2023-07-02 17:43:56,664 - modelscope - INFO - epoch [1][610/4953]\tlr: 9.718e-05, memory: 8933, loss: 1.5445\n", + "2023-07-02 17:44:02,529 - modelscope - INFO - epoch [1][615/4953]\tlr: 9.713e-05, memory: 8933, loss: 1.8422\n", + "2023-07-02 17:44:07,376 - modelscope - INFO - epoch 
[1][620/4953]\tlr: 9.707e-05, memory: 8933, loss: 2.4242\n", + "2023-07-02 17:44:12,991 - modelscope - INFO - epoch [1][625/4953]\tlr: 9.702e-05, memory: 8933, loss: 1.8070\n", + "2023-07-02 17:44:17,716 - modelscope - INFO - epoch [1][630/4953]\tlr: 9.697e-05, memory: 8933, loss: 2.0000\n", + "2023-07-02 17:44:22,023 - modelscope - INFO - epoch [1][635/4953]\tlr: 9.691e-05, memory: 8933, loss: 1.3898\n", + "2023-07-02 17:44:27,160 - modelscope - INFO - epoch [1][640/4953]\tlr: 9.686e-05, memory: 8933, loss: 1.6227\n", + "2023-07-02 17:44:33,519 - modelscope - INFO - epoch [1][645/4953]\tlr: 9.680e-05, memory: 8933, loss: 1.6672\n", + "2023-07-02 17:44:40,193 - modelscope - INFO - epoch [1][650/4953]\tlr: 9.674e-05, memory: 8933, loss: 1.4438\n", + "2023-07-02 17:44:44,906 - modelscope - INFO - epoch [1][655/4953]\tlr: 9.669e-05, memory: 8933, loss: 1.6648\n", + "2023-07-02 17:44:49,519 - modelscope - INFO - epoch [1][660/4953]\tlr: 9.663e-05, memory: 8933, loss: 1.2945\n", + "2023-07-02 17:44:55,845 - modelscope - INFO - epoch [1][665/4953]\tlr: 9.657e-05, memory: 8933, loss: 1.5773\n", + "2023-07-02 17:45:02,184 - modelscope - INFO - epoch [1][670/4953]\tlr: 9.652e-05, memory: 8933, loss: 1.8625\n", + "2023-07-02 17:45:05,554 - modelscope - INFO - epoch [1][675/4953]\tlr: 9.646e-05, memory: 8933, loss: 1.7039\n", + "2023-07-02 17:45:10,948 - modelscope - INFO - epoch [1][680/4953]\tlr: 9.640e-05, memory: 8933, loss: 2.0211\n", + "2023-07-02 17:45:15,605 - modelscope - INFO - epoch [1][685/4953]\tlr: 9.634e-05, memory: 8933, loss: 1.5969\n", + "2023-07-02 17:45:19,449 - modelscope - INFO - epoch [1][690/4953]\tlr: 9.628e-05, memory: 8933, loss: 1.7523\n", + "2023-07-02 17:45:26,684 - modelscope - INFO - epoch [1][695/4953]\tlr: 9.622e-05, memory: 8933, loss: 1.0891\n", + "2023-07-02 17:45:32,244 - modelscope - INFO - epoch [1][700/4953]\tlr: 9.616e-05, memory: 8933, loss: 1.9469\n", + "2023-07-02 17:45:37,894 - modelscope - INFO - epoch [1][705/4953]\tlr: 
9.610e-05, memory: 8933, loss: 2.0938\n", + "2023-07-02 17:45:43,345 - modelscope - INFO - epoch [1][710/4953]\tlr: 9.604e-05, memory: 8933, loss: 2.7961\n", + "2023-07-02 17:45:49,260 - modelscope - INFO - epoch [1][715/4953]\tlr: 9.598e-05, memory: 8933, loss: 1.4719\n", + "2023-07-02 17:45:56,740 - modelscope - INFO - epoch [1][720/4953]\tlr: 9.592e-05, memory: 8992, loss: 2.2742\n", + "2023-07-02 17:46:00,368 - modelscope - INFO - epoch [1][725/4953]\tlr: 9.585e-05, memory: 8992, loss: 2.5391\n", + "2023-07-02 17:46:06,793 - modelscope - INFO - epoch [1][730/4953]\tlr: 9.579e-05, memory: 8992, loss: 1.0074\n", + "2023-07-02 17:46:13,010 - modelscope - INFO - epoch [1][735/4953]\tlr: 9.573e-05, memory: 8992, loss: 1.9289\n", + "2023-07-02 17:46:19,044 - modelscope - INFO - epoch [1][740/4953]\tlr: 9.567e-05, memory: 8992, loss: 1.7352\n", + "2023-07-02 17:46:26,858 - modelscope - INFO - epoch [1][745/4953]\tlr: 9.560e-05, memory: 8992, loss: 1.6711\n", + "2023-07-02 17:46:32,975 - modelscope - INFO - epoch [1][750/4953]\tlr: 9.554e-05, memory: 8992, loss: 2.0008\n", + "2023-07-02 17:46:41,458 - modelscope - INFO - epoch [1][755/4953]\tlr: 9.547e-05, memory: 8992, loss: 1.4602\n", + "2023-07-02 17:46:45,793 - modelscope - INFO - epoch [1][760/4953]\tlr: 9.541e-05, memory: 8992, loss: 3.6859\n", + "2023-07-02 17:46:50,447 - modelscope - INFO - epoch [1][765/4953]\tlr: 9.534e-05, memory: 8992, loss: 2.0977\n", + "2023-07-02 17:46:56,543 - modelscope - INFO - epoch [1][770/4953]\tlr: 9.528e-05, memory: 8992, loss: 1.6078\n", + "2023-07-02 17:47:02,551 - modelscope - INFO - epoch [1][775/4953]\tlr: 9.521e-05, memory: 8992, loss: 2.8766\n", + "2023-07-02 17:47:09,599 - modelscope - INFO - epoch [1][780/4953]\tlr: 9.514e-05, memory: 8992, loss: 2.9023\n", + "2023-07-02 17:47:15,456 - modelscope - INFO - epoch [1][785/4953]\tlr: 9.508e-05, memory: 8992, loss: 1.2570\n", + "2023-07-02 17:47:22,689 - modelscope - INFO - epoch [1][790/4953]\tlr: 9.501e-05, memory: 8992, 
loss: 1.7406\n", + "2023-07-02 17:47:28,263 - modelscope - INFO - epoch [1][795/4953]\tlr: 9.494e-05, memory: 8992, loss: 1.9820\n", + "2023-07-02 17:47:34,260 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:16<00:00, 2.04it/s]\n", + "2023-07-02 17:49:50,358 - modelscope - INFO - Saving checkpoint at 800 iter\n", + "2023-07-02 17:49:50,399 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter600_acc0.7198567390441895\n", + "2023-07-02 17:49:50,403 - modelscope - INFO - Saving checkpoint at 800 iter\n", + "2023-07-02 17:49:50,442 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_600\n", + "2023-07-02 17:49:50,447 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 8992, evaluation/acc: 0.7412, evaluation/loss: 1.8238, loss: 1.3484\n", + "2023-07-02 17:49:56,027 - modelscope - INFO - epoch [1][805/4953]\tlr: 9.481e-05, memory: 8992, loss: 1.9234\n", + "2023-07-02 17:50:02,709 - modelscope - INFO - epoch [1][810/4953]\tlr: 9.474e-05, memory: 8992, loss: 1.3625\n", + "2023-07-02 17:50:05,927 - modelscope - INFO - epoch [1][815/4953]\tlr: 9.467e-05, memory: 8992, loss: 3.0219\n", + "2023-07-02 17:50:11,744 - modelscope - INFO - epoch [1][820/4953]\tlr: 9.460e-05, memory: 8992, loss: 1.4125\n", + "2023-07-02 17:50:17,173 - modelscope - INFO - epoch [1][825/4953]\tlr: 9.453e-05, memory: 8992, loss: 2.7422\n", + "2023-07-02 17:50:20,860 - modelscope - INFO - epoch [1][830/4953]\tlr: 9.446e-05, memory: 8992, loss: 2.2609\n", + "2023-07-02 17:50:26,716 - modelscope - INFO - epoch [1][835/4953]\tlr: 9.439e-05, memory: 8992, loss: 2.0391\n", + "2023-07-02 17:50:33,433 - modelscope - INFO - epoch [1][840/4953]\tlr: 9.431e-05, memory: 8992, loss: 1.2227\n", + "2023-07-02 17:50:38,310 - modelscope - INFO - epoch [1][845/4953]\tlr: 9.424e-05, memory: 8992, loss: 
2.3312\n", + "2023-07-02 17:50:42,956 - modelscope - INFO - epoch [1][850/4953]\tlr: 9.417e-05, memory: 8992, loss: 1.8562\n", + "2023-07-02 17:50:48,973 - modelscope - INFO - epoch [1][855/4953]\tlr: 9.410e-05, memory: 8992, loss: 1.5039\n", + "2023-07-02 17:50:52,835 - modelscope - INFO - epoch [1][860/4953]\tlr: 9.402e-05, memory: 8992, loss: 2.6664\n", + "2023-07-02 17:50:59,665 - modelscope - INFO - epoch [1][865/4953]\tlr: 9.395e-05, memory: 8992, loss: 1.1352\n", + "2023-07-02 17:51:05,311 - modelscope - INFO - epoch [1][870/4953]\tlr: 9.388e-05, memory: 8992, loss: 0.9805\n", + "2023-07-02 17:51:10,329 - modelscope - INFO - epoch [1][875/4953]\tlr: 9.380e-05, memory: 8992, loss: 1.9438\n", + "2023-07-02 17:51:15,416 - modelscope - INFO - epoch [1][880/4953]\tlr: 9.373e-05, memory: 8992, loss: 1.5938\n", + "2023-07-02 17:51:18,285 - modelscope - INFO - epoch [1][885/4953]\tlr: 9.365e-05, memory: 8992, loss: 3.1656\n", + "2023-07-02 17:51:23,293 - modelscope - INFO - epoch [1][890/4953]\tlr: 9.358e-05, memory: 8992, loss: 1.3336\n", + "2023-07-02 17:51:29,054 - modelscope - INFO - epoch [1][895/4953]\tlr: 9.350e-05, memory: 8992, loss: 1.9094\n", + "2023-07-02 17:51:34,572 - modelscope - INFO - epoch [1][900/4953]\tlr: 9.343e-05, memory: 8992, loss: 2.2406\n", + "2023-07-02 17:51:40,191 - modelscope - INFO - epoch [1][905/4953]\tlr: 9.335e-05, memory: 8992, loss: 1.1078\n", + "2023-07-02 17:51:49,310 - modelscope - INFO - epoch [1][910/4953]\tlr: 9.327e-05, memory: 8992, loss: 1.4352\n", + "2023-07-02 17:51:53,688 - modelscope - INFO - epoch [1][915/4953]\tlr: 9.320e-05, memory: 8992, loss: 2.3406\n", + "2023-07-02 17:51:58,710 - modelscope - INFO - epoch [1][920/4953]\tlr: 9.312e-05, memory: 8992, loss: 1.6012\n", + "2023-07-02 17:52:04,686 - modelscope - INFO - epoch [1][925/4953]\tlr: 9.304e-05, memory: 8992, loss: 1.7086\n", + "2023-07-02 17:52:12,123 - modelscope - INFO - epoch [1][930/4953]\tlr: 9.296e-05, memory: 8992, loss: 1.3492\n", + "2023-07-02 
17:52:15,935 - modelscope - INFO - epoch [1][935/4953]\tlr: 9.288e-05, memory: 8992, loss: 1.4781\n", + "2023-07-02 17:52:20,994 - modelscope - INFO - epoch [1][940/4953]\tlr: 9.280e-05, memory: 8992, loss: 2.1047\n", + "2023-07-02 17:52:28,615 - modelscope - INFO - epoch [1][945/4953]\tlr: 9.272e-05, memory: 8992, loss: 1.2547\n", + "2023-07-02 17:52:34,278 - modelscope - INFO - epoch [1][950/4953]\tlr: 9.264e-05, memory: 8992, loss: 1.7332\n", + "2023-07-02 17:52:40,908 - modelscope - INFO - epoch [1][955/4953]\tlr: 9.256e-05, memory: 8992, loss: 1.2336\n", + "2023-07-02 17:52:45,957 - modelscope - INFO - epoch [1][960/4953]\tlr: 9.248e-05, memory: 8992, loss: 1.3078\n", + "2023-07-02 17:52:51,185 - modelscope - INFO - epoch [1][965/4953]\tlr: 9.240e-05, memory: 8992, loss: 2.4461\n", + "2023-07-02 17:52:56,088 - modelscope - INFO - epoch [1][970/4953]\tlr: 9.232e-05, memory: 8992, loss: 2.0934\n", + "2023-07-02 17:53:00,822 - modelscope - INFO - epoch [1][975/4953]\tlr: 9.224e-05, memory: 8992, loss: 1.5676\n", + "2023-07-02 17:53:04,695 - modelscope - INFO - epoch [1][980/4953]\tlr: 9.216e-05, memory: 8992, loss: 2.7031\n", + "2023-07-02 17:53:09,760 - modelscope - INFO - epoch [1][985/4953]\tlr: 9.207e-05, memory: 8992, loss: 1.9406\n", + "2023-07-02 17:53:14,950 - modelscope - INFO - epoch [1][990/4953]\tlr: 9.199e-05, memory: 8992, loss: 1.9484\n", + "2023-07-02 17:53:20,534 - modelscope - INFO - epoch [1][995/4953]\tlr: 9.191e-05, memory: 8992, loss: 3.2953\n", + "2023-07-02 17:53:25,342 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:16<00:00, 2.04it/s]\n", + "2023-07-02 17:55:41,348 - modelscope - INFO - Saving checkpoint at 1000 iter\n", + "2023-07-02 17:55:41,389 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter800_acc0.7412243485450745\n", + "2023-07-02 17:55:41,393 - modelscope - INFO - 
Saving checkpoint at 1000 iter\n", + "2023-07-02 17:55:41,431 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_800\n", + "2023-07-02 17:55:41,435 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 8992, evaluation/acc: 0.7551, evaluation/loss: 1.6418, loss: 2.1023\n", + "2023-07-02 17:55:48,321 - modelscope - INFO - epoch [1][1005/4953]\tlr: 9.174e-05, memory: 8992, loss: 0.9020\n", + "2023-07-02 17:55:52,978 - modelscope - INFO - epoch [1][1010/4953]\tlr: 9.166e-05, memory: 8992, loss: 2.8094\n", + "2023-07-02 17:55:59,951 - modelscope - INFO - epoch [1][1015/4953]\tlr: 9.157e-05, memory: 8992, loss: 1.5145\n", + "2023-07-02 17:56:06,752 - modelscope - INFO - epoch [1][1020/4953]\tlr: 9.149e-05, memory: 8992, loss: 1.2547\n", + "2023-07-02 17:56:13,123 - modelscope - INFO - epoch [1][1025/4953]\tlr: 9.140e-05, memory: 8992, loss: 1.5836\n", + "2023-07-02 17:56:18,535 - modelscope - INFO - epoch [1][1030/4953]\tlr: 9.132e-05, memory: 8992, loss: 1.5500\n", + "2023-07-02 17:56:23,898 - modelscope - INFO - epoch [1][1035/4953]\tlr: 9.123e-05, memory: 8992, loss: 1.1477\n", + "2023-07-02 17:56:29,262 - modelscope - INFO - epoch [1][1040/4953]\tlr: 9.114e-05, memory: 8992, loss: 1.8488\n", + "2023-07-02 17:56:36,281 - modelscope - INFO - epoch [1][1045/4953]\tlr: 9.106e-05, memory: 8992, loss: 1.7969\n", + "2023-07-02 17:56:42,786 - modelscope - INFO - epoch [1][1050/4953]\tlr: 9.097e-05, memory: 8992, loss: 1.0703\n", + "2023-07-02 17:56:48,367 - modelscope - INFO - epoch [1][1055/4953]\tlr: 9.088e-05, memory: 8992, loss: 1.5227\n", + "2023-07-02 17:56:53,185 - modelscope - INFO - epoch [1][1060/4953]\tlr: 9.079e-05, memory: 8992, loss: 2.5859\n", + "2023-07-02 17:56:59,040 - modelscope - INFO - epoch [1][1065/4953]\tlr: 9.070e-05, memory: 8992, loss: 1.4641\n", + "2023-07-02 17:57:05,006 - modelscope - INFO - epoch [1][1070/4953]\tlr: 9.062e-05, memory: 8992, loss: 0.9602\n", + "2023-07-02 17:57:08,833 - 
modelscope - INFO - epoch [1][1075/4953]\tlr: 9.053e-05, memory: 8992, loss: 2.7281\n", + "2023-07-02 17:57:15,081 - modelscope - INFO - epoch [1][1080/4953]\tlr: 9.044e-05, memory: 8992, loss: 0.8438\n", + "2023-07-02 17:57:19,054 - modelscope - INFO - epoch [1][1085/4953]\tlr: 9.035e-05, memory: 8992, loss: 2.0336\n", + "2023-07-02 17:57:27,789 - modelscope - INFO - epoch [1][1090/4953]\tlr: 9.026e-05, memory: 8992, loss: 1.0059\n", + "2023-07-02 17:57:32,658 - modelscope - INFO - epoch [1][1095/4953]\tlr: 9.017e-05, memory: 8992, loss: 1.4187\n", + "2023-07-02 17:57:37,809 - modelscope - INFO - epoch [1][1100/4953]\tlr: 9.008e-05, memory: 8992, loss: 1.8813\n", + "2023-07-02 17:57:44,029 - modelscope - INFO - epoch [1][1105/4953]\tlr: 8.999e-05, memory: 8992, loss: 1.2219\n", + "2023-07-02 17:57:49,772 - modelscope - INFO - epoch [1][1110/4953]\tlr: 8.989e-05, memory: 8992, loss: 1.0527\n", + "2023-07-02 17:57:53,867 - modelscope - INFO - epoch [1][1115/4953]\tlr: 8.980e-05, memory: 8992, loss: 1.7289\n", + "2023-07-02 17:57:59,243 - modelscope - INFO - epoch [1][1120/4953]\tlr: 8.971e-05, memory: 8992, loss: 2.4305\n", + "2023-07-02 17:58:08,887 - modelscope - INFO - epoch [1][1125/4953]\tlr: 8.962e-05, memory: 8992, loss: 0.7469\n", + "2023-07-02 17:58:16,138 - modelscope - INFO - epoch [1][1130/4953]\tlr: 8.952e-05, memory: 8992, loss: 1.7727\n", + "2023-07-02 17:58:23,930 - modelscope - INFO - epoch [1][1135/4953]\tlr: 8.943e-05, memory: 8992, loss: 2.0129\n", + "2023-07-02 17:58:30,185 - modelscope - INFO - epoch [1][1140/4953]\tlr: 8.934e-05, memory: 8992, loss: 2.9025\n", + "2023-07-02 17:58:36,114 - modelscope - INFO - epoch [1][1145/4953]\tlr: 8.924e-05, memory: 8992, loss: 1.8898\n", + "2023-07-02 17:58:42,583 - modelscope - INFO - epoch [1][1150/4953]\tlr: 8.915e-05, memory: 8992, loss: 1.6789\n", + "2023-07-02 17:58:47,491 - modelscope - INFO - epoch [1][1155/4953]\tlr: 8.905e-05, memory: 8992, loss: 1.5578\n", + "2023-07-02 17:58:51,182 - modelscope 
- INFO - epoch [1][1160/4953]\tlr: 8.896e-05, memory: 8992, loss: 2.6266\n", + "2023-07-02 17:58:56,692 - modelscope - INFO - epoch [1][1165/4953]\tlr: 8.886e-05, memory: 8992, loss: 1.8508\n", + "2023-07-02 17:59:01,780 - modelscope - INFO - epoch [1][1170/4953]\tlr: 8.877e-05, memory: 8992, loss: 1.7000\n", + "2023-07-02 17:59:05,790 - modelscope - INFO - epoch [1][1175/4953]\tlr: 8.867e-05, memory: 8992, loss: 2.2281\n", + "2023-07-02 17:59:10,420 - modelscope - INFO - epoch [1][1180/4953]\tlr: 8.858e-05, memory: 8992, loss: 2.2180\n", + "2023-07-02 17:59:15,762 - modelscope - INFO - epoch [1][1185/4953]\tlr: 8.848e-05, memory: 8992, loss: 1.2668\n", + "2023-07-02 17:59:20,930 - modelscope - INFO - epoch [1][1190/4953]\tlr: 8.838e-05, memory: 8992, loss: 1.8664\n", + "2023-07-02 17:59:27,122 - modelscope - INFO - epoch [1][1195/4953]\tlr: 8.828e-05, memory: 8992, loss: 2.4109\n", + "2023-07-02 17:59:32,910 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 18:01:48,692 - modelscope - INFO - Saving checkpoint at 1200 iter\n", + "2023-07-02 18:01:48,732 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter1000_acc0.7551158666610718\n", + "2023-07-02 18:01:48,736 - modelscope - INFO - Saving checkpoint at 1200 iter\n", + "2023-07-02 18:01:48,775 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_1000\n", + "2023-07-02 18:01:48,780 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 8992, evaluation/acc: 0.7694, evaluation/loss: 1.5234, loss: 1.7117\n", + "2023-07-02 18:01:56,354 - modelscope - INFO - epoch [1][1205/4953]\tlr: 8.809e-05, memory: 8992, loss: 1.2402\n", + "2023-07-02 18:02:00,660 - modelscope - INFO - epoch [1][1210/4953]\tlr: 8.799e-05, memory: 8992, loss: 1.9062\n", + "2023-07-02 18:02:04,421 - 
modelscope - INFO - epoch [1][1215/4953]\tlr: 8.789e-05, memory: 8992, loss: 1.4750\n", + "2023-07-02 18:02:10,614 - modelscope - INFO - epoch [1][1220/4953]\tlr: 8.779e-05, memory: 8992, loss: 1.0879\n", + "2023-07-02 18:02:16,579 - modelscope - INFO - epoch [1][1225/4953]\tlr: 8.769e-05, memory: 8992, loss: 1.9461\n", + "2023-07-02 18:02:23,602 - modelscope - INFO - epoch [1][1230/4953]\tlr: 8.759e-05, memory: 8992, loss: 2.3242\n", + "2023-07-02 18:02:31,155 - modelscope - INFO - epoch [1][1235/4953]\tlr: 8.749e-05, memory: 8992, loss: 1.9867\n", + "2023-07-02 18:02:36,373 - modelscope - INFO - epoch [1][1240/4953]\tlr: 8.739e-05, memory: 8992, loss: 2.1641\n", + "2023-07-02 18:02:41,792 - modelscope - INFO - epoch [1][1245/4953]\tlr: 8.729e-05, memory: 8992, loss: 1.9109\n", + "2023-07-02 18:02:49,746 - modelscope - INFO - epoch [1][1250/4953]\tlr: 8.719e-05, memory: 8992, loss: 0.7258\n", + "2023-07-02 18:02:54,809 - modelscope - INFO - epoch [1][1255/4953]\tlr: 8.709e-05, memory: 8992, loss: 1.7203\n", + "2023-07-02 18:03:02,266 - modelscope - INFO - epoch [1][1260/4953]\tlr: 8.699e-05, memory: 8992, loss: 1.3533\n", + "2023-07-02 18:03:10,570 - modelscope - INFO - epoch [1][1265/4953]\tlr: 8.689e-05, memory: 8992, loss: 1.6199\n", + "2023-07-02 18:03:17,332 - modelscope - INFO - epoch [1][1270/4953]\tlr: 8.679e-05, memory: 8992, loss: 1.4033\n", + "2023-07-02 18:03:24,075 - modelscope - INFO - epoch [1][1275/4953]\tlr: 8.668e-05, memory: 8992, loss: 1.3773\n", + "2023-07-02 18:03:31,046 - modelscope - INFO - epoch [1][1280/4953]\tlr: 8.658e-05, memory: 8992, loss: 1.3973\n", + "2023-07-02 18:03:37,326 - modelscope - INFO - epoch [1][1285/4953]\tlr: 8.648e-05, memory: 8992, loss: 1.6422\n", + "2023-07-02 18:03:42,789 - modelscope - INFO - epoch [1][1290/4953]\tlr: 8.637e-05, memory: 8992, loss: 1.8156\n", + "2023-07-02 18:03:49,191 - modelscope - INFO - epoch [1][1295/4953]\tlr: 8.627e-05, memory: 8992, loss: 0.8660\n", + "2023-07-02 18:03:57,916 - modelscope 
- INFO - epoch [1][1300/4953]\tlr: 8.617e-05, memory: 8992, loss: 1.4477\n", + "2023-07-02 18:04:04,809 - modelscope - INFO - epoch [1][1305/4953]\tlr: 8.606e-05, memory: 8992, loss: 0.7375\n", + "2023-07-02 18:04:12,169 - modelscope - INFO - epoch [1][1310/4953]\tlr: 8.596e-05, memory: 8992, loss: 0.4646\n", + "2023-07-02 18:04:17,928 - modelscope - INFO - epoch [1][1315/4953]\tlr: 8.585e-05, memory: 8992, loss: 1.6566\n", + "2023-07-02 18:04:26,868 - modelscope - INFO - epoch [1][1320/4953]\tlr: 8.575e-05, memory: 8992, loss: 1.0375\n", + "2023-07-02 18:04:32,785 - modelscope - INFO - epoch [1][1325/4953]\tlr: 8.564e-05, memory: 8992, loss: 1.1785\n", + "2023-07-02 18:04:36,876 - modelscope - INFO - epoch [1][1330/4953]\tlr: 8.553e-05, memory: 8992, loss: 2.0953\n", + "2023-07-02 18:04:43,149 - modelscope - INFO - epoch [1][1335/4953]\tlr: 8.543e-05, memory: 8992, loss: 1.4941\n", + "2023-07-02 18:04:48,128 - modelscope - INFO - epoch [1][1340/4953]\tlr: 8.532e-05, memory: 8992, loss: 2.3219\n", + "2023-07-02 18:04:54,519 - modelscope - INFO - epoch [1][1345/4953]\tlr: 8.521e-05, memory: 8992, loss: 1.7479\n", + "2023-07-02 18:05:00,734 - modelscope - INFO - epoch [1][1350/4953]\tlr: 8.511e-05, memory: 8992, loss: 2.5168\n", + "2023-07-02 18:05:07,571 - modelscope - INFO - epoch [1][1355/4953]\tlr: 8.500e-05, memory: 8992, loss: 1.5414\n", + "2023-07-02 18:05:13,130 - modelscope - INFO - epoch [1][1360/4953]\tlr: 8.489e-05, memory: 8992, loss: 1.8086\n", + "2023-07-02 18:05:22,837 - modelscope - INFO - epoch [1][1365/4953]\tlr: 8.478e-05, memory: 8992, loss: 1.1250\n", + "2023-07-02 18:05:28,381 - modelscope - INFO - epoch [1][1370/4953]\tlr: 8.468e-05, memory: 8992, loss: 1.2740\n", + "2023-07-02 18:05:34,762 - modelscope - INFO - epoch [1][1375/4953]\tlr: 8.457e-05, memory: 8992, loss: 1.6906\n", + "2023-07-02 18:05:40,998 - modelscope - INFO - epoch [1][1380/4953]\tlr: 8.446e-05, memory: 8992, loss: 2.1523\n", + "2023-07-02 18:05:48,330 - modelscope - INFO - 
epoch [1][1385/4953]\tlr: 8.435e-05, memory: 8992, loss: 0.6824\n", + "2023-07-02 18:05:52,136 - modelscope - INFO - epoch [1][1390/4953]\tlr: 8.424e-05, memory: 8992, loss: 1.8422\n", + "2023-07-02 18:05:58,132 - modelscope - INFO - epoch [1][1395/4953]\tlr: 8.413e-05, memory: 8992, loss: 0.8705\n", + "2023-07-02 18:06:04,317 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 18:08:20,133 - modelscope - INFO - Saving checkpoint at 1400 iter\n", + "2023-07-02 18:08:20,173 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter1200_acc0.7693551182746887\n", + "2023-07-02 18:08:20,177 - modelscope - INFO - Saving checkpoint at 1400 iter\n", + "2023-07-02 18:08:20,216 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_1200\n", + "2023-07-02 18:08:20,220 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 8992, evaluation/acc: 0.7789, evaluation/loss: 1.4656, loss: 1.8477\n", + "2023-07-02 18:08:25,847 - modelscope - INFO - epoch [1][1405/4953]\tlr: 8.391e-05, memory: 8992, loss: 1.5250\n", + "2023-07-02 18:08:32,815 - modelscope - INFO - epoch [1][1410/4953]\tlr: 8.380e-05, memory: 8992, loss: 1.2430\n", + "2023-07-02 18:08:38,362 - modelscope - INFO - epoch [1][1415/4953]\tlr: 8.369e-05, memory: 8992, loss: 1.4227\n", + "2023-07-02 18:08:43,312 - modelscope - INFO - epoch [1][1420/4953]\tlr: 8.358e-05, memory: 8992, loss: 1.3088\n", + "2023-07-02 18:08:50,596 - modelscope - INFO - epoch [1][1425/4953]\tlr: 8.346e-05, memory: 8992, loss: 1.0277\n", + "2023-07-02 18:08:55,317 - modelscope - INFO - epoch [1][1430/4953]\tlr: 8.335e-05, memory: 8992, loss: 2.0480\n", + "2023-07-02 18:08:58,994 - modelscope - INFO - epoch [1][1435/4953]\tlr: 8.324e-05, memory: 8992, loss: 3.0969\n", + "2023-07-02 18:09:04,894 - modelscope - 
INFO - epoch [1][1440/4953]\tlr: 8.313e-05, memory: 8992, loss: 0.7141\n", + "2023-07-02 18:09:10,621 - modelscope - INFO - epoch [1][1445/4953]\tlr: 8.301e-05, memory: 8992, loss: 1.7031\n", + "2023-07-02 18:09:15,960 - modelscope - INFO - epoch [1][1450/4953]\tlr: 8.290e-05, memory: 8992, loss: 1.5277\n", + "2023-07-02 18:09:21,781 - modelscope - INFO - epoch [1][1455/4953]\tlr: 8.279e-05, memory: 8992, loss: 1.7842\n", + "2023-07-02 18:09:29,051 - modelscope - INFO - epoch [1][1460/4953]\tlr: 8.267e-05, memory: 8992, loss: 2.1768\n", + "2023-07-02 18:09:33,405 - modelscope - INFO - epoch [1][1465/4953]\tlr: 8.256e-05, memory: 8992, loss: 1.9969\n", + "2023-07-02 18:09:38,454 - modelscope - INFO - epoch [1][1470/4953]\tlr: 8.245e-05, memory: 8992, loss: 1.6043\n", + "2023-07-02 18:09:44,266 - modelscope - INFO - epoch [1][1475/4953]\tlr: 8.233e-05, memory: 8992, loss: 0.7842\n", + "2023-07-02 18:09:49,575 - modelscope - INFO - epoch [1][1480/4953]\tlr: 8.222e-05, memory: 8992, loss: 1.6766\n", + "2023-07-02 18:09:56,773 - modelscope - INFO - epoch [1][1485/4953]\tlr: 8.210e-05, memory: 8992, loss: 1.1123\n", + "2023-07-02 18:10:05,054 - modelscope - INFO - epoch [1][1490/4953]\tlr: 8.199e-05, memory: 9058, loss: 1.3289\n", + "2023-07-02 18:10:10,678 - modelscope - INFO - epoch [1][1495/4953]\tlr: 8.187e-05, memory: 9058, loss: 1.6414\n", + "2023-07-02 18:10:16,694 - modelscope - INFO - epoch [1][1500/4953]\tlr: 8.176e-05, memory: 9058, loss: 0.8203\n", + "2023-07-02 18:10:24,675 - modelscope - INFO - epoch [1][1505/4953]\tlr: 8.164e-05, memory: 9058, loss: 0.8189\n", + "2023-07-02 18:10:30,053 - modelscope - INFO - epoch [1][1510/4953]\tlr: 8.152e-05, memory: 9058, loss: 1.1646\n", + "2023-07-02 18:10:36,537 - modelscope - INFO - epoch [1][1515/4953]\tlr: 8.141e-05, memory: 9058, loss: 1.1387\n", + "2023-07-02 18:10:42,304 - modelscope - INFO - epoch [1][1520/4953]\tlr: 8.129e-05, memory: 9058, loss: 1.4477\n", + "2023-07-02 18:10:46,424 - modelscope - INFO - 
epoch [1][1525/4953]\tlr: 8.117e-05, memory: 9058, loss: 3.0531\n", + "2023-07-02 18:10:51,264 - modelscope - INFO - epoch [1][1530/4953]\tlr: 8.106e-05, memory: 9058, loss: 2.3023\n", + "2023-07-02 18:10:59,103 - modelscope - INFO - epoch [1][1535/4953]\tlr: 8.094e-05, memory: 9058, loss: 0.6086\n", + "2023-07-02 18:11:04,295 - modelscope - INFO - epoch [1][1540/4953]\tlr: 8.082e-05, memory: 9058, loss: 1.3912\n", + "2023-07-02 18:11:09,436 - modelscope - INFO - epoch [1][1545/4953]\tlr: 8.070e-05, memory: 9058, loss: 2.1668\n", + "2023-07-02 18:11:16,921 - modelscope - INFO - epoch [1][1550/4953]\tlr: 8.058e-05, memory: 9058, loss: 0.4180\n", + "2023-07-02 18:11:22,852 - modelscope - INFO - epoch [1][1555/4953]\tlr: 8.047e-05, memory: 9058, loss: 1.4855\n", + "2023-07-02 18:11:27,748 - modelscope - INFO - epoch [1][1560/4953]\tlr: 8.035e-05, memory: 9058, loss: 2.0650\n", + "2023-07-02 18:11:30,906 - modelscope - INFO - epoch [1][1565/4953]\tlr: 8.023e-05, memory: 9058, loss: 2.8250\n", + "2023-07-02 18:11:38,069 - modelscope - INFO - epoch [1][1570/4953]\tlr: 8.011e-05, memory: 9058, loss: 1.6609\n", + "2023-07-02 18:11:44,626 - modelscope - INFO - epoch [1][1575/4953]\tlr: 7.999e-05, memory: 9058, loss: 1.0016\n", + "2023-07-02 18:11:49,164 - modelscope - INFO - epoch [1][1580/4953]\tlr: 7.987e-05, memory: 9058, loss: 2.2371\n", + "2023-07-02 18:11:53,217 - modelscope - INFO - epoch [1][1585/4953]\tlr: 7.975e-05, memory: 9058, loss: 2.7695\n", + "2023-07-02 18:11:59,930 - modelscope - INFO - epoch [1][1590/4953]\tlr: 7.963e-05, memory: 9058, loss: 2.2398\n", + "2023-07-02 18:12:04,671 - modelscope - INFO - epoch [1][1595/4953]\tlr: 7.951e-05, memory: 9058, loss: 0.7875\n", + "2023-07-02 18:12:10,417 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 18:14:26,308 - modelscope - INFO - Saving checkpoint at 1600 iter\n", + "2023-07-02 
18:14:26,349 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter1400_acc0.7789175510406494\n", + "2023-07-02 18:14:26,353 - modelscope - INFO - Saving checkpoint at 1600 iter\n", + "2023-07-02 18:14:26,392 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_1400\n", + "2023-07-02 18:14:26,396 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9058, evaluation/acc: 0.7892, evaluation/loss: 1.4188, loss: 2.1477\n", + "2023-07-02 18:14:31,893 - modelscope - INFO - epoch [1][1605/4953]\tlr: 7.927e-05, memory: 9058, loss: 0.7930\n", + "2023-07-02 18:14:37,157 - modelscope - INFO - epoch [1][1610/4953]\tlr: 7.914e-05, memory: 9058, loss: 1.6867\n", + "2023-07-02 18:14:41,163 - modelscope - INFO - epoch [1][1615/4953]\tlr: 7.902e-05, memory: 9058, loss: 1.3123\n", + "2023-07-02 18:14:46,222 - modelscope - INFO - epoch [1][1620/4953]\tlr: 7.890e-05, memory: 9058, loss: 1.9320\n", + "2023-07-02 18:14:50,200 - modelscope - INFO - epoch [1][1625/4953]\tlr: 7.878e-05, memory: 9058, loss: 2.3531\n", + "2023-07-02 18:14:55,640 - modelscope - INFO - epoch [1][1630/4953]\tlr: 7.866e-05, memory: 9058, loss: 2.1230\n", + "2023-07-02 18:15:00,591 - modelscope - INFO - epoch [1][1635/4953]\tlr: 7.853e-05, memory: 9058, loss: 1.2672\n", + "2023-07-02 18:15:06,311 - modelscope - INFO - epoch [1][1640/4953]\tlr: 7.841e-05, memory: 9058, loss: 1.8948\n", + "2023-07-02 18:15:12,067 - modelscope - INFO - epoch [1][1645/4953]\tlr: 7.829e-05, memory: 9058, loss: 1.9506\n", + "2023-07-02 18:15:18,834 - modelscope - INFO - epoch [1][1650/4953]\tlr: 7.817e-05, memory: 9058, loss: 0.8719\n", + "2023-07-02 18:15:24,490 - modelscope - INFO - epoch [1][1655/4953]\tlr: 7.804e-05, memory: 9058, loss: 0.7850\n", + "2023-07-02 18:15:30,533 - modelscope - INFO - epoch [1][1660/4953]\tlr: 7.792e-05, memory: 9058, loss: 1.0324\n", + "2023-07-02 18:15:39,715 - modelscope - INFO - epoch 
[1][1665/4953]\tlr: 7.779e-05, memory: 9058, loss: 0.8568\n", + "2023-07-02 18:15:46,536 - modelscope - INFO - epoch [1][1670/4953]\tlr: 7.767e-05, memory: 9058, loss: 1.5828\n", + "2023-07-02 18:15:50,976 - modelscope - INFO - epoch [1][1675/4953]\tlr: 7.755e-05, memory: 9058, loss: 1.5391\n", + "2023-07-02 18:15:56,272 - modelscope - INFO - epoch [1][1680/4953]\tlr: 7.742e-05, memory: 9058, loss: 1.6117\n", + "2023-07-02 18:16:04,187 - modelscope - INFO - epoch [1][1685/4953]\tlr: 7.730e-05, memory: 9058, loss: 0.4076\n", + "2023-07-02 18:16:08,882 - modelscope - INFO - epoch [1][1690/4953]\tlr: 7.717e-05, memory: 9058, loss: 1.3816\n", + "2023-07-02 18:16:16,150 - modelscope - INFO - epoch [1][1695/4953]\tlr: 7.705e-05, memory: 9058, loss: 1.9426\n", + "2023-07-02 18:16:20,599 - modelscope - INFO - epoch [1][1700/4953]\tlr: 7.692e-05, memory: 9058, loss: 2.4797\n", + "2023-07-02 18:16:26,001 - modelscope - INFO - epoch [1][1705/4953]\tlr: 7.679e-05, memory: 9058, loss: 1.3273\n", + "2023-07-02 18:16:32,374 - modelscope - INFO - epoch [1][1710/4953]\tlr: 7.667e-05, memory: 9058, loss: 0.9286\n", + "2023-07-02 18:16:39,243 - modelscope - INFO - epoch [1][1715/4953]\tlr: 7.654e-05, memory: 9058, loss: 1.3732\n", + "2023-07-02 18:16:44,919 - modelscope - INFO - epoch [1][1720/4953]\tlr: 7.642e-05, memory: 9058, loss: 1.2824\n", + "2023-07-02 18:16:47,647 - modelscope - INFO - epoch [1][1725/4953]\tlr: 7.629e-05, memory: 9058, loss: 2.0891\n", + "2023-07-02 18:16:53,984 - modelscope - INFO - epoch [1][1730/4953]\tlr: 7.616e-05, memory: 9058, loss: 0.5539\n", + "2023-07-02 18:16:58,439 - modelscope - INFO - epoch [1][1735/4953]\tlr: 7.604e-05, memory: 9058, loss: 1.4975\n", + "2023-07-02 18:17:03,726 - modelscope - INFO - epoch [1][1740/4953]\tlr: 7.591e-05, memory: 9058, loss: 1.6102\n", + "2023-07-02 18:17:08,657 - modelscope - INFO - epoch [1][1745/4953]\tlr: 7.578e-05, memory: 9058, loss: 1.6957\n", + "2023-07-02 18:17:13,371 - modelscope - INFO - epoch 
[1][1750/4953]\tlr: 7.565e-05, memory: 9058, loss: 1.5684\n", + "2023-07-02 18:17:17,513 - modelscope - INFO - epoch [1][1755/4953]\tlr: 7.553e-05, memory: 9058, loss: 2.9000\n", + "2023-07-02 18:17:24,347 - modelscope - INFO - epoch [1][1760/4953]\tlr: 7.540e-05, memory: 9058, loss: 1.5227\n", + "2023-07-02 18:17:28,183 - modelscope - INFO - epoch [1][1765/4953]\tlr: 7.527e-05, memory: 9058, loss: 2.3375\n", + "2023-07-02 18:17:35,427 - modelscope - INFO - epoch [1][1770/4953]\tlr: 7.514e-05, memory: 9058, loss: 1.0623\n", + "2023-07-02 18:17:39,708 - modelscope - INFO - epoch [1][1775/4953]\tlr: 7.501e-05, memory: 9058, loss: 1.5977\n", + "2023-07-02 18:17:45,757 - modelscope - INFO - epoch [1][1780/4953]\tlr: 7.488e-05, memory: 9058, loss: 1.0781\n", + "2023-07-02 18:17:49,525 - modelscope - INFO - epoch [1][1785/4953]\tlr: 7.475e-05, memory: 9058, loss: 1.6547\n", + "2023-07-02 18:17:55,072 - modelscope - INFO - epoch [1][1790/4953]\tlr: 7.463e-05, memory: 9058, loss: 1.4458\n", + "2023-07-02 18:18:01,439 - modelscope - INFO - epoch [1][1795/4953]\tlr: 7.450e-05, memory: 9058, loss: 1.0096\n", + "2023-07-02 18:18:06,478 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 18:20:22,335 - modelscope - INFO - Saving checkpoint at 1800 iter\n", + "2023-07-02 18:20:22,375 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter1600_acc0.7891753911972046\n", + "2023-07-02 18:20:22,379 - modelscope - INFO - Saving checkpoint at 1800 iter\n", + "2023-07-02 18:20:22,417 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_1600\n", + "2023-07-02 18:20:22,422 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9058, evaluation/acc: 0.7967, evaluation/loss: 1.3701, loss: 0.9414\n", + "2023-07-02 18:20:28,163 - modelscope - INFO - 
epoch [1][1805/4953]\tlr: 7.424e-05, memory: 9058, loss: 1.7404\n", + "2023-07-02 18:20:32,265 - modelscope - INFO - epoch [1][1810/4953]\tlr: 7.411e-05, memory: 9058, loss: 1.5176\n", + "2023-07-02 18:20:38,772 - modelscope - INFO - epoch [1][1815/4953]\tlr: 7.398e-05, memory: 9058, loss: 0.9519\n", + "2023-07-02 18:20:44,819 - modelscope - INFO - epoch [1][1820/4953]\tlr: 7.385e-05, memory: 9058, loss: 1.2756\n", + "2023-07-02 18:20:50,296 - modelscope - INFO - epoch [1][1825/4953]\tlr: 7.372e-05, memory: 9058, loss: 1.4785\n", + "2023-07-02 18:20:56,799 - modelscope - INFO - epoch [1][1830/4953]\tlr: 7.358e-05, memory: 9058, loss: 1.5188\n", + "2023-07-02 18:21:03,334 - modelscope - INFO - epoch [1][1835/4953]\tlr: 7.345e-05, memory: 9058, loss: 0.6644\n", + "2023-07-02 18:21:10,067 - modelscope - INFO - epoch [1][1840/4953]\tlr: 7.332e-05, memory: 9058, loss: 0.9434\n", + "2023-07-02 18:21:16,554 - modelscope - INFO - epoch [1][1845/4953]\tlr: 7.319e-05, memory: 9058, loss: 0.7092\n", + "2023-07-02 18:21:23,374 - modelscope - INFO - epoch [1][1850/4953]\tlr: 7.306e-05, memory: 9058, loss: 1.1020\n", + "2023-07-02 18:21:32,187 - modelscope - INFO - epoch [1][1855/4953]\tlr: 7.293e-05, memory: 9058, loss: 1.1508\n", + "2023-07-02 18:21:37,254 - modelscope - INFO - epoch [1][1860/4953]\tlr: 7.280e-05, memory: 9058, loss: 1.6852\n", + "2023-07-02 18:21:42,410 - modelscope - INFO - epoch [1][1865/4953]\tlr: 7.266e-05, memory: 9058, loss: 0.9865\n", + "2023-07-02 18:21:47,494 - modelscope - INFO - epoch [1][1870/4953]\tlr: 7.253e-05, memory: 9058, loss: 1.4111\n", + "2023-07-02 18:21:51,877 - modelscope - INFO - epoch [1][1875/4953]\tlr: 7.240e-05, memory: 9058, loss: 1.9342\n", + "2023-07-02 18:21:57,909 - modelscope - INFO - epoch [1][1880/4953]\tlr: 7.227e-05, memory: 9058, loss: 1.5063\n", + "2023-07-02 18:22:03,018 - modelscope - INFO - epoch [1][1885/4953]\tlr: 7.213e-05, memory: 9058, loss: 1.5504\n", + "2023-07-02 18:22:07,481 - modelscope - INFO - epoch 
[1][1890/4953]\tlr: 7.200e-05, memory: 9058, loss: 1.2473\n", + "2023-07-02 18:22:12,667 - modelscope - INFO - epoch [1][1895/4953]\tlr: 7.187e-05, memory: 9058, loss: 2.0055\n", + "2023-07-02 18:22:17,967 - modelscope - INFO - epoch [1][1900/4953]\tlr: 7.174e-05, memory: 9058, loss: 0.7781\n", + "2023-07-02 18:22:24,563 - modelscope - INFO - epoch [1][1905/4953]\tlr: 7.160e-05, memory: 9058, loss: 1.1995\n", + "2023-07-02 18:22:28,670 - modelscope - INFO - epoch [1][1910/4953]\tlr: 7.147e-05, memory: 9058, loss: 2.4594\n", + "2023-07-02 18:22:35,136 - modelscope - INFO - epoch [1][1915/4953]\tlr: 7.133e-05, memory: 9058, loss: 0.7545\n", + "2023-07-02 18:22:41,042 - modelscope - INFO - epoch [1][1920/4953]\tlr: 7.120e-05, memory: 9058, loss: 1.8008\n", + "2023-07-02 18:22:45,686 - modelscope - INFO - epoch [1][1925/4953]\tlr: 7.107e-05, memory: 9058, loss: 1.4076\n", + "2023-07-02 18:22:50,652 - modelscope - INFO - epoch [1][1930/4953]\tlr: 7.093e-05, memory: 9058, loss: 1.6135\n", + "2023-07-02 18:22:55,346 - modelscope - INFO - epoch [1][1935/4953]\tlr: 7.080e-05, memory: 9058, loss: 1.3820\n", + "2023-07-02 18:23:00,407 - modelscope - INFO - epoch [1][1940/4953]\tlr: 7.066e-05, memory: 9058, loss: 1.3170\n", + "2023-07-02 18:23:07,089 - modelscope - INFO - epoch [1][1945/4953]\tlr: 7.053e-05, memory: 9058, loss: 1.5059\n", + "2023-07-02 18:23:14,519 - modelscope - INFO - epoch [1][1950/4953]\tlr: 7.039e-05, memory: 9058, loss: 1.1481\n", + "2023-07-02 18:23:20,167 - modelscope - INFO - epoch [1][1955/4953]\tlr: 7.026e-05, memory: 9058, loss: 1.5484\n", + "2023-07-02 18:23:26,522 - modelscope - INFO - epoch [1][1960/4953]\tlr: 7.012e-05, memory: 9058, loss: 1.5056\n", + "2023-07-02 18:23:31,990 - modelscope - INFO - epoch [1][1965/4953]\tlr: 6.999e-05, memory: 9058, loss: 0.8258\n", + "2023-07-02 18:23:36,765 - modelscope - INFO - epoch [1][1970/4953]\tlr: 6.985e-05, memory: 9058, loss: 2.1605\n", + "2023-07-02 18:23:44,015 - modelscope - INFO - epoch 
[1][1975/4953]\tlr: 6.972e-05, memory: 9058, loss: 0.5347\n", + "2023-07-02 18:23:50,763 - modelscope - INFO - epoch [1][1980/4953]\tlr: 6.958e-05, memory: 9058, loss: 0.5833\n", + "2023-07-02 18:23:56,081 - modelscope - INFO - epoch [1][1985/4953]\tlr: 6.945e-05, memory: 9058, loss: 1.3211\n", + "2023-07-02 18:24:02,890 - modelscope - INFO - epoch [1][1990/4953]\tlr: 6.931e-05, memory: 9058, loss: 0.6614\n", + "2023-07-02 18:24:11,102 - modelscope - INFO - epoch [1][1995/4953]\tlr: 6.917e-05, memory: 9058, loss: 1.0019\n", + "2023-07-02 18:24:15,188 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 18:26:31,178 - modelscope - INFO - Saving checkpoint at 2000 iter\n", + "2023-07-02 18:26:31,219 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter1800_acc0.79673832654953\n", + "2023-07-02 18:26:31,223 - modelscope - INFO - Saving checkpoint at 2000 iter\n", + "2023-07-02 18:26:31,262 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_1800\n", + "2023-07-02 18:26:31,267 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9058, evaluation/acc: 0.8048, evaluation/loss: 1.3532, loss: 2.3406\n", + "2023-07-02 18:26:36,725 - modelscope - INFO - epoch [1][2005/4953]\tlr: 6.890e-05, memory: 9058, loss: 1.7643\n", + "2023-07-02 18:26:43,719 - modelscope - INFO - epoch [1][2010/4953]\tlr: 6.876e-05, memory: 9058, loss: 1.3211\n", + "2023-07-02 18:26:50,532 - modelscope - INFO - epoch [1][2015/4953]\tlr: 6.863e-05, memory: 9058, loss: 1.0998\n", + "2023-07-02 18:26:55,084 - modelscope - INFO - epoch [1][2020/4953]\tlr: 6.849e-05, memory: 9058, loss: 1.0711\n", + "2023-07-02 18:27:01,229 - modelscope - INFO - epoch [1][2025/4953]\tlr: 6.835e-05, memory: 9058, loss: 0.9915\n", + "2023-07-02 18:27:05,887 - modelscope - INFO - 
epoch [1][2030/4953]\tlr: 6.822e-05, memory: 9058, loss: 1.4650\n", + "2023-07-02 18:27:10,177 - modelscope - INFO - epoch [1][2035/4953]\tlr: 6.808e-05, memory: 9058, loss: 1.7047\n", + "2023-07-02 18:27:16,232 - modelscope - INFO - epoch [1][2040/4953]\tlr: 6.794e-05, memory: 9058, loss: 1.1574\n", + "2023-07-02 18:27:20,822 - modelscope - INFO - epoch [1][2045/4953]\tlr: 6.780e-05, memory: 9058, loss: 2.8094\n", + "2023-07-02 18:27:26,542 - modelscope - INFO - epoch [1][2050/4953]\tlr: 6.767e-05, memory: 9058, loss: 1.8707\n", + "2023-07-02 18:27:33,544 - modelscope - INFO - epoch [1][2055/4953]\tlr: 6.753e-05, memory: 9058, loss: 0.4879\n", + "2023-07-02 18:27:38,872 - modelscope - INFO - epoch [1][2060/4953]\tlr: 6.739e-05, memory: 9058, loss: 1.4332\n", + "2023-07-02 18:27:45,755 - modelscope - INFO - epoch [1][2065/4953]\tlr: 6.725e-05, memory: 9058, loss: 1.3403\n", + "2023-07-02 18:27:52,231 - modelscope - INFO - epoch [1][2070/4953]\tlr: 6.712e-05, memory: 9058, loss: 1.4531\n", + "2023-07-02 18:27:55,367 - modelscope - INFO - epoch [1][2075/4953]\tlr: 6.698e-05, memory: 9058, loss: 2.8781\n", + "2023-07-02 18:28:03,691 - modelscope - INFO - epoch [1][2080/4953]\tlr: 6.684e-05, memory: 9058, loss: 1.1735\n", + "2023-07-02 18:28:12,186 - modelscope - INFO - epoch [1][2085/4953]\tlr: 6.670e-05, memory: 9058, loss: 0.9088\n", + "2023-07-02 18:28:18,486 - modelscope - INFO - epoch [1][2090/4953]\tlr: 6.656e-05, memory: 9058, loss: 0.4293\n", + "2023-07-02 18:28:24,461 - modelscope - INFO - epoch [1][2095/4953]\tlr: 6.642e-05, memory: 9058, loss: 2.8336\n", + "2023-07-02 18:28:31,009 - modelscope - INFO - epoch [1][2100/4953]\tlr: 6.628e-05, memory: 9058, loss: 0.6750\n", + "2023-07-02 18:28:35,682 - modelscope - INFO - epoch [1][2105/4953]\tlr: 6.614e-05, memory: 9058, loss: 1.2004\n", + "2023-07-02 18:28:42,815 - modelscope - INFO - epoch [1][2110/4953]\tlr: 6.601e-05, memory: 9058, loss: 0.7390\n", + "2023-07-02 18:28:48,536 - modelscope - INFO - epoch 
[1][2115/4953]\tlr: 6.587e-05, memory: 9058, loss: 1.2892\n", + "2023-07-02 18:28:54,885 - modelscope - INFO - epoch [1][2120/4953]\tlr: 6.573e-05, memory: 9058, loss: 1.1596\n", + "2023-07-02 18:29:01,644 - modelscope - INFO - epoch [1][2125/4953]\tlr: 6.559e-05, memory: 9058, loss: 1.2383\n", + "2023-07-02 18:29:06,513 - modelscope - INFO - epoch [1][2130/4953]\tlr: 6.545e-05, memory: 9058, loss: 1.6500\n", + "2023-07-02 18:29:12,125 - modelscope - INFO - epoch [1][2135/4953]\tlr: 6.531e-05, memory: 9058, loss: 1.4234\n", + "2023-07-02 18:29:16,930 - modelscope - INFO - epoch [1][2140/4953]\tlr: 6.517e-05, memory: 9058, loss: 0.9209\n", + "2023-07-02 18:29:23,051 - modelscope - INFO - epoch [1][2145/4953]\tlr: 6.503e-05, memory: 9058, loss: 1.3340\n", + "2023-07-02 18:29:26,259 - modelscope - INFO - epoch [1][2150/4953]\tlr: 6.489e-05, memory: 9058, loss: 2.2531\n", + "2023-07-02 18:29:30,151 - modelscope - INFO - epoch [1][2155/4953]\tlr: 6.475e-05, memory: 9058, loss: 2.4398\n", + "2023-07-02 18:29:35,984 - modelscope - INFO - epoch [1][2160/4953]\tlr: 6.461e-05, memory: 9058, loss: 1.2609\n", + "2023-07-02 18:29:42,072 - modelscope - INFO - epoch [1][2165/4953]\tlr: 6.447e-05, memory: 9058, loss: 1.3589\n", + "2023-07-02 18:29:47,131 - modelscope - INFO - epoch [1][2170/4953]\tlr: 6.433e-05, memory: 9058, loss: 1.9894\n", + "2023-07-02 18:29:52,463 - modelscope - INFO - epoch [1][2175/4953]\tlr: 6.419e-05, memory: 9058, loss: 1.4546\n", + "2023-07-02 18:29:56,467 - modelscope - INFO - epoch [1][2180/4953]\tlr: 6.405e-05, memory: 9058, loss: 2.2633\n", + "2023-07-02 18:30:00,810 - modelscope - INFO - epoch [1][2185/4953]\tlr: 6.391e-05, memory: 9058, loss: 1.4179\n", + "2023-07-02 18:30:04,745 - modelscope - INFO - epoch [1][2190/4953]\tlr: 6.377e-05, memory: 9058, loss: 1.1947\n", + "2023-07-02 18:30:10,179 - modelscope - INFO - epoch [1][2195/4953]\tlr: 6.363e-05, memory: 9058, loss: 1.5030\n", + "2023-07-02 18:30:16,533 - modelscope - WARNING - ('METRICS', 
'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:16<00:00, 2.04it/s]\n", + "2023-07-02 18:32:32,577 - modelscope - INFO - Saving checkpoint at 2200 iter\n", + "2023-07-02 18:32:32,617 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter2000_acc0.8048229217529297\n", + "2023-07-02 18:32:32,621 - modelscope - INFO - Saving checkpoint at 2200 iter\n", + "2023-07-02 18:32:32,661 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_2000\n", + "2023-07-02 18:32:32,665 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9058, evaluation/acc: 0.8064, evaluation/loss: 1.3193, loss: 0.8660\n", + "2023-07-02 18:32:38,756 - modelscope - INFO - epoch [1][2205/4953]\tlr: 6.334e-05, memory: 9058, loss: 1.2521\n", + "2023-07-02 18:32:45,468 - modelscope - INFO - epoch [1][2210/4953]\tlr: 6.320e-05, memory: 9058, loss: 1.0652\n", + "2023-07-02 18:32:51,626 - modelscope - INFO - epoch [1][2215/4953]\tlr: 6.306e-05, memory: 9058, loss: 0.8250\n", + "2023-07-02 18:32:56,742 - modelscope - INFO - epoch [1][2220/4953]\tlr: 6.292e-05, memory: 9058, loss: 1.2680\n", + "2023-07-02 18:33:02,927 - modelscope - INFO - epoch [1][2225/4953]\tlr: 6.278e-05, memory: 9058, loss: 1.5531\n", + "2023-07-02 18:33:08,196 - modelscope - INFO - epoch [1][2230/4953]\tlr: 6.264e-05, memory: 9058, loss: 1.5766\n", + "2023-07-02 18:33:14,926 - modelscope - INFO - epoch [1][2235/4953]\tlr: 6.250e-05, memory: 9058, loss: 1.6031\n", + "2023-07-02 18:33:19,152 - modelscope - INFO - epoch [1][2240/4953]\tlr: 6.236e-05, memory: 9058, loss: 1.8438\n", + "2023-07-02 18:33:26,986 - modelscope - INFO - epoch [1][2245/4953]\tlr: 6.221e-05, memory: 9058, loss: 1.0715\n", + "2023-07-02 18:33:34,062 - modelscope - INFO - epoch [1][2250/4953]\tlr: 6.207e-05, memory: 9058, loss: 1.3094\n", + "2023-07-02 18:33:40,767 - modelscope - INFO - epoch 
[1][2255/4953]\tlr: 6.193e-05, memory: 9058, loss: 0.5586\n", + "2023-07-02 18:33:45,996 - modelscope - INFO - epoch [1][2260/4953]\tlr: 6.179e-05, memory: 9058, loss: 1.0727\n", + "2023-07-02 18:33:50,926 - modelscope - INFO - epoch [1][2265/4953]\tlr: 6.165e-05, memory: 9058, loss: 0.5758\n", + "2023-07-02 18:33:54,762 - modelscope - INFO - epoch [1][2270/4953]\tlr: 6.151e-05, memory: 9058, loss: 1.1336\n", + "2023-07-02 18:34:00,210 - modelscope - INFO - epoch [1][2275/4953]\tlr: 6.136e-05, memory: 9058, loss: 1.0373\n", + "2023-07-02 18:34:08,272 - modelscope - INFO - epoch [1][2280/4953]\tlr: 6.122e-05, memory: 9058, loss: 0.7815\n", + "2023-07-02 18:34:14,309 - modelscope - INFO - epoch [1][2285/4953]\tlr: 6.108e-05, memory: 9058, loss: 1.4531\n", + "2023-07-02 18:34:21,626 - modelscope - INFO - epoch [1][2290/4953]\tlr: 6.094e-05, memory: 9058, loss: 1.6297\n", + "2023-07-02 18:34:28,588 - modelscope - INFO - epoch [1][2295/4953]\tlr: 6.080e-05, memory: 9082, loss: 1.6783\n", + "2023-07-02 18:34:33,419 - modelscope - INFO - epoch [1][2300/4953]\tlr: 6.065e-05, memory: 9082, loss: 2.0078\n", + "2023-07-02 18:34:38,966 - modelscope - INFO - epoch [1][2305/4953]\tlr: 6.051e-05, memory: 9082, loss: 1.6065\n", + "2023-07-02 18:34:44,320 - modelscope - INFO - epoch [1][2310/4953]\tlr: 6.037e-05, memory: 9082, loss: 1.6664\n", + "2023-07-02 18:34:49,557 - modelscope - INFO - epoch [1][2315/4953]\tlr: 6.023e-05, memory: 9082, loss: 2.1622\n", + "2023-07-02 18:34:54,691 - modelscope - INFO - epoch [1][2320/4953]\tlr: 6.008e-05, memory: 9082, loss: 2.2738\n", + "2023-07-02 18:35:02,067 - modelscope - INFO - epoch [1][2325/4953]\tlr: 5.994e-05, memory: 9082, loss: 0.6338\n", + "2023-07-02 18:35:07,658 - modelscope - INFO - epoch [1][2330/4953]\tlr: 5.980e-05, memory: 9082, loss: 0.9046\n", + "2023-07-02 18:35:13,966 - modelscope - INFO - epoch [1][2335/4953]\tlr: 5.966e-05, memory: 9082, loss: 1.2388\n", + "2023-07-02 18:35:19,741 - modelscope - INFO - epoch 
[1][2340/4953]\tlr: 5.951e-05, memory: 9082, loss: 0.7371\n", + "2023-07-02 18:35:25,904 - modelscope - INFO - epoch [1][2345/4953]\tlr: 5.937e-05, memory: 9082, loss: 1.4103\n", + "2023-07-02 18:35:31,382 - modelscope - INFO - epoch [1][2350/4953]\tlr: 5.923e-05, memory: 9082, loss: 1.4088\n", + "2023-07-02 18:35:36,193 - modelscope - INFO - epoch [1][2355/4953]\tlr: 5.909e-05, memory: 9082, loss: 2.0184\n", + "2023-07-02 18:35:40,781 - modelscope - INFO - epoch [1][2360/4953]\tlr: 5.894e-05, memory: 9082, loss: 1.1237\n", + "2023-07-02 18:35:45,133 - modelscope - INFO - epoch [1][2365/4953]\tlr: 5.880e-05, memory: 9082, loss: 2.1938\n", + "2023-07-02 18:35:51,029 - modelscope - INFO - epoch [1][2370/4953]\tlr: 5.866e-05, memory: 9082, loss: 0.9563\n", + "2023-07-02 18:35:57,943 - modelscope - INFO - epoch [1][2375/4953]\tlr: 5.852e-05, memory: 9082, loss: 1.3258\n", + "2023-07-02 18:36:05,016 - modelscope - INFO - epoch [1][2380/4953]\tlr: 5.837e-05, memory: 9082, loss: 1.2687\n", + "2023-07-02 18:36:09,977 - modelscope - INFO - epoch [1][2385/4953]\tlr: 5.823e-05, memory: 9082, loss: 1.2655\n", + "2023-07-02 18:36:16,229 - modelscope - INFO - epoch [1][2390/4953]\tlr: 5.809e-05, memory: 9082, loss: 0.9164\n", + "2023-07-02 18:36:21,471 - modelscope - INFO - epoch [1][2395/4953]\tlr: 5.794e-05, memory: 9082, loss: 1.6281\n", + "2023-07-02 18:36:27,959 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 18:38:43,433 - modelscope - INFO - Saving checkpoint at 2400 iter\n", + "2023-07-02 18:38:43,474 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter2200_acc0.8063529133796692\n", + "2023-07-02 18:38:43,478 - modelscope - INFO - Saving checkpoint at 2400 iter\n", + "2023-07-02 18:38:43,517 - modelscope - INFO - deleting checkpoint: 
/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_2200\n", + "2023-07-02 18:38:43,521 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8076, evaluation/loss: 1.3023, loss: 0.6604\n", + "2023-07-02 18:38:48,050 - modelscope - INFO - epoch [1][2405/4953]\tlr: 5.766e-05, memory: 9082, loss: 1.8258\n", + "2023-07-02 18:38:54,650 - modelscope - INFO - epoch [1][2410/4953]\tlr: 5.751e-05, memory: 9082, loss: 1.3132\n", + "2023-07-02 18:38:59,846 - modelscope - INFO - epoch [1][2415/4953]\tlr: 5.737e-05, memory: 9082, loss: 1.6910\n", + "2023-07-02 18:39:07,443 - modelscope - INFO - epoch [1][2420/4953]\tlr: 5.723e-05, memory: 9082, loss: 1.4445\n", + "2023-07-02 18:39:15,603 - modelscope - INFO - epoch [1][2425/4953]\tlr: 5.708e-05, memory: 9082, loss: 0.9867\n", + "2023-07-02 18:39:21,112 - modelscope - INFO - epoch [1][2430/4953]\tlr: 5.694e-05, memory: 9082, loss: 1.5023\n", + "2023-07-02 18:39:26,278 - modelscope - INFO - epoch [1][2435/4953]\tlr: 5.680e-05, memory: 9082, loss: 1.5297\n", + "2023-07-02 18:39:32,189 - modelscope - INFO - epoch [1][2440/4953]\tlr: 5.666e-05, memory: 9082, loss: 1.2663\n", + "2023-07-02 18:39:39,288 - modelscope - INFO - epoch [1][2445/4953]\tlr: 5.651e-05, memory: 9082, loss: 1.1214\n", + "2023-07-02 18:39:45,604 - modelscope - INFO - epoch [1][2450/4953]\tlr: 5.637e-05, memory: 9082, loss: 0.7744\n", + "2023-07-02 18:39:50,026 - modelscope - INFO - epoch [1][2455/4953]\tlr: 5.623e-05, memory: 9082, loss: 1.3865\n", + "2023-07-02 18:39:57,039 - modelscope - INFO - epoch [1][2460/4953]\tlr: 5.608e-05, memory: 9082, loss: 0.5821\n", + "2023-07-02 18:40:04,905 - modelscope - INFO - epoch [1][2465/4953]\tlr: 5.594e-05, memory: 9082, loss: 1.6459\n", + "2023-07-02 18:40:12,277 - modelscope - INFO - epoch [1][2470/4953]\tlr: 5.580e-05, memory: 9082, loss: 1.5098\n", + "2023-07-02 18:40:21,189 - modelscope - INFO - epoch [1][2475/4953]\tlr: 5.565e-05, memory: 9082, loss: 0.7347\n", + "2023-07-02 
18:40:25,832 - modelscope - INFO - epoch [1][2480/4953]\tlr: 5.551e-05, memory: 9082, loss: 1.9617\n", + "2023-07-02 18:40:31,034 - modelscope - INFO - epoch [1][2485/4953]\tlr: 5.537e-05, memory: 9082, loss: 1.3300\n", + "2023-07-02 18:40:35,486 - modelscope - INFO - epoch [1][2490/4953]\tlr: 5.522e-05, memory: 9082, loss: 1.7078\n", + "2023-07-02 18:40:43,211 - modelscope - INFO - epoch [1][2495/4953]\tlr: 5.508e-05, memory: 9082, loss: 1.5921\n", + "2023-07-02 18:40:48,454 - modelscope - INFO - epoch [1][2500/4953]\tlr: 5.494e-05, memory: 9082, loss: 1.9926\n", + "2023-07-02 18:40:53,713 - modelscope - INFO - epoch [1][2505/4953]\tlr: 5.479e-05, memory: 9082, loss: 1.1594\n", + "2023-07-02 18:40:58,439 - modelscope - INFO - epoch [1][2510/4953]\tlr: 5.465e-05, memory: 9082, loss: 1.1770\n", + "2023-07-02 18:41:04,372 - modelscope - INFO - epoch [1][2515/4953]\tlr: 5.451e-05, memory: 9082, loss: 1.6250\n", + "2023-07-02 18:41:09,182 - modelscope - INFO - epoch [1][2520/4953]\tlr: 5.436e-05, memory: 9082, loss: 1.7578\n", + "2023-07-02 18:41:14,114 - modelscope - INFO - epoch [1][2525/4953]\tlr: 5.422e-05, memory: 9082, loss: 2.3328\n", + "2023-07-02 18:41:20,090 - modelscope - INFO - epoch [1][2530/4953]\tlr: 5.408e-05, memory: 9082, loss: 2.0059\n", + "2023-07-02 18:41:24,643 - modelscope - INFO - epoch [1][2535/4953]\tlr: 5.393e-05, memory: 9082, loss: 1.9216\n", + "2023-07-02 18:41:30,805 - modelscope - INFO - epoch [1][2540/4953]\tlr: 5.379e-05, memory: 9082, loss: 0.7870\n", + "2023-07-02 18:41:35,276 - modelscope - INFO - epoch [1][2545/4953]\tlr: 5.365e-05, memory: 9082, loss: 1.8344\n", + "2023-07-02 18:41:40,107 - modelscope - INFO - epoch [1][2550/4953]\tlr: 5.350e-05, memory: 9082, loss: 1.0918\n", + "2023-07-02 18:41:45,127 - modelscope - INFO - epoch [1][2555/4953]\tlr: 5.336e-05, memory: 9082, loss: 0.8277\n", + "2023-07-02 18:41:49,439 - modelscope - INFO - epoch [1][2560/4953]\tlr: 5.322e-05, memory: 9082, loss: 1.3539\n", + "2023-07-02 
18:41:54,796 - modelscope - INFO - epoch [1][2565/4953]\tlr: 5.307e-05, memory: 9082, loss: 1.4898\n", + "2023-07-02 18:41:59,982 - modelscope - INFO - epoch [1][2570/4953]\tlr: 5.293e-05, memory: 9082, loss: 1.4383\n", + "2023-07-02 18:42:06,280 - modelscope - INFO - epoch [1][2575/4953]\tlr: 5.279e-05, memory: 9082, loss: 1.3823\n", + "2023-07-02 18:42:11,765 - modelscope - INFO - epoch [1][2580/4953]\tlr: 5.264e-05, memory: 9082, loss: 1.6961\n", + "2023-07-02 18:42:18,475 - modelscope - INFO - epoch [1][2585/4953]\tlr: 5.250e-05, memory: 9082, loss: 1.7096\n", + "2023-07-02 18:42:25,377 - modelscope - INFO - epoch [1][2590/4953]\tlr: 5.236e-05, memory: 9082, loss: 0.2711\n", + "2023-07-02 18:42:31,462 - modelscope - INFO - epoch [1][2595/4953]\tlr: 5.222e-05, memory: 9082, loss: 1.8032\n", + "2023-07-02 18:42:37,270 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 18:44:53,170 - modelscope - INFO - Saving checkpoint at 2600 iter\n", + "2023-07-02 18:44:53,210 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter2400_acc0.8075699210166931\n", + "2023-07-02 18:44:53,214 - modelscope - INFO - Saving checkpoint at 2600 iter\n", + "2023-07-02 18:44:53,253 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_2400\n", + "2023-07-02 18:44:53,258 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8082, evaluation/loss: 1.3051, loss: 1.3200\n", + "2023-07-02 18:44:56,746 - modelscope - INFO - epoch [1][2605/4953]\tlr: 5.193e-05, memory: 9082, loss: 2.4016\n", + "2023-07-02 18:45:02,237 - modelscope - INFO - epoch [1][2610/4953]\tlr: 5.179e-05, memory: 9082, loss: 1.4620\n", + "2023-07-02 18:45:08,746 - modelscope - INFO - epoch [1][2615/4953]\tlr: 5.164e-05, memory: 9082, loss: 1.0342\n", + 
"2023-07-02 18:45:15,827 - modelscope - INFO - epoch [1][2620/4953]\tlr: 5.150e-05, memory: 9082, loss: 1.2133\n", + "2023-07-02 18:45:20,967 - modelscope - INFO - epoch [1][2625/4953]\tlr: 5.136e-05, memory: 9082, loss: 1.1039\n", + "2023-07-02 18:45:28,010 - modelscope - INFO - epoch [1][2630/4953]\tlr: 5.122e-05, memory: 9082, loss: 2.2398\n", + "2023-07-02 18:45:33,346 - modelscope - INFO - epoch [1][2635/4953]\tlr: 5.107e-05, memory: 9082, loss: 1.0719\n", + "2023-07-02 18:45:38,505 - modelscope - INFO - epoch [1][2640/4953]\tlr: 5.093e-05, memory: 9082, loss: 2.1718\n", + "2023-07-02 18:45:46,286 - modelscope - INFO - epoch [1][2645/4953]\tlr: 5.079e-05, memory: 9082, loss: 1.4109\n", + "2023-07-02 18:45:50,359 - modelscope - INFO - epoch [1][2650/4953]\tlr: 5.065e-05, memory: 9082, loss: 2.7281\n", + "2023-07-02 18:45:54,451 - modelscope - INFO - epoch [1][2655/4953]\tlr: 5.050e-05, memory: 9082, loss: 1.4117\n", + "2023-07-02 18:46:01,191 - modelscope - INFO - epoch [1][2660/4953]\tlr: 5.036e-05, memory: 9082, loss: 1.0565\n", + "2023-07-02 18:46:06,247 - modelscope - INFO - epoch [1][2665/4953]\tlr: 5.022e-05, memory: 9082, loss: 0.9540\n", + "2023-07-02 18:46:13,076 - modelscope - INFO - epoch [1][2670/4953]\tlr: 5.008e-05, memory: 9082, loss: 1.5935\n", + "2023-07-02 18:46:18,638 - modelscope - INFO - epoch [1][2675/4953]\tlr: 4.993e-05, memory: 9082, loss: 2.1958\n", + "2023-07-02 18:46:23,885 - modelscope - INFO - epoch [1][2680/4953]\tlr: 4.979e-05, memory: 9082, loss: 1.6164\n", + "2023-07-02 18:46:31,178 - modelscope - INFO - epoch [1][2685/4953]\tlr: 4.965e-05, memory: 9082, loss: 0.9352\n", + "2023-07-02 18:46:38,014 - modelscope - INFO - epoch [1][2690/4953]\tlr: 4.951e-05, memory: 9082, loss: 1.4887\n", + "2023-07-02 18:46:41,545 - modelscope - INFO - epoch [1][2695/4953]\tlr: 4.936e-05, memory: 9082, loss: 1.2578\n", + "2023-07-02 18:46:46,458 - modelscope - INFO - epoch [1][2700/4953]\tlr: 4.922e-05, memory: 9082, loss: 1.1711\n", + 
"2023-07-02 18:46:53,227 - modelscope - INFO - epoch [1][2705/4953]\tlr: 4.908e-05, memory: 9082, loss: 1.3223\n", + "2023-07-02 18:46:59,578 - modelscope - INFO - epoch [1][2710/4953]\tlr: 4.894e-05, memory: 9082, loss: 1.4570\n", + "2023-07-02 18:47:04,896 - modelscope - INFO - epoch [1][2715/4953]\tlr: 4.880e-05, memory: 9082, loss: 1.0868\n", + "2023-07-02 18:47:10,404 - modelscope - INFO - epoch [1][2720/4953]\tlr: 4.865e-05, memory: 9082, loss: 1.5884\n", + "2023-07-02 18:47:16,038 - modelscope - INFO - epoch [1][2725/4953]\tlr: 4.851e-05, memory: 9082, loss: 1.0243\n", + "2023-07-02 18:47:22,354 - modelscope - INFO - epoch [1][2730/4953]\tlr: 4.837e-05, memory: 9082, loss: 1.4346\n", + "2023-07-02 18:47:29,290 - modelscope - INFO - epoch [1][2735/4953]\tlr: 4.823e-05, memory: 9082, loss: 0.9521\n", + "2023-07-02 18:47:37,813 - modelscope - INFO - epoch [1][2740/4953]\tlr: 4.809e-05, memory: 9082, loss: 0.7296\n", + "2023-07-02 18:47:40,908 - modelscope - INFO - epoch [1][2745/4953]\tlr: 4.795e-05, memory: 9082, loss: 1.5844\n", + "2023-07-02 18:47:46,334 - modelscope - INFO - epoch [1][2750/4953]\tlr: 4.781e-05, memory: 9082, loss: 1.5023\n", + "2023-07-02 18:47:51,224 - modelscope - INFO - epoch [1][2755/4953]\tlr: 4.766e-05, memory: 9082, loss: 0.9710\n", + "2023-07-02 18:47:58,431 - modelscope - INFO - epoch [1][2760/4953]\tlr: 4.752e-05, memory: 9082, loss: 1.1539\n", + "2023-07-02 18:48:04,898 - modelscope - INFO - epoch [1][2765/4953]\tlr: 4.738e-05, memory: 9082, loss: 1.6984\n", + "2023-07-02 18:48:10,316 - modelscope - INFO - epoch [1][2770/4953]\tlr: 4.724e-05, memory: 9082, loss: 1.5420\n", + "2023-07-02 18:48:16,843 - modelscope - INFO - epoch [1][2775/4953]\tlr: 4.710e-05, memory: 9082, loss: 1.2396\n", + "2023-07-02 18:48:22,406 - modelscope - INFO - epoch [1][2780/4953]\tlr: 4.696e-05, memory: 9082, loss: 1.8611\n", + "2023-07-02 18:48:28,234 - modelscope - INFO - epoch [1][2785/4953]\tlr: 4.682e-05, memory: 9082, loss: 1.2051\n", + 
"2023-07-02 18:48:35,175 - modelscope - INFO - epoch [1][2790/4953]\tlr: 4.668e-05, memory: 9082, loss: 0.9440\n", + "2023-07-02 18:48:40,689 - modelscope - INFO - epoch [1][2795/4953]\tlr: 4.654e-05, memory: 9082, loss: 1.5422\n", + "2023-07-02 18:48:46,340 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 18:51:02,313 - modelscope - INFO - Saving checkpoint at 2800 iter\n", + "2023-07-02 18:51:02,352 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_2600\n", + "2023-07-02 18:51:02,357 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8080, evaluation/loss: 1.2874, loss: 0.3999\n", + "2023-07-02 18:51:09,389 - modelscope - INFO - epoch [1][2805/4953]\tlr: 4.625e-05, memory: 9082, loss: 0.9511\n", + "2023-07-02 18:51:14,406 - modelscope - INFO - epoch [1][2810/4953]\tlr: 4.611e-05, memory: 9082, loss: 0.9344\n", + "2023-07-02 18:51:19,383 - modelscope - INFO - epoch [1][2815/4953]\tlr: 4.597e-05, memory: 9082, loss: 1.5798\n", + "2023-07-02 18:51:26,100 - modelscope - INFO - epoch [1][2820/4953]\tlr: 4.583e-05, memory: 9082, loss: 1.1518\n", + "2023-07-02 18:51:31,560 - modelscope - INFO - epoch [1][2825/4953]\tlr: 4.569e-05, memory: 9082, loss: 1.9438\n", + "2023-07-02 18:51:37,772 - modelscope - INFO - epoch [1][2830/4953]\tlr: 4.555e-05, memory: 9082, loss: 1.2336\n", + "2023-07-02 18:51:45,037 - modelscope - INFO - epoch [1][2835/4953]\tlr: 4.541e-05, memory: 9082, loss: 0.4342\n", + "2023-07-02 18:51:50,379 - modelscope - INFO - epoch [1][2840/4953]\tlr: 4.527e-05, memory: 9082, loss: 1.5258\n", + "2023-07-02 18:51:55,219 - modelscope - INFO - epoch [1][2845/4953]\tlr: 4.513e-05, memory: 9082, loss: 1.3063\n", + "2023-07-02 18:52:00,648 - modelscope - INFO - epoch [1][2850/4953]\tlr: 4.499e-05, memory: 9082, loss: 1.0977\n", + "2023-07-02 
18:52:05,123 - modelscope - INFO - epoch [1][2855/4953]\tlr: 4.486e-05, memory: 9082, loss: 1.2469\n", + "2023-07-02 18:52:10,542 - modelscope - INFO - epoch [1][2860/4953]\tlr: 4.472e-05, memory: 9082, loss: 1.0984\n", + "2023-07-02 18:52:17,747 - modelscope - INFO - epoch [1][2865/4953]\tlr: 4.458e-05, memory: 9082, loss: 0.7611\n", + "2023-07-02 18:52:23,635 - modelscope - INFO - epoch [1][2870/4953]\tlr: 4.444e-05, memory: 9082, loss: 1.9703\n", + "2023-07-02 18:52:29,494 - modelscope - INFO - epoch [1][2875/4953]\tlr: 4.430e-05, memory: 9082, loss: 1.2950\n", + "2023-07-02 18:52:35,837 - modelscope - INFO - epoch [1][2880/4953]\tlr: 4.416e-05, memory: 9082, loss: 0.8969\n", + "2023-07-02 18:52:40,187 - modelscope - INFO - epoch [1][2885/4953]\tlr: 4.402e-05, memory: 9082, loss: 2.0484\n", + "2023-07-02 18:52:46,608 - modelscope - INFO - epoch [1][2890/4953]\tlr: 4.388e-05, memory: 9082, loss: 1.3309\n", + "2023-07-02 18:52:52,971 - modelscope - INFO - epoch [1][2895/4953]\tlr: 4.374e-05, memory: 9082, loss: 2.1859\n", + "2023-07-02 18:52:57,418 - modelscope - INFO - epoch [1][2900/4953]\tlr: 4.360e-05, memory: 9082, loss: 1.4730\n", + "2023-07-02 18:53:02,915 - modelscope - INFO - epoch [1][2905/4953]\tlr: 4.347e-05, memory: 9082, loss: 1.1398\n", + "2023-07-02 18:53:08,380 - modelscope - INFO - epoch [1][2910/4953]\tlr: 4.333e-05, memory: 9082, loss: 1.1520\n", + "2023-07-02 18:53:14,293 - modelscope - INFO - epoch [1][2915/4953]\tlr: 4.319e-05, memory: 9082, loss: 1.4763\n", + "2023-07-02 18:53:19,782 - modelscope - INFO - epoch [1][2920/4953]\tlr: 4.305e-05, memory: 9082, loss: 1.3924\n", + "2023-07-02 18:53:24,564 - modelscope - INFO - epoch [1][2925/4953]\tlr: 4.291e-05, memory: 9082, loss: 1.1281\n", + "2023-07-02 18:53:28,764 - modelscope - INFO - epoch [1][2930/4953]\tlr: 4.278e-05, memory: 9082, loss: 1.3961\n", + "2023-07-02 18:53:34,633 - modelscope - INFO - epoch [1][2935/4953]\tlr: 4.264e-05, memory: 9082, loss: 1.1989\n", + "2023-07-02 
18:53:40,740 - modelscope - INFO - epoch [1][2940/4953]\tlr: 4.250e-05, memory: 9082, loss: 1.4141\n", + "2023-07-02 18:53:45,991 - modelscope - INFO - epoch [1][2945/4953]\tlr: 4.236e-05, memory: 9082, loss: 1.8516\n", + "2023-07-02 18:53:53,446 - modelscope - INFO - epoch [1][2950/4953]\tlr: 4.223e-05, memory: 9082, loss: 1.0945\n", + "2023-07-02 18:53:57,916 - modelscope - INFO - epoch [1][2955/4953]\tlr: 4.209e-05, memory: 9082, loss: 2.4191\n", + "2023-07-02 18:54:03,814 - modelscope - INFO - epoch [1][2960/4953]\tlr: 4.195e-05, memory: 9082, loss: 1.0555\n", + "2023-07-02 18:54:11,481 - modelscope - INFO - epoch [1][2965/4953]\tlr: 4.181e-05, memory: 9082, loss: 1.0359\n", + "2023-07-02 18:54:18,062 - modelscope - INFO - epoch [1][2970/4953]\tlr: 4.168e-05, memory: 9082, loss: 0.5380\n", + "2023-07-02 18:54:23,157 - modelscope - INFO - epoch [1][2975/4953]\tlr: 4.154e-05, memory: 9082, loss: 1.7539\n", + "2023-07-02 18:54:27,560 - modelscope - INFO - epoch [1][2980/4953]\tlr: 4.140e-05, memory: 9082, loss: 1.5100\n", + "2023-07-02 18:54:32,977 - modelscope - INFO - epoch [1][2985/4953]\tlr: 4.127e-05, memory: 9082, loss: 1.5968\n", + "2023-07-02 18:54:38,633 - modelscope - INFO - epoch [1][2990/4953]\tlr: 4.113e-05, memory: 9082, loss: 1.0911\n", + "2023-07-02 18:54:46,186 - modelscope - INFO - epoch [1][2995/4953]\tlr: 4.100e-05, memory: 9082, loss: 0.9789\n", + "2023-07-02 18:54:52,074 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 18:57:08,067 - modelscope - INFO - Saving checkpoint at 3000 iter\n", + "2023-07-02 18:57:08,107 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter2600_acc0.8082306385040283\n", + "2023-07-02 18:57:08,111 - modelscope - INFO - Saving checkpoint at 3000 iter\n", + "2023-07-02 18:57:08,150 - modelscope - INFO - deleting checkpoint: 
/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_2800\n", + "2023-07-02 18:57:08,155 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8084, evaluation/loss: 1.2728, loss: 0.7777\n", + "2023-07-02 18:57:14,568 - modelscope - INFO - epoch [1][3005/4953]\tlr: 4.072e-05, memory: 9082, loss: 1.7105\n", + "2023-07-02 18:57:20,305 - modelscope - INFO - epoch [1][3010/4953]\tlr: 4.059e-05, memory: 9082, loss: 0.9040\n", + "2023-07-02 18:57:25,518 - modelscope - INFO - epoch [1][3015/4953]\tlr: 4.045e-05, memory: 9082, loss: 1.3430\n", + "2023-07-02 18:57:30,679 - modelscope - INFO - epoch [1][3020/4953]\tlr: 4.032e-05, memory: 9082, loss: 1.9619\n", + "2023-07-02 18:57:36,997 - modelscope - INFO - epoch [1][3025/4953]\tlr: 4.018e-05, memory: 9082, loss: 0.9646\n", + "2023-07-02 18:57:42,949 - modelscope - INFO - epoch [1][3030/4953]\tlr: 4.005e-05, memory: 9082, loss: 0.8223\n", + "2023-07-02 18:57:47,568 - modelscope - INFO - epoch [1][3035/4953]\tlr: 3.991e-05, memory: 9082, loss: 1.9203\n", + "2023-07-02 18:57:53,111 - modelscope - INFO - epoch [1][3040/4953]\tlr: 3.978e-05, memory: 9082, loss: 1.0070\n", + "2023-07-02 18:57:59,474 - modelscope - INFO - epoch [1][3045/4953]\tlr: 3.964e-05, memory: 9082, loss: 1.2164\n", + "2023-07-02 18:58:04,237 - modelscope - INFO - epoch [1][3050/4953]\tlr: 3.951e-05, memory: 9082, loss: 1.6008\n", + "2023-07-02 18:58:09,687 - modelscope - INFO - epoch [1][3055/4953]\tlr: 3.937e-05, memory: 9082, loss: 2.0203\n", + "2023-07-02 18:58:14,949 - modelscope - INFO - epoch [1][3060/4953]\tlr: 3.924e-05, memory: 9082, loss: 1.4613\n", + "2023-07-02 18:58:21,818 - modelscope - INFO - epoch [1][3065/4953]\tlr: 3.911e-05, memory: 9082, loss: 1.2766\n", + "2023-07-02 18:58:28,251 - modelscope - INFO - epoch [1][3070/4953]\tlr: 3.897e-05, memory: 9082, loss: 1.2920\n", + "2023-07-02 18:58:34,440 - modelscope - INFO - epoch [1][3075/4953]\tlr: 3.884e-05, memory: 9082, loss: 1.1436\n", + "2023-07-02 
18:58:41,344 - modelscope - INFO - epoch [1][3080/4953]\tlr: 3.870e-05, memory: 9082, loss: 1.6750\n", + "2023-07-02 18:58:47,507 - modelscope - INFO - epoch [1][3085/4953]\tlr: 3.857e-05, memory: 9082, loss: 1.4508\n", + "2023-07-02 18:58:53,152 - modelscope - INFO - epoch [1][3090/4953]\tlr: 3.844e-05, memory: 9082, loss: 1.1961\n", + "2023-07-02 18:58:57,615 - modelscope - INFO - epoch [1][3095/4953]\tlr: 3.830e-05, memory: 9082, loss: 2.0420\n", + "2023-07-02 18:59:04,675 - modelscope - INFO - epoch [1][3100/4953]\tlr: 3.817e-05, memory: 9082, loss: 0.3189\n", + "2023-07-02 18:59:09,594 - modelscope - INFO - epoch [1][3105/4953]\tlr: 3.804e-05, memory: 9082, loss: 1.5581\n", + "2023-07-02 18:59:16,591 - modelscope - INFO - epoch [1][3110/4953]\tlr: 3.791e-05, memory: 9082, loss: 0.9396\n", + "2023-07-02 18:59:23,334 - modelscope - INFO - epoch [1][3115/4953]\tlr: 3.777e-05, memory: 9082, loss: 0.6580\n", + "2023-07-02 18:59:28,047 - modelscope - INFO - epoch [1][3120/4953]\tlr: 3.764e-05, memory: 9082, loss: 1.4602\n", + "2023-07-02 18:59:31,315 - modelscope - INFO - epoch [1][3125/4953]\tlr: 3.751e-05, memory: 9082, loss: 1.3484\n", + "2023-07-02 18:59:36,121 - modelscope - INFO - epoch [1][3130/4953]\tlr: 3.738e-05, memory: 9082, loss: 2.1273\n", + "2023-07-02 18:59:44,336 - modelscope - INFO - epoch [1][3135/4953]\tlr: 3.725e-05, memory: 9082, loss: 0.8621\n", + "2023-07-02 18:59:49,884 - modelscope - INFO - epoch [1][3140/4953]\tlr: 3.712e-05, memory: 9082, loss: 1.0844\n", + "2023-07-02 18:59:52,597 - modelscope - INFO - epoch [1][3145/4953]\tlr: 3.698e-05, memory: 9082, loss: 1.5453\n", + "2023-07-02 18:59:59,243 - modelscope - INFO - epoch [1][3150/4953]\tlr: 3.685e-05, memory: 9082, loss: 1.1129\n", + "2023-07-02 19:00:04,220 - modelscope - INFO - epoch [1][3155/4953]\tlr: 3.672e-05, memory: 9082, loss: 1.1824\n", + "2023-07-02 19:00:11,762 - modelscope - INFO - epoch [1][3160/4953]\tlr: 3.659e-05, memory: 9082, loss: 0.5676\n", + "2023-07-02 
19:00:18,630 - modelscope - INFO - epoch [1][3165/4953]\tlr: 3.646e-05, memory: 9082, loss: 0.9189\n", + "2023-07-02 19:00:23,483 - modelscope - INFO - epoch [1][3170/4953]\tlr: 3.633e-05, memory: 9082, loss: 1.0324\n", + "2023-07-02 19:00:27,164 - modelscope - INFO - epoch [1][3175/4953]\tlr: 3.620e-05, memory: 9082, loss: 1.2984\n", + "2023-07-02 19:00:32,041 - modelscope - INFO - epoch [1][3180/4953]\tlr: 3.607e-05, memory: 9082, loss: 1.6036\n", + "2023-07-02 19:00:37,245 - modelscope - INFO - epoch [1][3185/4953]\tlr: 3.594e-05, memory: 9082, loss: 1.3896\n", + "2023-07-02 19:00:44,493 - modelscope - INFO - epoch [1][3190/4953]\tlr: 3.581e-05, memory: 9082, loss: 1.1153\n", + "2023-07-02 19:00:49,874 - modelscope - INFO - epoch [1][3195/4953]\tlr: 3.568e-05, memory: 9082, loss: 1.2354\n", + "2023-07-02 19:00:55,061 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 19:03:10,730 - modelscope - INFO - Saving checkpoint at 3200 iter\n", + "2023-07-02 19:03:10,770 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter3000_acc0.8084218502044678\n", + "2023-07-02 19:03:10,774 - modelscope - INFO - Saving checkpoint at 3200 iter\n", + "2023-07-02 19:03:10,813 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_3000\n", + "2023-07-02 19:03:10,818 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8086, evaluation/loss: 1.2627, loss: 1.5492\n", + "2023-07-02 19:03:18,070 - modelscope - INFO - epoch [1][3205/4953]\tlr: 3.542e-05, memory: 9082, loss: 0.1662\n", + "2023-07-02 19:03:26,317 - modelscope - INFO - epoch [1][3210/4953]\tlr: 3.530e-05, memory: 9082, loss: 1.6430\n", + "2023-07-02 19:03:32,449 - modelscope - INFO - epoch [1][3215/4953]\tlr: 3.517e-05, memory: 9082, loss: 0.4798\n", + 
"2023-07-02 19:03:38,508 - modelscope - INFO - epoch [1][3220/4953]\tlr: 3.504e-05, memory: 9082, loss: 1.0096\n", + "2023-07-02 19:03:45,266 - modelscope - INFO - epoch [1][3225/4953]\tlr: 3.491e-05, memory: 9082, loss: 1.1305\n", + "2023-07-02 19:03:48,361 - modelscope - INFO - epoch [1][3230/4953]\tlr: 3.478e-05, memory: 9082, loss: 1.6721\n", + "2023-07-02 19:03:54,630 - modelscope - INFO - epoch [1][3235/4953]\tlr: 3.465e-05, memory: 9082, loss: 1.1138\n", + "2023-07-02 19:03:59,780 - modelscope - INFO - epoch [1][3240/4953]\tlr: 3.453e-05, memory: 9082, loss: 1.2146\n", + "2023-07-02 19:04:04,310 - modelscope - INFO - epoch [1][3245/4953]\tlr: 3.440e-05, memory: 9082, loss: 0.9602\n", + "2023-07-02 19:04:09,085 - modelscope - INFO - epoch [1][3250/4953]\tlr: 3.427e-05, memory: 9082, loss: 2.0369\n", + "2023-07-02 19:04:13,329 - modelscope - INFO - epoch [1][3255/4953]\tlr: 3.415e-05, memory: 9082, loss: 1.3604\n", + "2023-07-02 19:04:19,728 - modelscope - INFO - epoch [1][3260/4953]\tlr: 3.402e-05, memory: 9082, loss: 1.0500\n", + "2023-07-02 19:04:25,537 - modelscope - INFO - epoch [1][3265/4953]\tlr: 3.389e-05, memory: 9082, loss: 1.0730\n", + "2023-07-02 19:04:33,616 - modelscope - INFO - epoch [1][3270/4953]\tlr: 3.377e-05, memory: 9082, loss: 1.3219\n", + "2023-07-02 19:04:36,942 - modelscope - INFO - epoch [1][3275/4953]\tlr: 3.364e-05, memory: 9082, loss: 0.7494\n", + "2023-07-02 19:04:43,190 - modelscope - INFO - epoch [1][3280/4953]\tlr: 3.351e-05, memory: 9082, loss: 0.8293\n", + "2023-07-02 19:04:51,311 - modelscope - INFO - epoch [1][3285/4953]\tlr: 3.339e-05, memory: 9082, loss: 0.7475\n", + "2023-07-02 19:04:54,815 - modelscope - INFO - epoch [1][3290/4953]\tlr: 3.326e-05, memory: 9082, loss: 1.8000\n", + "2023-07-02 19:05:00,342 - modelscope - INFO - epoch [1][3295/4953]\tlr: 3.314e-05, memory: 9082, loss: 1.9621\n", + "2023-07-02 19:05:06,094 - modelscope - INFO - epoch [1][3300/4953]\tlr: 3.301e-05, memory: 9082, loss: 1.3162\n", + 
"2023-07-02 19:05:10,639 - modelscope - INFO - epoch [1][3305/4953]\tlr: 3.289e-05, memory: 9082, loss: 1.4781\n", + "2023-07-02 19:05:12,888 - modelscope - INFO - epoch [1][3310/4953]\tlr: 3.276e-05, memory: 9082, loss: 1.9320\n", + "2023-07-02 19:05:18,374 - modelscope - INFO - epoch [1][3315/4953]\tlr: 3.264e-05, memory: 9082, loss: 0.4891\n", + "2023-07-02 19:05:25,255 - modelscope - INFO - epoch [1][3320/4953]\tlr: 3.252e-05, memory: 9082, loss: 0.9572\n", + "2023-07-02 19:05:31,095 - modelscope - INFO - epoch [1][3325/4953]\tlr: 3.239e-05, memory: 9082, loss: 1.0703\n", + "2023-07-02 19:05:37,787 - modelscope - INFO - epoch [1][3330/4953]\tlr: 3.227e-05, memory: 9082, loss: 0.4883\n", + "2023-07-02 19:05:42,067 - modelscope - INFO - epoch [1][3335/4953]\tlr: 3.214e-05, memory: 9082, loss: 2.1445\n", + "2023-07-02 19:05:47,958 - modelscope - INFO - epoch [1][3340/4953]\tlr: 3.202e-05, memory: 9082, loss: 1.5414\n", + "2023-07-02 19:05:52,434 - modelscope - INFO - epoch [1][3345/4953]\tlr: 3.190e-05, memory: 9082, loss: 1.9531\n", + "2023-07-02 19:05:57,227 - modelscope - INFO - epoch [1][3350/4953]\tlr: 3.178e-05, memory: 9082, loss: 1.2508\n", + "2023-07-02 19:06:03,488 - modelscope - INFO - epoch [1][3355/4953]\tlr: 3.165e-05, memory: 9082, loss: 1.1402\n", + "2023-07-02 19:06:08,978 - modelscope - INFO - epoch [1][3360/4953]\tlr: 3.153e-05, memory: 9082, loss: 1.1211\n", + "2023-07-02 19:06:16,191 - modelscope - INFO - epoch [1][3365/4953]\tlr: 3.141e-05, memory: 9082, loss: 0.7613\n", + "2023-07-02 19:06:23,420 - modelscope - INFO - epoch [1][3370/4953]\tlr: 3.129e-05, memory: 9082, loss: 1.3293\n", + "2023-07-02 19:06:30,067 - modelscope - INFO - epoch [1][3375/4953]\tlr: 3.117e-05, memory: 9082, loss: 1.9758\n", + "2023-07-02 19:06:36,844 - modelscope - INFO - epoch [1][3380/4953]\tlr: 3.104e-05, memory: 9082, loss: 0.3589\n", + "2023-07-02 19:06:43,906 - modelscope - INFO - epoch [1][3385/4953]\tlr: 3.092e-05, memory: 9082, loss: 0.9208\n", + 
"2023-07-02 19:06:49,972 - modelscope - INFO - epoch [1][3390/4953]\tlr: 3.080e-05, memory: 9082, loss: 1.2713\n", + "2023-07-02 19:06:56,815 - modelscope - INFO - epoch [1][3395/4953]\tlr: 3.068e-05, memory: 9082, loss: 1.3320\n", + "2023-07-02 19:07:00,998 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 19:09:16,634 - modelscope - INFO - Saving checkpoint at 3400 iter\n", + "2023-07-02 19:09:16,674 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter3200_acc0.8085957169532776\n", + "2023-07-02 19:09:16,679 - modelscope - INFO - Saving checkpoint at 3400 iter\n", + "2023-07-02 19:09:16,718 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_3200\n", + "2023-07-02 19:09:16,723 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8090, evaluation/loss: 1.2532, loss: 1.3594\n", + "2023-07-02 19:09:23,967 - modelscope - INFO - epoch [1][3405/4953]\tlr: 3.044e-05, memory: 9082, loss: 1.4662\n", + "2023-07-02 19:09:27,883 - modelscope - INFO - epoch [1][3410/4953]\tlr: 3.032e-05, memory: 9082, loss: 1.6219\n", + "2023-07-02 19:09:36,612 - modelscope - INFO - epoch [1][3415/4953]\tlr: 3.020e-05, memory: 9082, loss: 0.8362\n", + "2023-07-02 19:09:43,660 - modelscope - INFO - epoch [1][3420/4953]\tlr: 3.008e-05, memory: 9082, loss: 0.5874\n", + "2023-07-02 19:09:50,318 - modelscope - INFO - epoch [1][3425/4953]\tlr: 2.996e-05, memory: 9082, loss: 0.5588\n", + "2023-07-02 19:09:55,763 - modelscope - INFO - epoch [1][3430/4953]\tlr: 2.985e-05, memory: 9082, loss: 1.5086\n", + "2023-07-02 19:10:00,017 - modelscope - INFO - epoch [1][3435/4953]\tlr: 2.973e-05, memory: 9082, loss: 1.7063\n", + "2023-07-02 19:10:04,359 - modelscope - INFO - epoch [1][3440/4953]\tlr: 2.961e-05, memory: 9082, loss: 
1.0250\n", + "2023-07-02 19:10:11,212 - modelscope - INFO - epoch [1][3445/4953]\tlr: 2.949e-05, memory: 9082, loss: 1.7650\n", + "2023-07-02 19:10:18,583 - modelscope - INFO - epoch [1][3450/4953]\tlr: 2.937e-05, memory: 9082, loss: 1.0846\n", + "2023-07-02 19:10:24,668 - modelscope - INFO - epoch [1][3455/4953]\tlr: 2.926e-05, memory: 9082, loss: 0.6735\n", + "2023-07-02 19:10:29,335 - modelscope - INFO - epoch [1][3460/4953]\tlr: 2.914e-05, memory: 9082, loss: 1.6277\n", + "2023-07-02 19:10:36,188 - modelscope - INFO - epoch [1][3465/4953]\tlr: 2.902e-05, memory: 9082, loss: 0.5597\n", + "2023-07-02 19:10:40,421 - modelscope - INFO - epoch [1][3470/4953]\tlr: 2.891e-05, memory: 9082, loss: 1.6338\n", + "2023-07-02 19:10:45,436 - modelscope - INFO - epoch [1][3475/4953]\tlr: 2.879e-05, memory: 9082, loss: 1.2394\n", + "2023-07-02 19:10:51,181 - modelscope - INFO - epoch [1][3480/4953]\tlr: 2.867e-05, memory: 9082, loss: 1.4753\n", + "2023-07-02 19:10:57,524 - modelscope - INFO - epoch [1][3485/4953]\tlr: 2.856e-05, memory: 9082, loss: 0.2870\n", + "2023-07-02 19:11:04,534 - modelscope - INFO - epoch [1][3490/4953]\tlr: 2.844e-05, memory: 9082, loss: 1.1145\n", + "2023-07-02 19:11:09,939 - modelscope - INFO - epoch [1][3495/4953]\tlr: 2.833e-05, memory: 9082, loss: 1.5525\n", + "2023-07-02 19:11:16,051 - modelscope - INFO - epoch [1][3500/4953]\tlr: 2.821e-05, memory: 9082, loss: 0.9821\n", + "2023-07-02 19:11:21,112 - modelscope - INFO - epoch [1][3505/4953]\tlr: 2.810e-05, memory: 9082, loss: 0.5899\n", + "2023-07-02 19:11:26,462 - modelscope - INFO - epoch [1][3510/4953]\tlr: 2.798e-05, memory: 9082, loss: 1.0081\n", + "2023-07-02 19:11:31,458 - modelscope - INFO - epoch [1][3515/4953]\tlr: 2.787e-05, memory: 9082, loss: 1.9700\n", + "2023-07-02 19:11:36,854 - modelscope - INFO - epoch [1][3520/4953]\tlr: 2.775e-05, memory: 9082, loss: 1.4628\n", + "2023-07-02 19:11:42,492 - modelscope - INFO - epoch [1][3525/4953]\tlr: 2.764e-05, memory: 9082, loss: 2.0672\n", 
+ "2023-07-02 19:11:46,917 - modelscope - INFO - epoch [1][3530/4953]\tlr: 2.753e-05, memory: 9082, loss: 1.2469\n", + "2023-07-02 19:11:51,730 - modelscope - INFO - epoch [1][3535/4953]\tlr: 2.741e-05, memory: 9082, loss: 1.8609\n", + "2023-07-02 19:11:58,366 - modelscope - INFO - epoch [1][3540/4953]\tlr: 2.730e-05, memory: 9082, loss: 1.0629\n", + "2023-07-02 19:12:03,036 - modelscope - INFO - epoch [1][3545/4953]\tlr: 2.719e-05, memory: 9082, loss: 1.9508\n", + "2023-07-02 19:12:07,669 - modelscope - INFO - epoch [1][3550/4953]\tlr: 2.707e-05, memory: 9082, loss: 1.1436\n", + "2023-07-02 19:12:12,567 - modelscope - INFO - epoch [1][3555/4953]\tlr: 2.696e-05, memory: 9082, loss: 1.7292\n", + "2023-07-02 19:12:18,906 - modelscope - INFO - epoch [1][3560/4953]\tlr: 2.685e-05, memory: 9082, loss: 1.4152\n", + "2023-07-02 19:12:27,058 - modelscope - INFO - epoch [1][3565/4953]\tlr: 2.674e-05, memory: 9082, loss: 1.5086\n", + "2023-07-02 19:12:34,096 - modelscope - INFO - epoch [1][3570/4953]\tlr: 2.663e-05, memory: 9082, loss: 0.4786\n", + "2023-07-02 19:12:40,666 - modelscope - INFO - epoch [1][3575/4953]\tlr: 2.652e-05, memory: 9082, loss: 1.7496\n", + "2023-07-02 19:12:47,997 - modelscope - INFO - epoch [1][3580/4953]\tlr: 2.641e-05, memory: 9082, loss: 1.0977\n", + "2023-07-02 19:12:51,897 - modelscope - INFO - epoch [1][3585/4953]\tlr: 2.630e-05, memory: 9082, loss: 1.6832\n", + "2023-07-02 19:12:59,020 - modelscope - INFO - epoch [1][3590/4953]\tlr: 2.619e-05, memory: 9082, loss: 0.4163\n", + "2023-07-02 19:13:07,038 - modelscope - INFO - epoch [1][3595/4953]\tlr: 2.608e-05, memory: 9082, loss: 0.7688\n", + "2023-07-02 19:13:13,293 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.05it/s]\n", + "2023-07-02 19:15:28,735 - modelscope - INFO - Saving checkpoint at 3600 iter\n", + "2023-07-02 19:15:28,776 - modelscope - INFO - deleting checkpoint: 
/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter3400_acc0.8089956045150757\n", + "2023-07-02 19:15:28,780 - modelscope - INFO - Saving checkpoint at 3600 iter\n", + "2023-07-02 19:15:28,819 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_3400\n", + "2023-07-02 19:15:28,824 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8097, evaluation/loss: 1.2494, loss: 0.8758\n", + "2023-07-02 19:15:35,336 - modelscope - INFO - epoch [1][3605/4953]\tlr: 2.586e-05, memory: 9082, loss: 0.5239\n", + "2023-07-02 19:15:41,849 - modelscope - INFO - epoch [1][3610/4953]\tlr: 2.575e-05, memory: 9082, loss: 1.5448\n", + "2023-07-02 19:15:46,600 - modelscope - INFO - epoch [1][3615/4953]\tlr: 2.564e-05, memory: 9082, loss: 1.2828\n", + "2023-07-02 19:15:53,236 - modelscope - INFO - epoch [1][3620/4953]\tlr: 2.553e-05, memory: 9082, loss: 1.3886\n", + "2023-07-02 19:15:59,060 - modelscope - INFO - epoch [1][3625/4953]\tlr: 2.542e-05, memory: 9082, loss: 1.2750\n", + "2023-07-02 19:16:04,370 - modelscope - INFO - epoch [1][3630/4953]\tlr: 2.532e-05, memory: 9082, loss: 1.0339\n", + "2023-07-02 19:16:09,908 - modelscope - INFO - epoch [1][3635/4953]\tlr: 2.521e-05, memory: 9082, loss: 1.6308\n", + "2023-07-02 19:16:16,808 - modelscope - INFO - epoch [1][3640/4953]\tlr: 2.510e-05, memory: 9082, loss: 1.2590\n", + "2023-07-02 19:16:22,072 - modelscope - INFO - epoch [1][3645/4953]\tlr: 2.500e-05, memory: 9082, loss: 2.3364\n", + "2023-07-02 19:16:29,035 - modelscope - INFO - epoch [1][3650/4953]\tlr: 2.489e-05, memory: 9082, loss: 1.1231\n", + "2023-07-02 19:16:35,184 - modelscope - INFO - epoch [1][3655/4953]\tlr: 2.478e-05, memory: 9082, loss: 0.8313\n", + "2023-07-02 19:16:41,731 - modelscope - INFO - epoch [1][3660/4953]\tlr: 2.468e-05, memory: 9082, loss: 1.2649\n", + "2023-07-02 19:16:47,773 - modelscope - INFO - epoch [1][3665/4953]\tlr: 2.457e-05, memory: 9082, loss: 
0.1984\n", + "2023-07-02 19:16:53,645 - modelscope - INFO - epoch [1][3670/4953]\tlr: 2.447e-05, memory: 9082, loss: 1.2534\n", + "2023-07-02 19:16:58,300 - modelscope - INFO - epoch [1][3675/4953]\tlr: 2.436e-05, memory: 9082, loss: 1.1865\n", + "2023-07-02 19:17:02,935 - modelscope - INFO - epoch [1][3680/4953]\tlr: 2.426e-05, memory: 9082, loss: 1.0458\n", + "2023-07-02 19:17:10,508 - modelscope - INFO - epoch [1][3685/4953]\tlr: 2.415e-05, memory: 9082, loss: 1.4961\n", + "2023-07-02 19:17:15,416 - modelscope - INFO - epoch [1][3690/4953]\tlr: 2.405e-05, memory: 9082, loss: 1.9992\n", + "2023-07-02 19:17:21,634 - modelscope - INFO - epoch [1][3695/4953]\tlr: 2.394e-05, memory: 9082, loss: 1.0555\n", + "2023-07-02 19:17:25,173 - modelscope - INFO - epoch [1][3700/4953]\tlr: 2.384e-05, memory: 9082, loss: 1.3477\n", + "2023-07-02 19:17:31,506 - modelscope - INFO - epoch [1][3705/4953]\tlr: 2.374e-05, memory: 9082, loss: 1.4563\n", + "2023-07-02 19:17:37,274 - modelscope - INFO - epoch [1][3710/4953]\tlr: 2.364e-05, memory: 9082, loss: 1.0638\n", + "2023-07-02 19:17:42,368 - modelscope - INFO - epoch [1][3715/4953]\tlr: 2.353e-05, memory: 9082, loss: 1.0961\n", + "2023-07-02 19:17:48,384 - modelscope - INFO - epoch [1][3720/4953]\tlr: 2.343e-05, memory: 9082, loss: 0.6570\n", + "2023-07-02 19:17:54,584 - modelscope - INFO - epoch [1][3725/4953]\tlr: 2.333e-05, memory: 9082, loss: 1.4391\n", + "2023-07-02 19:18:00,199 - modelscope - INFO - epoch [1][3730/4953]\tlr: 2.323e-05, memory: 9082, loss: 1.0986\n", + "2023-07-02 19:18:06,613 - modelscope - INFO - epoch [1][3735/4953]\tlr: 2.313e-05, memory: 9082, loss: 1.2259\n", + "2023-07-02 19:18:11,954 - modelscope - INFO - epoch [1][3740/4953]\tlr: 2.303e-05, memory: 9082, loss: 1.2266\n", + "2023-07-02 19:18:19,245 - modelscope - INFO - epoch [1][3745/4953]\tlr: 2.293e-05, memory: 9082, loss: 0.8633\n", + "2023-07-02 19:18:24,296 - modelscope - INFO - epoch [1][3750/4953]\tlr: 2.283e-05, memory: 9082, loss: 1.2285\n", 
+ "2023-07-02 19:18:31,793 - modelscope - INFO - epoch [1][3755/4953]\tlr: 2.273e-05, memory: 9082, loss: 1.7500\n", + "2023-07-02 19:18:37,572 - modelscope - INFO - epoch [1][3760/4953]\tlr: 2.263e-05, memory: 9082, loss: 0.6735\n", + "2023-07-02 19:18:44,200 - modelscope - INFO - epoch [1][3765/4953]\tlr: 2.253e-05, memory: 9082, loss: 1.8328\n", + "2023-07-02 19:18:49,475 - modelscope - INFO - epoch [1][3770/4953]\tlr: 2.243e-05, memory: 9082, loss: 1.3798\n", + "2023-07-02 19:18:53,690 - modelscope - INFO - epoch [1][3775/4953]\tlr: 2.233e-05, memory: 9082, loss: 2.3062\n", + "2023-07-02 19:18:58,638 - modelscope - INFO - epoch [1][3780/4953]\tlr: 2.223e-05, memory: 9082, loss: 1.1617\n", + "2023-07-02 19:19:05,096 - modelscope - INFO - epoch [1][3785/4953]\tlr: 2.213e-05, memory: 9082, loss: 1.7489\n", + "2023-07-02 19:19:12,468 - modelscope - INFO - epoch [1][3790/4953]\tlr: 2.204e-05, memory: 9082, loss: 1.1701\n", + "2023-07-02 19:19:22,097 - modelscope - INFO - epoch [1][3795/4953]\tlr: 2.194e-05, memory: 9082, loss: 0.3038\n", + "2023-07-02 19:19:29,069 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 19:21:44,819 - modelscope - INFO - Saving checkpoint at 3800 iter\n", + "2023-07-02 19:21:44,859 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter3600_acc0.8096736669540405\n", + "2023-07-02 19:21:44,863 - modelscope - INFO - Saving checkpoint at 3800 iter\n", + "2023-07-02 19:21:44,902 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_3600\n", + "2023-07-02 19:21:44,907 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8099, evaluation/loss: 1.2569, loss: 1.0828\n", + "2023-07-02 19:21:50,359 - modelscope - INFO - epoch [1][3805/4953]\tlr: 2.174e-05, memory: 9082, loss: 
1.3383\n", + "2023-07-02 19:21:56,101 - modelscope - INFO - epoch [1][3810/4953]\tlr: 2.165e-05, memory: 9082, loss: 1.3833\n", + "2023-07-02 19:22:02,037 - modelscope - INFO - epoch [1][3815/4953]\tlr: 2.155e-05, memory: 9082, loss: 1.1005\n", + "2023-07-02 19:22:07,031 - modelscope - INFO - epoch [1][3820/4953]\tlr: 2.146e-05, memory: 9082, loss: 1.6941\n", + "2023-07-02 19:22:11,810 - modelscope - INFO - epoch [1][3825/4953]\tlr: 2.136e-05, memory: 9082, loss: 1.8938\n", + "2023-07-02 19:22:16,752 - modelscope - INFO - epoch [1][3830/4953]\tlr: 2.127e-05, memory: 9082, loss: 1.6121\n", + "2023-07-02 19:22:25,240 - modelscope - INFO - epoch [1][3835/4953]\tlr: 2.117e-05, memory: 9082, loss: 0.7009\n", + "2023-07-02 19:22:31,231 - modelscope - INFO - epoch [1][3840/4953]\tlr: 2.108e-05, memory: 9082, loss: 1.8273\n", + "2023-07-02 19:22:37,939 - modelscope - INFO - epoch [1][3845/4953]\tlr: 2.098e-05, memory: 9082, loss: 0.8680\n", + "2023-07-02 19:22:43,021 - modelscope - INFO - epoch [1][3850/4953]\tlr: 2.089e-05, memory: 9082, loss: 1.5473\n", + "2023-07-02 19:22:49,156 - modelscope - INFO - epoch [1][3855/4953]\tlr: 2.080e-05, memory: 9082, loss: 1.1435\n", + "2023-07-02 19:22:53,445 - modelscope - INFO - epoch [1][3860/4953]\tlr: 2.071e-05, memory: 9082, loss: 1.1194\n", + "2023-07-02 19:22:59,485 - modelscope - INFO - epoch [1][3865/4953]\tlr: 2.061e-05, memory: 9082, loss: 1.0640\n", + "2023-07-02 19:23:03,673 - modelscope - INFO - epoch [1][3870/4953]\tlr: 2.052e-05, memory: 9082, loss: 1.0879\n", + "2023-07-02 19:23:08,721 - modelscope - INFO - epoch [1][3875/4953]\tlr: 2.043e-05, memory: 9082, loss: 0.9207\n", + "2023-07-02 19:23:14,908 - modelscope - INFO - epoch [1][3880/4953]\tlr: 2.034e-05, memory: 9082, loss: 0.5737\n", + "2023-07-02 19:23:21,843 - modelscope - INFO - epoch [1][3885/4953]\tlr: 2.025e-05, memory: 9082, loss: 1.3052\n", + "2023-07-02 19:23:30,760 - modelscope - INFO - epoch [1][3890/4953]\tlr: 2.016e-05, memory: 9082, loss: 1.1666\n", 
+ "2023-07-02 19:23:36,181 - modelscope - INFO - epoch [1][3895/4953]\tlr: 2.007e-05, memory: 9082, loss: 1.7224\n", + "2023-07-02 19:23:40,094 - modelscope - INFO - epoch [1][3900/4953]\tlr: 1.998e-05, memory: 9082, loss: 1.0042\n", + "2023-07-02 19:23:47,764 - modelscope - INFO - epoch [1][3905/4953]\tlr: 1.989e-05, memory: 9082, loss: 1.2044\n", + "2023-07-02 19:23:54,075 - modelscope - INFO - epoch [1][3910/4953]\tlr: 1.980e-05, memory: 9082, loss: 1.3367\n", + "2023-07-02 19:24:00,699 - modelscope - INFO - epoch [1][3915/4953]\tlr: 1.971e-05, memory: 9082, loss: 1.1395\n", + "2023-07-02 19:24:06,413 - modelscope - INFO - epoch [1][3920/4953]\tlr: 1.962e-05, memory: 9082, loss: 1.1899\n", + "2023-07-02 19:24:12,663 - modelscope - INFO - epoch [1][3925/4953]\tlr: 1.953e-05, memory: 9082, loss: 1.0320\n", + "2023-07-02 19:24:18,897 - modelscope - INFO - epoch [1][3930/4953]\tlr: 1.944e-05, memory: 9082, loss: 2.0555\n", + "2023-07-02 19:24:25,760 - modelscope - INFO - epoch [1][3935/4953]\tlr: 1.936e-05, memory: 9082, loss: 1.3466\n", + "2023-07-02 19:24:29,617 - modelscope - INFO - epoch [1][3940/4953]\tlr: 1.927e-05, memory: 9082, loss: 1.7797\n", + "2023-07-02 19:24:34,498 - modelscope - INFO - epoch [1][3945/4953]\tlr: 1.918e-05, memory: 9082, loss: 0.6168\n", + "2023-07-02 19:24:39,457 - modelscope - INFO - epoch [1][3950/4953]\tlr: 1.910e-05, memory: 9082, loss: 1.1122\n", + "2023-07-02 19:24:48,913 - modelscope - INFO - epoch [1][3955/4953]\tlr: 1.901e-05, memory: 9082, loss: 0.9353\n", + "2023-07-02 19:24:55,564 - modelscope - INFO - epoch [1][3960/4953]\tlr: 1.892e-05, memory: 9082, loss: 0.9599\n", + "2023-07-02 19:25:00,536 - modelscope - INFO - epoch [1][3965/4953]\tlr: 1.884e-05, memory: 9082, loss: 1.4582\n", + "2023-07-02 19:25:07,894 - modelscope - INFO - epoch [1][3970/4953]\tlr: 1.875e-05, memory: 9082, loss: 1.0347\n", + "2023-07-02 19:25:11,877 - modelscope - INFO - epoch [1][3975/4953]\tlr: 1.867e-05, memory: 9082, loss: 1.9000\n", + 
"2023-07-02 19:25:18,225 - modelscope - INFO - epoch [1][3980/4953]\tlr: 1.858e-05, memory: 9082, loss: 1.4125\n", + "2023-07-02 19:25:22,417 - modelscope - INFO - epoch [1][3985/4953]\tlr: 1.850e-05, memory: 9082, loss: 1.8959\n", + "2023-07-02 19:25:27,100 - modelscope - INFO - epoch [1][3990/4953]\tlr: 1.842e-05, memory: 9082, loss: 1.4008\n", + "2023-07-02 19:25:31,958 - modelscope - INFO - epoch [1][3995/4953]\tlr: 1.833e-05, memory: 9082, loss: 0.8114\n", + "2023-07-02 19:25:37,042 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 19:27:53,013 - modelscope - INFO - Saving checkpoint at 4000 iter\n", + "2023-07-02 19:27:53,054 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_3800\n", + "2023-07-02 19:27:53,059 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8099, evaluation/loss: 1.2522, loss: 1.1221\n", + "2023-07-02 19:27:58,830 - modelscope - INFO - epoch [1][4005/4953]\tlr: 1.817e-05, memory: 9082, loss: 1.9461\n", + "2023-07-02 19:28:04,138 - modelscope - INFO - epoch [1][4010/4953]\tlr: 1.809e-05, memory: 9082, loss: 1.5629\n", + "2023-07-02 19:28:09,984 - modelscope - INFO - epoch [1][4015/4953]\tlr: 1.801e-05, memory: 9082, loss: 0.7642\n", + "2023-07-02 19:28:13,463 - modelscope - INFO - epoch [1][4020/4953]\tlr: 1.792e-05, memory: 9082, loss: 2.2344\n", + "2023-07-02 19:28:20,355 - modelscope - INFO - epoch [1][4025/4953]\tlr: 1.784e-05, memory: 9082, loss: 0.9662\n", + "2023-07-02 19:28:26,276 - modelscope - INFO - epoch [1][4030/4953]\tlr: 1.776e-05, memory: 9082, loss: 1.0925\n", + "2023-07-02 19:28:32,273 - modelscope - INFO - epoch [1][4035/4953]\tlr: 1.768e-05, memory: 9082, loss: 1.4812\n", + "2023-07-02 19:28:38,431 - modelscope - INFO - epoch [1][4040/4953]\tlr: 1.760e-05, memory: 9082, loss: 2.1295\n", + "2023-07-02 
19:28:43,468 - modelscope - INFO - epoch [1][4045/4953]\tlr: 1.752e-05, memory: 9082, loss: 1.6391\n", + "2023-07-02 19:28:51,453 - modelscope - INFO - epoch [1][4050/4953]\tlr: 1.744e-05, memory: 9082, loss: 1.4901\n", + "2023-07-02 19:28:57,688 - modelscope - INFO - epoch [1][4055/4953]\tlr: 1.737e-05, memory: 9082, loss: 1.2383\n", + "2023-07-02 19:29:01,776 - modelscope - INFO - epoch [1][4060/4953]\tlr: 1.729e-05, memory: 9082, loss: 1.4404\n", + "2023-07-02 19:29:07,738 - modelscope - INFO - epoch [1][4065/4953]\tlr: 1.721e-05, memory: 9082, loss: 0.5664\n", + "2023-07-02 19:29:12,827 - modelscope - INFO - epoch [1][4070/4953]\tlr: 1.713e-05, memory: 9082, loss: 1.4554\n", + "2023-07-02 19:29:19,309 - modelscope - INFO - epoch [1][4075/4953]\tlr: 1.706e-05, memory: 9082, loss: 0.8976\n", + "2023-07-02 19:29:23,218 - modelscope - INFO - epoch [1][4080/4953]\tlr: 1.698e-05, memory: 9082, loss: 1.0562\n", + "2023-07-02 19:29:32,543 - modelscope - INFO - epoch [1][4085/4953]\tlr: 1.690e-05, memory: 9082, loss: 0.9514\n", + "2023-07-02 19:29:39,285 - modelscope - INFO - epoch [1][4090/4953]\tlr: 1.683e-05, memory: 9082, loss: 0.4714\n", + "2023-07-02 19:29:44,617 - modelscope - INFO - epoch [1][4095/4953]\tlr: 1.675e-05, memory: 9082, loss: 1.2211\n", + "2023-07-02 19:29:49,645 - modelscope - INFO - epoch [1][4100/4953]\tlr: 1.668e-05, memory: 9082, loss: 2.0924\n", + "2023-07-02 19:29:55,362 - modelscope - INFO - epoch [1][4105/4953]\tlr: 1.660e-05, memory: 9082, loss: 2.2705\n", + "2023-07-02 19:30:01,166 - modelscope - INFO - epoch [1][4110/4953]\tlr: 1.653e-05, memory: 9082, loss: 1.6148\n", + "2023-07-02 19:30:08,386 - modelscope - INFO - epoch [1][4115/4953]\tlr: 1.645e-05, memory: 9082, loss: 0.4558\n", + "2023-07-02 19:30:15,808 - modelscope - INFO - epoch [1][4120/4953]\tlr: 1.638e-05, memory: 9082, loss: 1.3715\n", + "2023-07-02 19:30:21,186 - modelscope - INFO - epoch [1][4125/4953]\tlr: 1.631e-05, memory: 9082, loss: 1.4497\n", + "2023-07-02 
19:30:26,639 - modelscope - INFO - epoch [1][4130/4953]\tlr: 1.623e-05, memory: 9082, loss: 1.0819\n", + "2023-07-02 19:30:32,756 - modelscope - INFO - epoch [1][4135/4953]\tlr: 1.616e-05, memory: 9082, loss: 0.5440\n", + "2023-07-02 19:30:39,286 - modelscope - INFO - epoch [1][4140/4953]\tlr: 1.609e-05, memory: 9082, loss: 1.7625\n", + "2023-07-02 19:30:45,148 - modelscope - INFO - epoch [1][4145/4953]\tlr: 1.602e-05, memory: 9082, loss: 1.4341\n", + "2023-07-02 19:30:49,574 - modelscope - INFO - epoch [1][4150/4953]\tlr: 1.595e-05, memory: 9082, loss: 1.2615\n", + "2023-07-02 19:30:56,310 - modelscope - INFO - epoch [1][4155/4953]\tlr: 1.588e-05, memory: 9082, loss: 1.1409\n", + "2023-07-02 19:31:00,158 - modelscope - INFO - epoch [1][4160/4953]\tlr: 1.580e-05, memory: 9082, loss: 1.3609\n", + "2023-07-02 19:31:06,731 - modelscope - INFO - epoch [1][4165/4953]\tlr: 1.573e-05, memory: 9082, loss: 1.5992\n", + "2023-07-02 19:31:10,582 - modelscope - INFO - epoch [1][4170/4953]\tlr: 1.566e-05, memory: 9082, loss: 1.2750\n", + "2023-07-02 19:31:17,613 - modelscope - INFO - epoch [1][4175/4953]\tlr: 1.560e-05, memory: 9082, loss: 1.5521\n", + "2023-07-02 19:31:21,814 - modelscope - INFO - epoch [1][4180/4953]\tlr: 1.553e-05, memory: 9082, loss: 2.2871\n", + "2023-07-02 19:31:28,108 - modelscope - INFO - epoch [1][4185/4953]\tlr: 1.546e-05, memory: 9082, loss: 1.4199\n", + "2023-07-02 19:31:31,428 - modelscope - INFO - epoch [1][4190/4953]\tlr: 1.539e-05, memory: 9082, loss: 1.6801\n", + "2023-07-02 19:31:36,958 - modelscope - INFO - epoch [1][4195/4953]\tlr: 1.532e-05, memory: 9082, loss: 1.2423\n", + "2023-07-02 19:31:43,408 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:16<00:00, 2.04it/s]\n", + "2023-07-02 19:33:59,477 - modelscope - INFO - Saving checkpoint at 4200 iter\n", + "2023-07-02 19:33:59,518 - modelscope - INFO - deleting checkpoint: 
/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_4000\n", + "2023-07-02 19:33:59,522 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8095, evaluation/loss: 1.2465, loss: 1.5236\n", + "2023-07-02 19:34:03,568 - modelscope - INFO - epoch [1][4205/4953]\tlr: 1.519e-05, memory: 9082, loss: 1.0014\n", + "2023-07-02 19:34:10,609 - modelscope - INFO - epoch [1][4210/4953]\tlr: 1.512e-05, memory: 9082, loss: 0.5158\n", + "2023-07-02 19:34:17,669 - modelscope - INFO - epoch [1][4215/4953]\tlr: 1.506e-05, memory: 9082, loss: 1.1637\n", + "2023-07-02 19:34:24,176 - modelscope - INFO - epoch [1][4220/4953]\tlr: 1.499e-05, memory: 9082, loss: 0.9216\n", + "2023-07-02 19:34:30,303 - modelscope - INFO - epoch [1][4225/4953]\tlr: 1.492e-05, memory: 9082, loss: 0.5468\n", + "2023-07-02 19:34:36,913 - modelscope - INFO - epoch [1][4230/4953]\tlr: 1.486e-05, memory: 9082, loss: 1.0229\n", + "2023-07-02 19:34:42,449 - modelscope - INFO - epoch [1][4235/4953]\tlr: 1.480e-05, memory: 9082, loss: 0.8887\n", + "2023-07-02 19:34:51,187 - modelscope - INFO - epoch [1][4240/4953]\tlr: 1.473e-05, memory: 9082, loss: 1.1398\n", + "2023-07-02 19:34:55,850 - modelscope - INFO - epoch [1][4245/4953]\tlr: 1.467e-05, memory: 9082, loss: 1.8500\n", + "2023-07-02 19:35:01,653 - modelscope - INFO - epoch [1][4250/4953]\tlr: 1.460e-05, memory: 9082, loss: 1.2860\n", + "2023-07-02 19:35:07,538 - modelscope - INFO - epoch [1][4255/4953]\tlr: 1.454e-05, memory: 9082, loss: 0.9241\n", + "2023-07-02 19:35:10,832 - modelscope - INFO - epoch [1][4260/4953]\tlr: 1.448e-05, memory: 9082, loss: 1.5016\n", + "2023-07-02 19:35:15,940 - modelscope - INFO - epoch [1][4265/4953]\tlr: 1.442e-05, memory: 9082, loss: 1.1250\n", + "2023-07-02 19:35:21,080 - modelscope - INFO - epoch [1][4270/4953]\tlr: 1.436e-05, memory: 9082, loss: 1.0505\n", + "2023-07-02 19:35:26,817 - modelscope - INFO - epoch [1][4275/4953]\tlr: 1.429e-05, memory: 9082, loss: 1.0356\n", + "2023-07-02 
19:35:36,012 - modelscope - INFO - epoch [1][4280/4953]\tlr: 1.423e-05, memory: 9082, loss: 0.9335\n", + "2023-07-02 19:35:42,237 - modelscope - INFO - epoch [1][4285/4953]\tlr: 1.417e-05, memory: 9082, loss: 0.5855\n", + "2023-07-02 19:35:46,223 - modelscope - INFO - epoch [1][4290/4953]\tlr: 1.411e-05, memory: 9082, loss: 1.2945\n", + "2023-07-02 19:35:52,610 - modelscope - INFO - epoch [1][4295/4953]\tlr: 1.405e-05, memory: 9082, loss: 0.9766\n", + "2023-07-02 19:35:59,125 - modelscope - INFO - epoch [1][4300/4953]\tlr: 1.400e-05, memory: 9082, loss: 1.6789\n", + "2023-07-02 19:36:03,214 - modelscope - INFO - epoch [1][4305/4953]\tlr: 1.394e-05, memory: 9082, loss: 1.5262\n", + "2023-07-02 19:36:08,897 - modelscope - INFO - epoch [1][4310/4953]\tlr: 1.388e-05, memory: 9082, loss: 1.0785\n", + "2023-07-02 19:36:15,128 - modelscope - INFO - epoch [1][4315/4953]\tlr: 1.382e-05, memory: 9082, loss: 0.6479\n", + "2023-07-02 19:36:21,607 - modelscope - INFO - epoch [1][4320/4953]\tlr: 1.376e-05, memory: 9082, loss: 1.8496\n", + "2023-07-02 19:36:29,617 - modelscope - INFO - epoch [1][4325/4953]\tlr: 1.371e-05, memory: 9082, loss: 0.5391\n", + "2023-07-02 19:36:35,101 - modelscope - INFO - epoch [1][4330/4953]\tlr: 1.365e-05, memory: 9082, loss: 1.8141\n", + "2023-07-02 19:36:41,579 - modelscope - INFO - epoch [1][4335/4953]\tlr: 1.359e-05, memory: 9082, loss: 0.6881\n", + "2023-07-02 19:36:48,569 - modelscope - INFO - epoch [1][4340/4953]\tlr: 1.354e-05, memory: 9082, loss: 0.6677\n", + "2023-07-02 19:36:55,362 - modelscope - INFO - epoch [1][4345/4953]\tlr: 1.348e-05, memory: 9082, loss: 0.7067\n", + "2023-07-02 19:37:01,199 - modelscope - INFO - epoch [1][4350/4953]\tlr: 1.343e-05, memory: 9082, loss: 1.3036\n", + "2023-07-02 19:37:06,752 - modelscope - INFO - epoch [1][4355/4953]\tlr: 1.337e-05, memory: 9082, loss: 0.5832\n", + "2023-07-02 19:37:11,013 - modelscope - INFO - epoch [1][4360/4953]\tlr: 1.332e-05, memory: 9082, loss: 0.9969\n", + "2023-07-02 
19:37:15,110 - modelscope - INFO - epoch [1][4365/4953]\tlr: 1.326e-05, memory: 9082, loss: 1.6590\n", + "2023-07-02 19:37:22,411 - modelscope - INFO - epoch [1][4370/4953]\tlr: 1.321e-05, memory: 9082, loss: 0.8229\n", + "2023-07-02 19:37:29,106 - modelscope - INFO - epoch [1][4375/4953]\tlr: 1.316e-05, memory: 9082, loss: 1.3289\n", + "2023-07-02 19:37:33,326 - modelscope - INFO - epoch [1][4380/4953]\tlr: 1.311e-05, memory: 9082, loss: 1.0410\n", + "2023-07-02 19:37:38,513 - modelscope - INFO - epoch [1][4385/4953]\tlr: 1.305e-05, memory: 9082, loss: 0.6374\n", + "2023-07-02 19:37:42,903 - modelscope - INFO - epoch [1][4390/4953]\tlr: 1.300e-05, memory: 9082, loss: 2.6094\n", + "2023-07-02 19:37:46,474 - modelscope - INFO - epoch [1][4395/4953]\tlr: 1.295e-05, memory: 9082, loss: 1.7327\n", + "2023-07-02 19:37:53,357 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:16<00:00, 2.03it/s]\n", + "2023-07-02 19:40:09,626 - modelscope - INFO - Saving checkpoint at 4400 iter\n", + "2023-07-02 19:40:09,667 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter3800_acc0.8098996877670288\n", + "2023-07-02 19:40:09,672 - modelscope - INFO - Saving checkpoint at 4400 iter\n", + "2023-07-02 19:40:09,712 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_4200\n", + "2023-07-02 19:40:09,717 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8100, evaluation/loss: 1.2437, loss: 1.0930\n", + "2023-07-02 19:40:15,785 - modelscope - INFO - epoch [1][4405/4953]\tlr: 1.285e-05, memory: 9082, loss: 0.5974\n", + "2023-07-02 19:40:23,144 - modelscope - INFO - epoch [1][4410/4953]\tlr: 1.280e-05, memory: 9082, loss: 1.0870\n", + "2023-07-02 19:40:28,966 - modelscope - INFO - epoch [1][4415/4953]\tlr: 1.275e-05, memory: 9082, loss: 1.0536\n", + 
"2023-07-02 19:40:35,092 - modelscope - INFO - epoch [1][4420/4953]\tlr: 1.270e-05, memory: 9082, loss: 1.4613\n", + "2023-07-02 19:40:41,826 - modelscope - INFO - epoch [1][4425/4953]\tlr: 1.265e-05, memory: 9082, loss: 0.8297\n", + "2023-07-02 19:40:46,568 - modelscope - INFO - epoch [1][4430/4953]\tlr: 1.261e-05, memory: 9082, loss: 2.0414\n", + "2023-07-02 19:40:53,278 - modelscope - INFO - epoch [1][4435/4953]\tlr: 1.256e-05, memory: 9082, loss: 1.1800\n", + "2023-07-02 19:40:58,208 - modelscope - INFO - epoch [1][4440/4953]\tlr: 1.251e-05, memory: 9082, loss: 0.8595\n", + "2023-07-02 19:41:04,905 - modelscope - INFO - epoch [1][4445/4953]\tlr: 1.246e-05, memory: 9082, loss: 0.0801\n", + "2023-07-02 19:41:08,125 - modelscope - INFO - epoch [1][4450/4953]\tlr: 1.242e-05, memory: 9082, loss: 1.7031\n", + "2023-07-02 19:41:13,374 - modelscope - INFO - epoch [1][4455/4953]\tlr: 1.237e-05, memory: 9082, loss: 1.8381\n", + "2023-07-02 19:41:17,994 - modelscope - INFO - epoch [1][4460/4953]\tlr: 1.233e-05, memory: 9082, loss: 1.1123\n", + "2023-07-02 19:41:21,181 - modelscope - INFO - epoch [1][4465/4953]\tlr: 1.228e-05, memory: 9082, loss: 2.0922\n", + "2023-07-02 19:41:27,252 - modelscope - INFO - epoch [1][4470/4953]\tlr: 1.224e-05, memory: 9082, loss: 0.8977\n", + "2023-07-02 19:41:31,600 - modelscope - INFO - epoch [1][4475/4953]\tlr: 1.219e-05, memory: 9082, loss: 0.9191\n", + "2023-07-02 19:41:36,554 - modelscope - INFO - epoch [1][4480/4953]\tlr: 1.215e-05, memory: 9082, loss: 1.9734\n", + "2023-07-02 19:41:42,916 - modelscope - INFO - epoch [1][4485/4953]\tlr: 1.210e-05, memory: 9082, loss: 0.7236\n", + "2023-07-02 19:41:49,532 - modelscope - INFO - epoch [1][4490/4953]\tlr: 1.206e-05, memory: 9082, loss: 1.5750\n", + "2023-07-02 19:41:55,282 - modelscope - INFO - epoch [1][4495/4953]\tlr: 1.202e-05, memory: 9082, loss: 0.9306\n", + "2023-07-02 19:42:01,377 - modelscope - INFO - epoch [1][4500/4953]\tlr: 1.198e-05, memory: 9082, loss: 1.9801\n", + 
"2023-07-02 19:42:05,379 - modelscope - INFO - epoch [1][4505/4953]\tlr: 1.193e-05, memory: 9082, loss: 2.3320\n", + "2023-07-02 19:42:11,849 - modelscope - INFO - epoch [1][4510/4953]\tlr: 1.189e-05, memory: 9082, loss: 1.3637\n", + "2023-07-02 19:42:18,695 - modelscope - INFO - epoch [1][4515/4953]\tlr: 1.185e-05, memory: 9082, loss: 1.5328\n", + "2023-07-02 19:42:26,045 - modelscope - INFO - epoch [1][4520/4953]\tlr: 1.181e-05, memory: 9082, loss: 1.0721\n", + "2023-07-02 19:42:32,060 - modelscope - INFO - epoch [1][4525/4953]\tlr: 1.177e-05, memory: 9082, loss: 1.1867\n", + "2023-07-02 19:42:38,307 - modelscope - INFO - epoch [1][4530/4953]\tlr: 1.173e-05, memory: 9082, loss: 1.3500\n", + "2023-07-02 19:42:46,137 - modelscope - INFO - epoch [1][4535/4953]\tlr: 1.169e-05, memory: 9082, loss: 0.7637\n", + "2023-07-02 19:42:52,814 - modelscope - INFO - epoch [1][4540/4953]\tlr: 1.165e-05, memory: 9082, loss: 0.8551\n", + "2023-07-02 19:43:00,111 - modelscope - INFO - epoch [1][4545/4953]\tlr: 1.162e-05, memory: 9082, loss: 1.3265\n", + "2023-07-02 19:43:06,301 - modelscope - INFO - epoch [1][4550/4953]\tlr: 1.158e-05, memory: 9082, loss: 0.6115\n", + "2023-07-02 19:43:10,926 - modelscope - INFO - epoch [1][4555/4953]\tlr: 1.154e-05, memory: 9082, loss: 1.8475\n", + "2023-07-02 19:43:17,954 - modelscope - INFO - epoch [1][4560/4953]\tlr: 1.150e-05, memory: 9082, loss: 1.3332\n", + "2023-07-02 19:43:22,493 - modelscope - INFO - epoch [1][4565/4953]\tlr: 1.147e-05, memory: 9082, loss: 1.9062\n", + "2023-07-02 19:43:28,213 - modelscope - INFO - epoch [1][4570/4953]\tlr: 1.143e-05, memory: 9082, loss: 0.6227\n", + "2023-07-02 19:43:34,862 - modelscope - INFO - epoch [1][4575/4953]\tlr: 1.140e-05, memory: 9082, loss: 0.7937\n", + "2023-07-02 19:43:40,905 - modelscope - INFO - epoch [1][4580/4953]\tlr: 1.136e-05, memory: 9082, loss: 1.4903\n", + "2023-07-02 19:43:47,007 - modelscope - INFO - epoch [1][4585/4953]\tlr: 1.133e-05, memory: 9082, loss: 1.0449\n", + 
"2023-07-02 19:43:52,730 - modelscope - INFO - epoch [1][4590/4953]\tlr: 1.129e-05, memory: 9082, loss: 1.0068\n", + "2023-07-02 19:43:56,715 - modelscope - INFO - epoch [1][4595/4953]\tlr: 1.126e-05, memory: 9082, loss: 1.5157\n", + "2023-07-02 19:44:04,629 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 19:46:20,481 - modelscope - INFO - Saving checkpoint at 4600 iter\n", + "2023-07-02 19:46:20,521 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_4400\n", + "2023-07-02 19:46:20,526 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8098, evaluation/loss: 1.2390, loss: 1.1334\n", + "2023-07-02 19:46:25,140 - modelscope - INFO - epoch [1][4605/4953]\tlr: 1.119e-05, memory: 9082, loss: 1.6938\n", + "2023-07-02 19:46:30,413 - modelscope - INFO - epoch [1][4610/4953]\tlr: 1.116e-05, memory: 9082, loss: 2.1351\n", + "2023-07-02 19:46:37,216 - modelscope - INFO - epoch [1][4615/4953]\tlr: 1.113e-05, memory: 9082, loss: 0.9270\n", + "2023-07-02 19:46:43,728 - modelscope - INFO - epoch [1][4620/4953]\tlr: 1.110e-05, memory: 9082, loss: 1.1201\n", + "2023-07-02 19:46:50,227 - modelscope - INFO - epoch [1][4625/4953]\tlr: 1.107e-05, memory: 9082, loss: 1.2715\n", + "2023-07-02 19:46:53,772 - modelscope - INFO - epoch [1][4630/4953]\tlr: 1.103e-05, memory: 9082, loss: 1.4461\n", + "2023-07-02 19:46:59,663 - modelscope - INFO - epoch [1][4635/4953]\tlr: 1.100e-05, memory: 9082, loss: 1.2715\n", + "2023-07-02 19:47:06,614 - modelscope - INFO - epoch [1][4640/4953]\tlr: 1.097e-05, memory: 9082, loss: 0.6478\n", + "2023-07-02 19:47:14,999 - modelscope - INFO - epoch [1][4645/4953]\tlr: 1.094e-05, memory: 9082, loss: 1.0031\n", + "2023-07-02 19:47:19,690 - modelscope - INFO - epoch [1][4650/4953]\tlr: 1.092e-05, memory: 9082, loss: 1.0572\n", + "2023-07-02 
19:47:27,827 - modelscope - INFO - epoch [1][4655/4953]\tlr: 1.089e-05, memory: 9082, loss: 0.9459\n", + "2023-07-02 19:47:33,520 - modelscope - INFO - epoch [1][4660/4953]\tlr: 1.086e-05, memory: 9082, loss: 0.9813\n", + "2023-07-02 19:47:39,880 - modelscope - INFO - epoch [1][4665/4953]\tlr: 1.083e-05, memory: 9082, loss: 1.3258\n", + "2023-07-02 19:47:46,513 - modelscope - INFO - epoch [1][4670/4953]\tlr: 1.080e-05, memory: 9082, loss: 1.2884\n", + "2023-07-02 19:47:51,769 - modelscope - INFO - epoch [1][4675/4953]\tlr: 1.078e-05, memory: 9082, loss: 1.6375\n", + "2023-07-02 19:47:57,474 - modelscope - INFO - epoch [1][4680/4953]\tlr: 1.075e-05, memory: 9082, loss: 0.9726\n", + "2023-07-02 19:48:02,354 - modelscope - INFO - epoch [1][4685/4953]\tlr: 1.073e-05, memory: 9082, loss: 1.1402\n", + "2023-07-02 19:48:09,946 - modelscope - INFO - epoch [1][4690/4953]\tlr: 1.070e-05, memory: 9082, loss: 0.9941\n", + "2023-07-02 19:48:16,660 - modelscope - INFO - epoch [1][4695/4953]\tlr: 1.068e-05, memory: 9082, loss: 1.5975\n", + "2023-07-02 19:48:22,892 - modelscope - INFO - epoch [1][4700/4953]\tlr: 1.065e-05, memory: 9082, loss: 0.9816\n", + "2023-07-02 19:48:28,221 - modelscope - INFO - epoch [1][4705/4953]\tlr: 1.063e-05, memory: 9082, loss: 0.9115\n", + "2023-07-02 19:48:35,152 - modelscope - INFO - epoch [1][4710/4953]\tlr: 1.060e-05, memory: 9082, loss: 1.4184\n", + "2023-07-02 19:48:40,666 - modelscope - INFO - epoch [1][4715/4953]\tlr: 1.058e-05, memory: 9082, loss: 1.6391\n", + "2023-07-02 19:48:46,682 - modelscope - INFO - epoch [1][4720/4953]\tlr: 1.056e-05, memory: 9082, loss: 2.1836\n", + "2023-07-02 19:48:53,274 - modelscope - INFO - epoch [1][4725/4953]\tlr: 1.054e-05, memory: 9082, loss: 1.1783\n", + "2023-07-02 19:48:56,851 - modelscope - INFO - epoch [1][4730/4953]\tlr: 1.051e-05, memory: 9082, loss: 1.0398\n", + "2023-07-02 19:49:03,951 - modelscope - INFO - epoch [1][4735/4953]\tlr: 1.049e-05, memory: 9082, loss: 0.4896\n", + "2023-07-02 
19:49:09,418 - modelscope - INFO - epoch [1][4740/4953]\tlr: 1.047e-05, memory: 9082, loss: 0.8757\n", + "2023-07-02 19:49:15,768 - modelscope - INFO - epoch [1][4745/4953]\tlr: 1.045e-05, memory: 9082, loss: 1.5896\n", + "2023-07-02 19:49:21,308 - modelscope - INFO - epoch [1][4750/4953]\tlr: 1.043e-05, memory: 9082, loss: 1.3535\n", + "2023-07-02 19:49:27,455 - modelscope - INFO - epoch [1][4755/4953]\tlr: 1.041e-05, memory: 9082, loss: 1.3389\n", + "2023-07-02 19:49:34,436 - modelscope - INFO - epoch [1][4760/4953]\tlr: 1.039e-05, memory: 9082, loss: 0.6073\n", + "2023-07-02 19:49:42,538 - modelscope - INFO - epoch [1][4765/4953]\tlr: 1.037e-05, memory: 9082, loss: 0.6708\n", + "2023-07-02 19:49:49,238 - modelscope - INFO - epoch [1][4770/4953]\tlr: 1.036e-05, memory: 9082, loss: 0.8630\n", + "2023-07-02 19:49:55,165 - modelscope - INFO - epoch [1][4775/4953]\tlr: 1.034e-05, memory: 9082, loss: 0.7835\n", + "2023-07-02 19:50:01,434 - modelscope - INFO - epoch [1][4780/4953]\tlr: 1.032e-05, memory: 9082, loss: 1.7195\n", + "2023-07-02 19:50:08,788 - modelscope - INFO - epoch [1][4785/4953]\tlr: 1.030e-05, memory: 9082, loss: 1.1434\n", + "2023-07-02 19:50:14,523 - modelscope - INFO - epoch [1][4790/4953]\tlr: 1.029e-05, memory: 9082, loss: 0.6416\n", + "2023-07-02 19:50:21,717 - modelscope - INFO - epoch [1][4795/4953]\tlr: 1.027e-05, memory: 9082, loss: 1.0909\n", + "2023-07-02 19:50:25,524 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 277/277 [02:15<00:00, 2.04it/s]\n", + "2023-07-02 19:52:41,308 - modelscope - INFO - Saving checkpoint at 4800 iter\n", + "2023-07-02 19:52:41,348 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/best_iter4400_acc0.8100214004516602\n", + "2023-07-02 19:52:41,353 - modelscope - INFO - Saving checkpoint at 4800 iter\n", + "2023-07-02 19:52:41,392 - modelscope - INFO - deleting checkpoint: 
/home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_4600\n", + "2023-07-02 19:52:41,397 - modelscope - INFO - epoch(eval) [1][277]\tmemory: 9082, evaluation/acc: 0.8101, evaluation/loss: 1.2370, loss: 1.1855\n", + "2023-07-02 19:52:47,709 - modelscope - INFO - epoch [1][4805/4953]\tlr: 1.024e-05, memory: 9082, loss: 0.8004\n", + "2023-07-02 19:52:53,162 - modelscope - INFO - epoch [1][4810/4953]\tlr: 1.023e-05, memory: 9082, loss: 1.1193\n", + "2023-07-02 19:53:00,428 - modelscope - INFO - epoch [1][4815/4953]\tlr: 1.021e-05, memory: 9082, loss: 0.8555\n", + "2023-07-02 19:53:03,760 - modelscope - INFO - epoch [1][4820/4953]\tlr: 1.020e-05, memory: 9082, loss: 1.4422\n", + "2023-07-02 19:53:09,302 - modelscope - INFO - epoch [1][4825/4953]\tlr: 1.019e-05, memory: 9082, loss: 1.5247\n", + "2023-07-02 19:53:17,785 - modelscope - INFO - epoch [1][4830/4953]\tlr: 1.017e-05, memory: 9082, loss: 0.5462\n", + "2023-07-02 19:53:24,406 - modelscope - INFO - epoch [1][4835/4953]\tlr: 1.016e-05, memory: 9082, loss: 1.0023\n", + "2023-07-02 19:53:29,386 - modelscope - INFO - epoch [1][4840/4953]\tlr: 1.015e-05, memory: 9082, loss: 1.3645\n", + "2023-07-02 19:53:34,231 - modelscope - INFO - epoch [1][4845/4953]\tlr: 1.014e-05, memory: 9082, loss: 0.9927\n", + "2023-07-02 19:53:40,558 - modelscope - INFO - epoch [1][4850/4953]\tlr: 1.013e-05, memory: 9082, loss: 2.0516\n", + "2023-07-02 19:53:47,846 - modelscope - INFO - epoch [1][4855/4953]\tlr: 1.012e-05, memory: 9082, loss: 0.7750\n", + "2023-07-02 19:53:52,341 - modelscope - INFO - epoch [1][4860/4953]\tlr: 1.011e-05, memory: 9082, loss: 1.4390\n", + "2023-07-02 19:53:57,172 - modelscope - INFO - epoch [1][4865/4953]\tlr: 1.010e-05, memory: 9082, loss: 1.0197\n", + "2023-07-02 19:54:02,776 - modelscope - INFO - epoch [1][4870/4953]\tlr: 1.009e-05, memory: 9082, loss: 0.7660\n", + "2023-07-02 19:54:08,311 - modelscope - INFO - epoch [1][4875/4953]\tlr: 1.008e-05, memory: 9082, loss: 0.8775\n", + "2023-07-02 
19:54:14,394 - modelscope - INFO - epoch [1][4880/4953]\tlr: 1.007e-05, memory: 9082, loss: 1.3374\n", + "2023-07-02 19:54:20,602 - modelscope - INFO - epoch [1][4885/4953]\tlr: 1.006e-05, memory: 9082, loss: 1.0018\n", + "2023-07-02 19:54:28,123 - modelscope - INFO - epoch [1][4890/4953]\tlr: 1.006e-05, memory: 9082, loss: 1.4156\n", + "2023-07-02 19:54:34,101 - modelscope - INFO - epoch [1][4895/4953]\tlr: 1.005e-05, memory: 9082, loss: 1.4742\n", + "2023-07-02 19:54:39,802 - modelscope - INFO - epoch [1][4900/4953]\tlr: 1.004e-05, memory: 9082, loss: 1.2737\n", + "2023-07-02 19:54:45,785 - modelscope - INFO - epoch [1][4905/4953]\tlr: 1.004e-05, memory: 9082, loss: 1.2928\n", + "2023-07-02 19:54:52,274 - modelscope - INFO - epoch [1][4910/4953]\tlr: 1.003e-05, memory: 9082, loss: 0.9859\n", + "2023-07-02 19:54:57,409 - modelscope - INFO - epoch [1][4915/4953]\tlr: 1.003e-05, memory: 9082, loss: 1.8160\n", + "2023-07-02 19:55:04,217 - modelscope - INFO - epoch [1][4920/4953]\tlr: 1.002e-05, memory: 9082, loss: 0.9310\n", + "2023-07-02 19:55:09,704 - modelscope - INFO - epoch [1][4925/4953]\tlr: 1.002e-05, memory: 9082, loss: 1.1717\n", + "2023-07-02 19:55:15,079 - modelscope - INFO - epoch [1][4930/4953]\tlr: 1.001e-05, memory: 9082, loss: 1.8821\n", + "2023-07-02 19:55:19,843 - modelscope - INFO - epoch [1][4935/4953]\tlr: 1.001e-05, memory: 9082, loss: 0.7700\n", + "2023-07-02 19:55:24,826 - modelscope - INFO - epoch [1][4940/4953]\tlr: 1.001e-05, memory: 9082, loss: 1.1562\n", + "2023-07-02 19:55:29,831 - modelscope - INFO - epoch [1][4945/4953]\tlr: 1.000e-05, memory: 9082, loss: 1.2777\n", + "2023-07-02 19:55:34,919 - modelscope - INFO - epoch [1][4950/4953]\tlr: 1.000e-05, memory: 9082, loss: 0.9414\n", + "2023-07-02 19:55:38,429 - modelscope - INFO - Saving checkpoint at 4953 iter\n", + "2023-07-02 19:55:38,697 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449/iter_4800\n", + "2023-07-02 19:55:38,741 
- modelscope - INFO - Train finished. Uploading models, waiting...\n", + "2023-07-02 19:55:38,823 - modelscope - INFO - {'done': True}\n" + ] + } + ], + "source": [ + "def cfg_modify_fn(cfg: Config) -> Config:\n", + " cfg.update(CONFIG)\n", + " return cfg\n", + "\n", + "\n", + "trainer = EpochBasedTrainer(\n", + " model=model,\n", + " cfg_file=cfg_file,\n", + " data_collator=data_collate_fn,\n", + " train_dataset=train_dataset,\n", + " eval_dataset=val_dataset,\n", + " remove_unused_data=True,\n", + " seed=42,\n", + " cfg_modify_fn=cfg_modify_fn,\n", + ")\n", + "\n", + "trainer.train()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 可视化\n", + "tensorboard 命令: (e.g.) \n", + "`tensorboard --logdir /home/hackathon/my_git/agent/runs/baichuan/v10-20230702-172449 --port 6006`" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['lr', 'loss', 'evaluation/acc', 'evaluation/loss'])\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAsIAAAHDCAYAAAAupnzhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYsklEQVR4nO3dd3hUVf7H8fckIYWSgpGEQGjSFBAUJEYB1yUaFV1x/a2IKIoIiqBgUBELsMoKgihFiqAr7q4K6lppyoKIYgwQehFRQYomgCEFCGlzfn9cMjAaSii5mbmf1/PcZzIzZzLfyRX4ePK957iMMQYREREREYcJsLsAERERERE7KAiLiIiIiCMpCIuIiIiIIykIi4iIiIgjKQiLiIiIiCMpCIuIiIiIIykIi4iIiIgjKQiLiIiIiCMpCIuIiIiIIykIi4g40MyZM3G5XGzfvt3uUkREbKMgLCIiIiKOpCAsIiIiIo6kICwiIn9gjCE/P9/uMkREzikFYRERoUGDBtx444189tlntGvXjrCwMF599VW7yxIROacUhEVEBIAtW7bQvXt3rrnmGiZMmECbNm3sLklE5JwKsrsAERGpHH744QcWLFhAcnKy3aWIiFQIzQiLiAgADRs2VAgWEUdREBYREcAKwiIiTqIgLCIiAISFhdldgohIhVIQFhERERFHUhAWEREREUdSEBYRERERR1IQFhERERFHchljjN1FiIiIiIhUNM0Ii4iIiIgjKQiLiIiIiCMpCIuIiIiIIykIi4iIiIgjKQiLiIiIiCMpCIuIiIiIIwXZXYCvcbvd/PLLL9SoUQOXy2V3OSIiIiLyO8YY8vLyiIuLIyDg+PO+CsLl9MsvvxAfH293GSIiIiJyEjt37qRu3brHfV5BuJxq1KgBWD/Y8PBwm6sRERERkd/Lzc0lPj7ek9uOR0G4nErbIcLDwxWERURERCqxk7Wx6mI5EREREXEkBWERERERcSQFYRERERFxJAVhEREREXEkBWERERERcSQFYRERERFxJAVhEREREXEkBWERERERcSQFYRERERFxJAVhEREREXGk0wrCkydPpkGDBoSGhpKQkMDy5ctPOP69996jefPmhIaG0qpVK+bNm+f1vDGGYcOGUbt2bcLCwkhKSmLr1q1eY7KysujRowfh4eFERkbSu3dvDhw44Hn+8OHD3HPPPbRq1YqgoCC6du1aZi1Llizh0ksvJSQkhMaNGzNz5szT+RGIiIiIiI8rdxCePXs2KSkpDB8+nFWrVtG6dWuSk5PZs2dPmeO/+eYbunfvTu/evVm9ejVdu3ala9eubNiwwTNmzJgxTJw4kWnTppGWlka1atVITk7m8OHDnjE9evRg48aNLFy4kDlz5rB06VL69u3reb6kpISwsDAefvhhkpKSyqxl27ZtdOnShauvvpo1a9YwaNAg7rvvPj777LPy/hhERERExMe5jDGmPC9ISEjgsssu45VXXgHA7XYTHx/PQw89xBNPPPGH8d26dePgwYPMmTPH89jll19OmzZtmDZtGsYY4uLiGDx4MI8++igAOTk5xMTEMHPmTG6//XY2b97MRRddxIoVK2jXrh0ACxYs4IYbbmDXrl3ExcV5vec999xDdnY2H330kdfjQ4YMYe7cuV4h/Pbbbyc7O5sFCxac0ufPzc0lIiKCnJwcwsPDT+k1IiIiIlJxTjWvlWtGuLCwkPT0dK8Z14CAAJKSkkhNTS3zNampqX+YoU1OTvaM37ZtGxkZGV5jIiIiSEhI8IxJTU0lMjLSE4IBkpKSCAgIIC0t7ZTrP1ktjpSVCbt/hIJ8uysRERERqVBB5Rm8b98+SkpKiImJ8Xo8JiaG7777rszXZGRklDk+IyPD83zpYycaU6tWLe/Cg4KoWbOmZ8ypOF
4tubm55OfnExYW9ofXFBQUUFBQ4Lmfm5t7yu9X6eXsg+fvhpJi636NmhAdB+fXOXJbF2o3gOi6EBhoa6kiIiIiZ1u5grATjRo1ir///e92l3Fu7Nl1NAQD5GVZx7YN3uMCq0BMPajd0ArGcY2gbhOoEVWh5YqIiIicTeUKwtHR0QQGBpKZmen1eGZmJrGxsWW+JjY29oTjS28zMzOpXbu215g2bdp4xvz+Yrzi4mKysrKO+77lqSU8PLzM2WCAoUOHkpKS4rmfm5tLfHz8Kb9npZafZ902aAF9RsK+X6zjt19g727I3AEZ26HwMPzyo3UcKyoG6jU7etRtCqFVK/xjiIiIiJyOcgXh4OBg2rZty6JFizzLk7ndbhYtWsSAAQPKfE1iYiKLFi1i0KBBnscWLlxIYmIiAA0bNiQ2NpZFixZ5gm9ubi5paWn069fP8z2ys7NJT0+nbdu2ACxevBi3201CQsIp15+YmPiHpduOraUsISEhhISEnPJ7+JT8I8vPVa0OVWscDbTHcrshK8MKxL9ug1+3Wz3Fe3fC/kzrWLvUGutyQe1GcEEraHSxdatZYxEREamkyt0akZKSwt133027du1o374948eP5+DBg/Tq1QuAnj17UqdOHUaNGgXAwIEDueqqqxg3bhxdunRh1qxZrFy5kunTpwPgcrkYNGgQI0eOpEmTJjRs2JBnnnmGuLg4T9i+8MILue666+jTpw/Tpk2jqKiIAQMGcPvtt3utGLFp0yYKCwvJysoiLy+PNWvWAHgC9gMPPMArr7zC448/zr333svixYt59913mTt37un+/HzboSNBOKz68ccEBFj9wtFx0PKKo48fPgg7t8KO72DHFti5BfbvOTpz/NVH1rha8dCoFVxwMTS9FMJrnrOPIyIiIlIe5Q7C3bp1Y+/evQwbNoyMjAzatGnDggULPBeh7dixg4CAo4tRXHHFFbz99ts8/fTTPPnkkzRp0oSPPvqIli1besY8/vjjHDx4kL59+5KdnU2HDh1YsGABoaGhnjFvvfUWAwYMoHPnzgQEBHDrrbcyceJEr9puuOEGfv75Z8/9Sy65BLA27ABr9nnu3Lk88sgjTJgwgbp16/Laa6+RnJxc3h+DfyhtjThRED6e0GrQpI11lMr5zeov/nEd/LTemkHes9M6vj0yEx93ATRvB80vg4YtIKjKmX4KERERkdNS7nWEnc6v1hH+7yT4+mO49k64/p6z//0P5sK2jfDTOti6FnZ97/18cCg0bgMXXgYtEiGqVpnfRkRERKQ8TjWvadUIJzt0BjPCp6JaOLRMtA6AA9mwJR2+W2Hd5u2HTd9ax38nWRfbtboCWnWA2PpWz7GIiIjIOaIg7GT5p9AjfDZVj4S2na3D7YZff4LvVsLGb2H7RmvGeNf3MH8mRNc5GorrX2j1KouIiIicRQrCTuZZNaJGxb93QADUaWwdnW+3Zoc3fgvrv4bvV8G+3fDFe9YRFQOX/Aku/bO1hrFmikVEROQsUBB2Ms+qEdXsrQOsZdYuv946Dh+yZorXfw0bU60l2hbPto6Y+nDp1VYojo47+fcVEREROQ4FYSfzrBphw4zwiYRWhTadrKOwwOohXvUFbEqDzJ+t1on5M62WifbJ1mxxRbV3iIiIiN9QEHayYzfUqKyCQ6DNVdaRfwDWL4P0xbB1Nfy82To+mgqtO0LCddZGHuonFhERkVOgIOxUhQVQXGR97SuzqWHVrRng9smQmwXp/4O0BdZW0Cv/Zx3n1T4y5jqIjLa7YhEREanEFISdqrQtIiAAQqraW8vpCK8JV98Gf/qbNSuctgBWL4HffrXaJj77l7UTXoebrbWKdYGdiIiI/I6CsFPlH7Ruw6r7dkh0uaDBRdbRtR+s+wq+nW/tbLfua+uIqQdX/gUuu8baEU9EREQEBWHnOlRJL5Q7EyFhcNm11vHLT/DNp7BiodU68cErMPd1aJcEHbtaq0+IiIiIoykIO1V+JVo67VyIaw
T/NxBuvM8Kw8s+sQLxsk+t46IEq62icWvfnhEXERGR06Yg7FSeIOxHM8JlCa1mzQB3uBl+WAtffQgbvrGWYtuUZm3ocfXfrFUpAvXHQURExEn0L79TlbZGVOal084mlwuatLGOvbvgyw9g+Wew+wf4zyiY8xp0ugUSu6iPWERExCG04KpTeWaEHRKEj3V+Xfi/h2H423D9Pdaudtl74ZPp8NydsOBfcDDX7ipFRETkHFMQdionB+FS1SLg2jvhmbfg9sFQK96aKf/sX1YgnvMa5O23u0oRERE5R9Qa4VSeXeX8vEf4VFQJhoTrrdUm1n0NC9+yVp1YNAuWfmi1S1x9mzboEBER8TMKwk7lWT7NwTPCvxcQaF0017oTbPwWFv4HdmyBpR9YK01cfh0k9VAgFhER8RMKwk6l1ojjc7mgZSK0uBy+XwWf/8faoGPZp9YFdlf+BTrfDtUj7a5UREREzoCCsFOpNeLkXC5o1tY6flgL896AbRtgyfuQOhc6/dVaek3/MyEiIuKTdLGcU3laI7RU2Clp3Boeehn6Pg91m0JBvtVL/Nyd8L+3rfsiIiLiUzQj7FRO2VDjbHK54ML20PwyWP81zJsJmT/D3H9a6xJf1xMuv0Ebc4iIiPgIzQg7UUnJ0RlMp2yocTa5XHBxR3h8OvR4AqLj4EA2vD8RxvSxdq4zxu4qRURE5CQUhJ2odDYYIFRB+LQFBEK7JHjin3DrQ9a6xHt2wuvDYPJg2PGd3RWKiIjICSgIO1H+kf7gkKoQGGhvLf4gMAg63AxPvQlJ3a11iX9cBy8PgH//A7Iy7K5QREREyqAg7ESeFSM0G3xWhVWHLr1h6Exod43VQrHqC3i+F3w6Aw4fsrtCEREROYaCsBMd0hrC51RULegxBFKmQJNLoKQIFs+GUb1gxUJwu+2uUERERFAQdqZ87SpXIeo2gX5j4L6REF0Hcn+Dt1+ASYOsHetERETEVgrCTpR/0LrVZhrnnstl7VA3ZAbceB8Eh8L2TTB+AMwaB3n77a5QRETEsRSEneiQZoQrXFCwtS3zkzOhbZK1vFrafHj+Hvjyv9aSdiIiIlKhFISdKF89wraJiIY7n4CHJ1itE4cPwkdT4aUH4efNdlcnIiLiKArCTuRZNUKtEbZp2AIeeQVue8Q6D7/8CBMehvcneK/zLCIiIueMgrATlbZGhFaztw6nCwiExC4w9A247FqrXWLZp9bqEumLtDudiIjIOaYg7ESaEa5cqkfCHY9D/xehVrx1Ad1/RsG0IbB3l93ViYiI+C0FYSdSj3Dl1LgNPPYqXH8PBFWB71fBmD6w4F9QXGR3dSIiIn5HQdiJFIQrr6BguPZOePw1aNbWCsCf/QvG9YOfv7O7OhEREb+iIOxEpT3Cao2ovM6vA/ePhrueslonMrZbF9N9/CoUHra7OhEREb+gIOw0xhzdUEMzwpWbywWXXg1DXoe2ncG4Ycl7MPZ++GGt3dWJiIj4PAVhpyk4ZAUqUBD2FdUj4M6h1lbNEdGwbzdMHgzvjbfWIRYREZHToiDsNIeO9AcHVYHgEHtrkfJpcTkMec1acg3gmznwwn2wKc3eukRERHyUgrDTeC6UU3+wTwqrbm3C8eCLcF5tyN4LM56CWeM0OywiIlJOCsJOk3/kQjm1Rfi2Jm3g8Rlw1a1WL3HafPUOi4iIlJOCsNOUtkZUVRD2ecGh0LWfNTtcMxayMmDKo/DRVCgssLs6ERGRSk9B2Gm0hrD/adwaHpsOl99grQry5X/hpX6wY4vdlYmIiFRqCsJOoyDsn0KrQrcU6PMPCD8PMnfAhIdg/ptQUmx3dSIiIpWSgrDTaDMN/3ZRAjw+HS75E7jd8Pm/YfxDkPGz3ZWJiIhUOgrCTqMZYf9XLQJ6Pg09n7L+h2fXVqtVYtknVuuEiIiIAArCzqMg7ByXXA2PvwbN20FRIbw/Ef45DA7k2F2ZiIhIpaAg7DSe1ggFYUeIOA/6PG+tLhFYBTakwt
i+sCXd7spERERspyDsNPlHNl3QhhrOERBgrTf8yCsQUw9yf4NpQ+CT6VBcZHd1IiIitlEQdhptqOFcdS6AlClw5U3W/S/ehQkPw56d9tYlIiJiEwVhp9GGGs4WHAr/NxDu/TtUC7cupBvXD76dpwvpRETEcRSEneZw6cVyao1wtFZXWptwNLkECg/D7Jfg38/D4YN2VyYiIlJhFISdpKjQOgDCqtlbi9gvIhoeeAFu7GP1Ea/+Al56EHb/YHdlIiIiFUJB2ElKV4xwBUBIVXtrkcohIAA6d4OHxkPk+bB3t7UBx7JP1SohIiJ+T0HYSTxtEdWsACRSqsFF8Oir0CLRWkni/Qnwr5FqlRAREb+mNOQkh7SZhpxAtXDo/Szc/AAEBMKaL60L6XZttbsyERGRc0JB2ElKWyN0oZwcj8sFf/o/eOhliKoF+36B8Q/D1x+rVUJERPyOgrCTlP6aW0unyck0uAgGT4OWiVBSBP+dBG8+p1YJERHxKwrCTnJIm2lIOVQLh3uPaZVYuxRe6g+/bre7MhERkbNCQdhJ8tUjLOVU2irx8Pgjq0rsgvEDYNUXdlcmIiJyxhSEnaQ0CFdVj7CUU/0LYfDUoxtw/Psf8OEUKCm2uzIREZHTpiDsJGqNkDNRPRIeGA1J3a37Sz+AyYMh5zdbyxIRETldCsJOotYIOVMBgdClN9z7dwitCts2wrgH4Md1dlcmIiJSbgrCTlI6I6xVI+RMtboSUqZA7QaQtx+mPApL3tcSayIi4lMUhJ0k/8jSV1pHWM6G8+vCwElw6Z/B7YaPp1lLrBXk212ZiIjIKVEQdpJ89QjLWRYSBncOhb8OOLrE2viHrI04REREKjkFYScp3WJZrRFyNrlc0LErDHgJatSEjO3wcn/Ykm53ZSIiIid0WkF48uTJNGjQgNDQUBISEli+fPkJx7/33ns0b96c0NBQWrVqxbx587yeN8YwbNgwateuTVhYGElJSWzdutVrTFZWFj169CA8PJzIyEh69+7NgQMHvMasW7eOjh07EhoaSnx8PGPGjPlDLePHj6dZs2aEhYURHx/PI488wuHDh0/nx+BbSkqg4JD1tVoj5Fxo2MLqG67X3OpHf3Wo+oZFRKRSK3cQnj17NikpKQwfPpxVq1bRunVrkpOT2bNnT5njv/nmG7p3707v3r1ZvXo1Xbt2pWvXrmzYsMEzZsyYMUycOJFp06aRlpZGtWrVSE5O9gqoPXr0YOPGjSxcuJA5c+awdOlS+vbt63k+NzeXa6+9lvr165Oens7YsWMZMWIE06dP94x5++23eeKJJxg+fDibN2/m9ddfZ/bs2Tz55JPl/TH4nsPH/E9DWDX76hD/FhltzQy3TwZzpG/4rRegsMDuykRERP7IlFP79u1N//79PfdLSkpMXFycGTVqVJnjb7vtNtOlSxevxxISEsz9999vjDHG7Xab2NhYM3bsWM/z2dnZJiQkxLzzzjvGGGM2bdpkALNixQrPmPnz5xuXy2V2795tjDFmypQpJioqyhQUFHjGDBkyxDRr1sxzv3///ubPf/6zVy0pKSnmyiuvPOXPn5OTYwCTk5Nzyq+pFPbsMmZQZ2OG3Gh3JeIEbrcxX35gTMo11n934/oZs3+P3VWJiIhDnGpeK9eMcGFhIenp6SQlJXkeCwgIICkpidTU1DJfk5qa6jUeIDk52TN+27ZtZGRkeI2JiIggISHBMyY1NZXIyEjatWvnGZOUlERAQABpaWmeMZ06dSI4ONjrfbZs2cL+/fsBuOKKK0hPT/e0cvz000/MmzePG264oTw/Bt+kNYSlIrlc0OkWeOAFayfDnd/DSw/CTxtO/loREZEKUq4gvG/fPkpKSoiJifF6PCYmhoyMjDJfk5GRccLxpbcnG1OrVi2v54OCgqhZs6bXmLK+x7Hvcccdd/Dss8/SoUMHqlSpwgUXXMCf/vSnE7ZGFBQUkJub63X4JAVhsUOTS6y+4bhGR9cb/maO3VWJiI
gADls1YsmSJTz//PNMmTKFVatW8cEHHzB37lyee+65475m1KhRREREeI74+PgKrPgs8mymoQvlpIKdVxsengCtO0FJMbw33jpKiu2uTEREHK5cQTg6OprAwEAyMzO9Hs/MzCQ2NrbM18TGxp5wfOntycb8/mK84uJisrKyvMaU9T2OfY9nnnmGu+66i/vuu49WrVpxyy238PzzzzNq1CjcbneZ9Q8dOpScnBzPsXPnzjLHVXqaERY7hYTB3c/ADfdabRPfzIFpT8BBH/0Ni4iI+IVyBeHg4GDatm3LokWLPI+53W4WLVpEYmJima9JTEz0Gg+wcOFCz/iGDRsSGxvrNSY3N5e0tDTPmMTERLKzs0lPP7ou6eLFi3G73SQkJHjGLF26lKKiIq/3adasGVFRUQAcOnSIgADvjxwYGAhYS7iVJSQkhPDwcK/DJykIi91cLrjmDuj9nBWMf1gDEx6CPbvsrkxERByq3K0RKSkpzJgxgzfffJPNmzfTr18/Dh48SK9evQDo2bMnQ4cO9YwfOHAgCxYsYNy4cXz33XeMGDGClStXMmDAAABcLheDBg1i5MiRfPLJJ6xfv56ePXsSFxdH165dAbjwwgu57rrr6NOnD8uXL2fZsmUMGDCA22+/nbi4OMDq/w0ODqZ3795s3LiR2bNnM2HCBFJSUjy13HTTTUydOpVZs2axbds2Fi5cyDPPPMNNN93kCcR+65B2lZNKosXlVqtEVC3YuxvGD4DvV9ldlYiIONHpLEkxadIkU69ePRMcHGzat29vvv32W89zV111lbn77ru9xr/77rumadOmJjg42LRo0cLMnTvX63m3222eeeYZExMTY0JCQkznzp3Nli1bvMb89ttvpnv37qZ69eomPDzc9OrVy+Tl5XmNWbt2renQoYMJCQkxderUMaNHj/Z6vqioyIwYMcJccMEFJjQ01MTHx5sHH3zQ7N+//5Q/u88unzb7ZWsZqwX/srsSEUtuljHjH7L+u0y5xphln9pdkYiI+IlTzWsuY7TtU3nk5uYSERFBTk6Ob7VJvPkcrPkSbulvLWslUhkUFcLscZB+pDWq4y1w8wPg77+hERGRc+pU85qjVo1wtEPqEZZKqEow9HjCuogO4KsP4bWnj/a0i4iInEMKwk5RusWylk+Tyqb0Irp7hkGVEPhuBUwYCPt+sbsyERHxcwrCTuGZEa5mbx0ix9O6Ezz0MkScB5k/w/iH4Mf1dlclIiJ+TEHYKfJLV43QjLBUYvFNYdBkqNsUDubA1MdgxUK7qxIRET+lIOwExhztuayqHmGp5CKj4aGXju5E9/YL8Nm/rf+ORUREziIFYScoyIfSnfN0sZz4guBQ6Pk0/LmbdX/Bm/DOWCguOvHrREREykFB2AlK2yICq1gXI4n4goAAuKkP/G2Q9fWKz2H6k1pRQkREzhoFYSfIP2jdVq1uXaEv4kuuuBHuG2lty7x1tbWiRFaG3VWJiIgfUBB2Am2vLL7uwvbw0HiIiD66osSOLXZXJSIiPk5B2AnytZmG+IE6F8CgSRDXCPL2w+TBsOEbu6sSEREfpiDsBPnaTEP8ROT51lrDzdtB4WH453BY+qHdVYmIiI9SEHYCT2uENtMQPxBazeoZTuxiLan24WT4cAq4S+yuTEREfIyCsBN4WiM0Iyx+IjDIWk3ixj7W/aUfwBt/t2aJRURETpGCsBNoMw3xRy4XdO4GPZ+CoCpWv/CUx+BAjt2ViYiIj1AQdoJDulhO/NglV0O/MVYP/M+bYaKWVxMRkVOjIOwEpRtqqDVC/FWjVtbyapHnw95dMP5h2LXV7qpERKSSUxB2ArVGiBPE1oeBpcurZcErg2FLut1ViYhIJaYg7ARqjRCniIyGAS9B49ZQcMjakjl9kd1ViYhIJaUg7ATaUEOcJKw63D8KLvmTtaTaf0bB4tnWUmsiIiLHUBB2Am2oIU4TFAx3PglX3Wrd/3QGfDQV3G576xIRkUpFQdjfFRdCUY
H1tWaExUkCAqBrP/jL/db9pR/Av0ZCUaG9dYmISKWhIOzvSvuDXS4IqWpvLSJ2uPpvcNeT1iYca5fCq08c/S2JiIg4moKwvzu2PzhAp1sc6tI/W33DIVXhx3UwcRBk77W7KhERsZmSkb8rDcKh1eytQ8RuTS6Bh16G8PMgY7u18caenXZXJSIiNlIQ9neHjmymoQvlRKDOBTBwIpxfF/bvsWaGd2yxuyoREbGJgrC/09JpIt5qxsDD4yG+KRzMgSmPauMNERGHUhD2dwrCIn9UPRIefNFqlyjIhxlPw5ov7a5KREQqmIKwv/O0RigIi3gJrQp9/wGtO0FJkbW02rJP7a5KREQqkIKwv/PMCKtHWOQPgoKh51OQeKO189z7E+Dz/2gXOhERh1AQ9neeXeU0IyxSpoBA+NtAuKaHdX/+TPhwsnahExFxAAVhf3dIPcIiJ+VywQ294Jb+1v2vPoK3RkNxka1liYjIuaUg7O/yj/QIqzVC5OQ63QJ3DrVmiVcthteHWRfTiYiIX1IQ9ndqjRApn7ad4b7noEoIfLcCpg2Bg7l2VyUiIueAgrC/U2uESPld2B4eHGttRLN9E7ySAtn77K5KRETOMgVhf6fWCJHT0+AiGPAyRJRuyfww7N1ld1UiInIWKQj7M3cJHD5kfa3WCJHyq90AHj5mS+ZJj8AvP9ldlYiInCUKwv4s/+DRr9UaIXJ6asbAQy9D3AWQt99qk9i+ye6qRETkLFAQ9melF8oFh0JgkL21iPiyGlEwYBw0aGH9uZr6OHy/yu6qRETkDCkI+zPPihHqDxY5Y2HV4YHR0LQtFB6G6U/B+mV2VyUiImdAQdifHSq9UK6avXWI+IuQMOjzHLTqACVFMPPvsPJ/dlclIiKnSUHYn5XOCGvFCJGzJygY7n4G2l1jbcP81mj4+mO7qxIRkdOgIOzPtJmGyLkRGAjdH4OOXa37/50E/3vH1pJERKT8FIT9WelmGqEKwiJnXUAA3NIfrulh3Z/7Onw6A4yxty4RETllCsL+rHQzDV0sJ3JuuFxwQy/4S1/r/uLZ8P4Eq2VCREQqPQVhf5av7ZVFKsTVt8Ftj1jB+Js5Vt9wSbHdVYmIyEkoCPszBWGRipPYBe58EgICYdVia0WJokK7qxIRkRNQEPZnpcun6WI5kYpx6dVw79+hSjBsSIXpTx7d5lxERCodBWF/VrrFspZPE6k4LS6Hvs9baw7/sMbahe5grt1ViYhIGRSE/VnpxXJqjRCpWI3bwINjrQtVd3wHUx6FvP12VyUiIr+jIOzPDmkdYRHb1GsOA16GGlHwy0/wymDI3md3VSIicgwFYX9ljHaWE7Fb7QZWGI48H/bsgFcegawMu6sSEZEjFIT9VeFhcJdYX4dVs7cWESerVRceehnOqw2//QqTHoG9u+yuSkREUBD2X6UrRgQGQXCovbWIOF3NWGtmuFY9yN4Lk1Lg1+12VyUi4ngKwv7q2DWEXS57axERiIyGAeMgrhHkZcHkFNi11e6qREQcTUHYX2kzDZHKp0YUPPgi1GtmLak2+VHYvsnuqkREHEtB2F9pMw2RyqlaOPQbAw1bwuGDMG0I/LDW7qpERBxJQdhfaTMNkcortBrcPwqaXAIF+TB9KHy3wu6qREQcR0HYX2kzDZHKLSQM+vwDLkqAokJ4bRisX2Z3VSIijqIg7K+0mYZI5VclGHqNgNadoKQIZv4dVn9hd1UiIo6hIOyvtJmGiG8IqgJ3PQXtksDthn+PguWf2V2ViIgjKAj7K09rhDbTEKn0AgOh++OQ2AWMG94ZC19/bHdVIiJ+T0HYX3laIzQjLOITAgLgb4Og01+t+/+dBF+8a2tJIiL+TkHYX2kdYRHf43JB135wzR3W/U+mw2f/BmPsrUtExE8pCPsrBWER3+RywQ33wg29rPsL3oQ5rykMi4icAwrC/srTI6zWCBGfdE0Pa3YYYPFs+GiqwrCIyFmmIOyvSjfU0PJpIr7rqlutvm
GApR/AfydaK0uIiMhZoSDsj4qLoPCw9bVaI0R82xU3wu2DrZaJZZ/Cuy+Du8TuqkRE/MJpBeHJkyfToEEDQkNDSUhIYPny5Scc/95779G8eXNCQ0Np1aoV8+bN83reGMOwYcOoXbs2YWFhJCUlsXXrVq8xWVlZ9OjRg/DwcCIjI+nduzcHDhzwGrNu3To6duxIaGgo8fHxjBkz5g+1ZGdn079/f2rXrk1ISAhNmzb9Qz0+r7Q/2OWytnIVEd+WcD3cMQRcAZA231perURhWETkTJU7CM+ePZuUlBSGDx/OqlWraN26NcnJyezZs6fM8d988w3du3end+/erF69mq5du9K1a1c2bNjgGTNmzBgmTpzItGnTSEtLo1q1aiQnJ3P48GHPmB49erBx40YWLlzInDlzWLp0KX379vU8n5uby7XXXkv9+vVJT09n7NixjBgxgunTp3vGFBYWcs0117B9+3bef/99tmzZwowZM6hTp055fwyVW2kQDq1mLckkIr6vXRLcNdT6M73yf/DWaCgptrsqERHfZsqpffv2pn///p77JSUlJi4uzowaNarM8bfddpvp0qWL12MJCQnm/vvvN8YY43a7TWxsrBk7dqzn+ezsbBMSEmLeeecdY4wxmzZtMoBZsWKFZ8z8+fONy+Uyu3fvNsYYM2XKFBMVFWUKCgo8Y4YMGWKaNWvmuT916lTTqFEjU1hYWN6P7ZGTk2MAk5OTc9rf45zbttGYQZ2NebaH3ZWIyNm29itjBidbf8b/OdyYotP/+0xExF+dal4r13RhYWEh6enpJCUleR4LCAggKSmJ1NTUMl+TmprqNR4gOTnZM37btm1kZGR4jYmIiCAhIcEzJjU1lcjISNq1a+cZk5SUREBAAGlpaZ4xnTp1Ijg42Ot9tmzZwv79+wH45JNPSExMpH///sTExNCyZUuef/55Sk7wK8aCggJyc3O9jkpPS6eJ+K+LO0CvERBYBdZ9DTOfheJCu6sSEfFJ5QrC+/bto6SkhJiYGK/HY2JiyMjIKPM1GRkZJxxfenuyMbVq1fJ6PigoiJo1a3qNKet7HPseP/30E++//z4lJSXMmzePZ555hnHjxjFy5MjjfuZRo0YRERHhOeLj4487ttI4dGTpNO0qJ+KfWlwO9z0HVYJhYyq8PgwKC+yuSkTE5ziqgdTtdlOrVi2mT59O27Zt6datG0899RTTpk077muGDh1KTk6O59i5c2cFVnyaPDPCulBOxG81bwd9/gHBofDdSnjtaSjIt7sqERGfUq4gHB0dTWBgIJmZmV6PZ2ZmEhsbW+ZrYmNjTzi+9PZkY35/MV5xcTFZWVleY8r6Hse+R+3atWnatCmBgYGeMRdeeCEZGRkUFpb9q8WQkBDCw8O9jkrPE4Q1Iyzi15pcAn1HQUgYbF0N05+Ew4fsrkpExGeUKwgHBwfTtm1bFi1a5HnM7XazaNEiEhMTy3xNYmKi13iAhQsXesY3bNiQ2NhYrzG5ubmkpaV5xiQmJpKdnU16erpnzOLFi3G73SQkJHjGLF26lKKiIq/3adasGVFRUQBceeWV/PDDD7iPWZD++++/p3bt2l69xT7v0JEgrM00RPzfBa3ggRcgtCr8tB5efeLo/wyLiMgJlbs1IiUlhRkzZvDmm2+yefNm+vXrx8GDB+nVqxcAPXv2ZOjQoZ7xAwcOZMGCBYwbN47vvvuOESNGsHLlSgYMGACAy+Vi0KBBjBw5kk8++YT169fTs2dP4uLi6Nq1K2DN2l533XX06dOH5cuXs2zZMgYMGMDtt99OXFwcAHfccQfBwcH07t2bjRs3Mnv2bCZMmEBKSoqnln79+pGVlcXAgQP5/vvvmTt3Ls8//zz9+/c/7R9gpaSL5UScpcFF0G+sdV3A9k0w9fGj1wqIiMjxnc6SFJMmTTL16tUzwcHBpn379ubbb7/1PHfVVVeZu+++22v8u+++a5o2bWqCg4NNixYtzNy5c72ed7vd5plnnjExMT
EmJCTEdO7c2WzZssVrzG+//Wa6d+9uqlevbsLDw02vXr1MXl6e15i1a9eaDh06mJCQEFOnTh0zevToP9T+zTffmISEBBMSEmIaNWpk/vGPf5ji4uJT/uw+sXzaP4dbSyt99ZHdlYhIRdq11Zin/mr9+R/b15i8bLsrEhGxxanmNZcxxtgdxn1Jbm4uERER5OTkVN5+4cmPwg9r4K4n4dI/212NiFSkX7dZM8J5+6F2A2umuEaU3VWJiFSoU81rjlo1wjHUGiHiXLUbQv9xEH4e/LodXhkMOfvsrkpEpFJSEPZHCsIizhZTDwa8BJHnw54dVhjev+fkrxMRcRgFYX+kDTVE5Pw68NDLUDMW9u2GV1Igq+yNj0REnEpB2N+43VBwZB1RzQiLOFvNWGtmODrOCsGTHoG9u+2uSkSk0lAQ9jeHD0Lp9Y8KwiISVcsKw7XiIXsvTE6BzB12VyUiUikoCPub0raI4FAIqmJvLSJSOUREW2E4tgHk/AaTB1sX0omIOJyCsL/JP2jdajZYRI5VIwr6vwhxF1hLq00eDLt/tLsqERFbKQj7m/wjM8IKwiLye9Uj4cGxEN8UDubAlEdh5/d2VyUiYhsFYX9zSEunicgJVAuHfmOg/oVWK9WUx6xtmUVEHEhB2N8cPhKEtXSaiBxPWHV44AVo1Mq6wHbaEPhpvd1ViYhUOAVhf1N6sVxYNXvrEJHKLbQq9H0emlwCBfnw6lDYusbuqkREKpSCsL/x7CqnGWEROYmQMLhvJDRvB4WHYcaT8N1Ku6sSEakwCsL+pjQIV1WPsIicguAQuPdZuOhyKCqE156Bjd/aXZWISIVQEPY3ulhORMqrSjD0Gg4Xd4CSInhjBKz72u6qRETOOQVhf+NZPk2tESJSDkFVoOfTcMnVUFIMbz4Lq5fYXZWIyDmlIOxv1BohIqcrMAjufALaJYHbDf9+Hlb+z+6qRETOGQVhf6PWCBE5EwGB0P0xSLgejBvefgHS5ttdlYjIOaEg7G/yFYRF5AwFBMJtj8CVN4ExMGscLPvU7qpERM46BWF/YswxrRHqERaRMxAQALc+DJ3+at1/fwJ8+YG9NYmInGUKwv6k8LB1kQtoRlhEzpzLBV37wZ+7Wfc/mgKLZttbk4jIWaQg7E9KZ4MDAiE41N5aRMQ/uFxw431w7V3W/Tkz4PP/2FuTiMhZoiDsT45ti3C57K1FRPyHywXX3w039LLuz58J896w2rFERHyYgrA/8awYUc3eOkTEP13TA/7S1/p64Vvw6QyFYRHxaQrC/kSbaYjIuXb1bXBLf+vrL96Fj6YqDIuIz1IQ9ifaTENEKkKnW+Bvg6yvl35grSjhdttakojI6VAQ9ifaTENEKsoVN8Ltj1r9w9/MgXdfAneJ3VWJiJSLgrA/8bRGKAiLSAVIuA56PAGuAEhbAG+PhRKFYRHxHQrC/iT/oHWrzTREpKK07Qw9n7KWbUz/H/zn+aPrmYuIVHIKwv7k0JEZ4VDNCItIBWpzFdwzDAKDYM2X8OZIKC6yuyoRkZNSEPYnulhOROzS6kq49+8QVAXWfw1vjICiQrurEhE5IQVhf1IahLV8mojY4aIEuG8kVAmBTWnw+jPW1u8iIpWUgrA/8Vwspw01RMQmzdpC339Y27xvSYcZT0NBvt1ViYiUSUHYnxw6ZotlERG7NG4D94+GkKrwwxp4dSgcPmh3VSIif6Ag7E/ytY6wiFQSjVpCvxcgtBps2wDThhz9O0pEpJJQEPYXJcVHe/EUhEWkMqh/ITz4ovVbqp+/gymPwcEcu6sSEfFQEPYXpUungXqERaTyiG8C/cdB9UjYtdUKw3n77a5KRARQEPYfpb9yDK1mLWwvIlJZxDWywnCNmvDLTzD5Ucj5ze6qREQUhP2G+oNFpDKLrQ8DXoKIaMj8GSYPhux9dlclIg6nIOwvSlsjtJmGiFRWtepaYTiqFuzdBa88AlmZdlclIg6mIOwvtJmGiPiC6DgY8DKcVx
t++9UKw/t+sbsqEXEoBWF/odYIEfEVNWOsmeHz68L+PfBKCuzZZXdVIuJACsL+wrOZhoKwiPiAyPOtC+hi6kPOPisMZ/xsd1Ui4jAKwv7Cs72yWiNExEdEnAf9X4TaDSEvywrDu7baXZWIOIiCsL/wtEZoDWER8SE1oqwwHN/U2mxj8qOwfZPdVYmIQygI+wtPa4RmhEXEx1SLgH5joGFLOHwQpj4OW9fYXZWIOICCsL/QxXIi4svCqsP9o6BpW2u7+BlPwqY0u6sSET+nIOwvFIRFxNeFhMF9z0HLRCgqhH8Oh7VL7a5KRPyYgrC/8GyoodYIEfFhVYLhnuFwyZ+gpBjeHAkrFtpdlYj4KQVhf6EZYRHxF4FBcOdQaJ8Mxg1vvwDLPrW7KhHxQwrC/sDtti4wAc0Ii4h/CAiEboOhY1fr/vsTYMn7tpYkIv5HQdgfFBwCY6yvtXyaiPiLgAC4pT90vt26//E0+OzfR/++ExE5QwrC/qC0P7hKCAQF21uLiMjZ5HLBjffBDb2s+wvehE9nKAyLyFmhIOwP8rW9soj4uWt6QNd+1tdfvAv/nWS1hYmInAEFYX9QGoRDFYRFxI9ddSvc9og1S7zsE5j1IpSU2F2ViPgwBWF/4Fk6TUFYRPxcYhfo8YTVP7zic/j3P6C4yO6qRMRHKQj7g/wjK0aEacUIEXGAtp3h7mEQWMXacOONEdYGHCIi5aQg7A/yj8wIaw1hEXGKizvAfc9aG3BsSoMZT0FBvt1ViYiPURD2B4d0sZyIOFDzy6DvKGtr5q2rYdoTR6+ZEBE5BQrC/sCzq5xaI0TEYRq3hn5jrN+Ibd8IrwyGvP12VyUiPkJB2B94WiO0mYaIOFD9C2HAS1AjCn75ESYOgqxMu6sSER+gIOwPPK0RmhEWEYeKawQPjYeasbBvN0wcCJk/212ViFRyCsL+wNMaoR5hEXGw8+tYYTimHuTsg0mPwI4tdlclIpWYgrA/UBAWEbFERsOAl6FeMziYC1Megx/W2F2ViFRSCsL+wLOhhlojRESoHgH9xkLjNlBwCF4dChtS7a5KRCohBWFfZ8wxG2poRlhEBIDQqtD3eWiZaO0898ZwSF9kd1UiUskoCPu6ogIoObK9qIKwiMhRVYLhnhHQLgncbnhrNHz9sd1ViUgloiDs60pXjAgIsBaVFxGRowIDofvj0LGr9Ru0/06ChW9ZX4uI451WEJ48eTINGjQgNDSUhIQEli9ffsLx7733Hs2bNyc0NJRWrVoxb948r+eNMQwbNozatWsTFhZGUlISW7du9RqTlZVFjx49CA8PJzIykt69e3PggPcOQuvWraNjx46EhoYSHx/PmDFjjlvTrFmzcLlcdO3atXwfvrI5djMNl8veWkREKqOAALilP1x7l3V/3hvwyXSFYREpfxCePXs2KSkpDB8+nFWrVtG6dWuSk5PZs2dPmeO/+eYbunfvTu/evVm9ejVdu3ala9eubNiwwTNmzJgxTJw4kWnTppGWlka1atVITk7m8OHDnjE9evRg48aNLFy4kDlz5rB06VL69u3reT43N5drr72W+vXrk56eztixYxkxYgTTp0//Q03bt2/n0UcfpWPHjuX9+JWPZzMNtUWIiByXywXX3w1d+1n3l7wHs8eBu8TeukTEXqac2rdvb/r37++5X1JSYuLi4syoUaPKHH/bbbeZLl26eD2WkJBg7r//fmOMMW6328TGxpqxY8d6ns/OzjYhISHmnXfeMcYYs2nTJgOYFStWeMbMnz/fuFwus3v3bmOMMVOmTDFRUVGmoKDAM2bIkCGmWbNmXu9dXFxsrrjiCvPaa6+Zu+++29x8883l+vw5OTkGMDk5OeV63Tmz/htjBnU25qUH7a5ERMQ3pC0w5pFrrL87/znCmKKCk79GRHzKqea1cs0IFxYWkp6eTlJSkuexgIAAkpKSSE0te2ma1NRUr/EAycnJnvHbtm0jIyPDa0xERAQJCQmeMampqURGRt
KuXTvPmKSkJAICAkhLS/OM6dSpE8HBwV7vs2XLFvbvP7rv/LPPPkutWrXo3bv3KX3mgoICcnNzvY5KRWsIi4iUT/tkuGcYBFaBdV/BjKfh8CG7qxIRG5QrCO/bt4+SkhJiYmK8Ho+JiSEjI6PM12RkZJxwfOntycbUqlXL6/mgoCBq1qzpNaas73Hse3z99de8/vrrzJgx49Q+MDBq1CgiIiI8R3x8/Cm/tkIoCIuIlN/FHaDPSAgOhe9XwZRHIW//yV8nIn7FMatG5OXlcddddzFjxgyio6NP+XVDhw4lJyfHc+zcufMcVnkaSjfTCNNmGiIi5dKsLfR/EapFwM7vYdIg+O1Xu6sSkQoUVJ7B0dHRBAYGkpmZ6fV4ZmYmsbGxZb4mNjb2hONLbzMzM6ldu7bXmDZt2njG/P5ivOLiYrKysry+T1nvU/rcjz/+yPbt27nppps8z7vdbsCaXd6yZQsXXHDBH+oPCQkhJCSkzM9WKZTOCFfVjLCISLnVaw4PT4BpQ2Dvbpg4CO4fBXGN7K5MRCpAuWaEg4ODadu2LYsWHd2dx+12s2jRIhITE8t8TWJiotd4gIULF3rGN2zYkNjYWK8xubm5pKWlecYkJiaSnZ1Nenq6Z8zixYtxu90kJCR4xixdupSioiKv92nWrBlRUVE0b96c9evXs2bNGs/xl7/8hauvvpo1a9ZUvpaHU6XWCBGRM1OrLgycALUbQu5vMOkR+HGd3VWJSAUod2tESkoKM2bM4M0332Tz5s3069ePgwcP0qtXLwB69uzJ0KFDPeMHDhzIggULGDduHN999x0jRoxg5cqVDBgwAACXy8WgQYMYOXIkn3zyCevXr6dnz57ExcV51vi98MILue666+jTpw/Lly9n2bJlDBgwgNtvv524uDgA7rjjDoKDg+nduzcbN25k9uzZTJgwgZSUFABCQ0Np2bKl1xEZGUmNGjVo2bKl10V2PkXLp4mInLmIaHjoZWjYEg4ftGaI1y+zuyoROcfK1RoB0K1bN/bu3cuwYcPIyMigTZs2LFiwwHNh2o4dOwgIOJqvr7jiCt5++22efvppnnzySZo0acJHH31Ey5YtPWMef/xxDh48SN++fcnOzqZDhw4sWLCA0NBQz5i33nqLAQMG0LlzZwICArj11luZOHGi5/mIiAg+//xz+vfvT9u2bYmOjmbYsGFeaw37pfyD1m1V9QiLiJyRsOrwwAvw75GwIRXe+DvcNgguv8HuykTkHHEZo611yiM3N5eIiAhycnIIDw+3uxwY0wd+3Wb95d2srd3ViIj4vpISeO9lSFtg3b+hFyTdod07RXzIqeY1x6wa4bfUIywicnYFBkK3wVb4BWtL5g9egSMXWIuI/1AQ9nWeVSPUGiEicta4XNDlXrilv3X/64/hP89DcaG9dYnIWaUg7MtKiqEg3/o6rJq9tYiI+KNOt8BdT0JgEKxeol3oRPyMgrAvK50NBrVGiIicK5f+Ge47Zhe6ydqFTsRfKAj7skNHgnBoVQgItLcWERF/1rwdPHhkF7pd38OEh2HPLrurEpEzpCDsy3ShnIhIxal/ZBe6mrHWVswTH4btm+yuSkTOgIKwL/NspqEL5UREKkStujBoEsQ3hYO5MOVRbbwh4sMUhH1ZaWtEVc0Ii4hUmBpR0H8cXJQARYXWxhtff2x3VSJyGhSEfZlaI0RE7BESBvc+a+06Z9zw30nw6QytNSziYxSEfZmnNUJBWESkwgUGwm2PWDvPASyeDW+N1lrDIj5EQdiXHdJmGiIitnK54Joe0P1xa/WeVYvh1Se9l7cUkUpLQdiXqTVCRKRyaH8t9H0eQqrCD2tg0iDYv8fuqkTkJBSEfZmCsIhI5dGsLTz0EoSfB79ut9Ya/uUnu6sSkRNQEPZlh470CKs1QkSkcqjTGAZNhJj6kLPPmhn+fpXdVYnIcSgI+7L8g9ZtaDV76xARkaOiYuDh8XDBxXD4ELw6FL6db3dVIlIGBWFflq
8ZYRGRSqlqDbh/NFz6Z3CXwOxxMOc1La8mUskoCPuyfG2oISJSaVUJhjuHwrV3WfcXzYJ/jYTCAnvrEhEPBWFf5XYf0xqhICwiUim5XHD93XDH4xAYBGuXwpTBkLff7spEBAVh31VwyNrNCNQaISJS2V12LTzwgvX39c/fwfiHrJUlRMRWCsK+qrQtokqwdYiISOXWuDUMnATRdSArAyY+DFvS7a5KxNEUhH1VaRBWW4SIiO+oVddaXq1RK2tFielDIXWu3VWJOJaCsK86pAvlRER8UrUI6PcCtE2yrvd492X4ZLpWlBCxgYKwrypdOi1M/cEiIj4nKBh6DIHkntb9L96FN5+FwsP21iXiMArCvkpLp4mI+DaXC67raS2xFlgF1n0Nkx6B7L12VybiGArCvqq0NSJMQVhExKe17QwPjrFaJnZthZf6w/ZNdlcl4ggKwr7K0xqhICwi4vMatYKUyVC7IeRlweTBsGKh3VWJ+D0FYV9VupmG1hAWEfEPNWPh4QnQ8gooLoK3X4BPZ1hbNIvIOaEg7KsOaUZYRMTvhFaFXiMg6Q7r/uLZ8PpwOHzQ1rJE/JWCsK/KV4+wiIhfCgiALvdaF9EFVYFN38KEgbDvF7srE/E7CsK+yrNqhFojRET8UtvOMOBlCD8PMrbD+AHwwxq7qxLxKwrCvsqzakQ1e+sQEZFzp35zeGQyxDeFg7kwdQh8M8fuqkT8hoKwr9KGGiIizhAZbc0MX3K1deHce+Ph/YnWBXUickYUhH2VNtQQEXGO4BC460m44V7r/rJPYMpjkJtlb10iPk5B2BcVFhydCdDFciIizuBywTV3QO/nrNUltm2Al/pp8w2RM6Ag7ItK2yICAiCkqr21iIhIxWqZCI9MgZh6kPMbvDIYvp1nd1UiPklB2BeVbqYRVt2aIRAREWepVRcGvQIXd4CSIpj9Erw7HooL7a5MxKcoCPsibaYhIiKhVeGe4VbfsMsFqXNg8qOQs8/uykR8hoKwL9JmGiIiAkf7hvv8w/o3YfsmGPcg/LTB7spEfIKCsC/yBGEtnSYiIsCF7SFlCtRuAHlZMOVRa2UJY+yuTKRSUxD2RZ7WCG2mISIiR0THwcBJ0OYqKCm21hp+ewwUHra7MpFKS0HYF2l7ZRERKUtIGPR8Gm7qA64AWLnQ2pp5zy67KxOplBSEfZF6hEVE5HhcLvhzN3hwLNSIgl+3w0sPwpov7a5MpNJREPZFCsIiInIyjVvD4GlwwcVQcAjefA4+mKytmUWOoSDsi0p7hNUaISIiJxJxHvQbC51vt+5/9SG8kgL7M+2tS6SSUBD2RZoRFhGRUxUYCDfeB/eNtP7d+HkzjOsH362wuzIR2ykI+yIFYRERKa8Wl8PgqVC3KRzMhelPwvw3wV1id2UitlEQ9kWe1ggFYRERKYfzasPD4+HKm6w1hj//N0wdot3oxLEUhH2RNtQQEZHTVSUY/m8g3DkUgkPhhzUw9n7YvNzuykQqnIKwrykpgYJ862u1RoiIyOlq29laVaLOBXAwx2qV+PhVrSohjqIg7GtKZ4NBQVhERM5MrbrWbnQdu1r3l7wHEwfCvl9sLUukoigI+5r8I/3BIVWtK4FFRETORJVg+OsAuPfv1rKcO7+HFx+AVV/YXZnIOacg7Gs8/cHV7K1DRET8S6sr4dFXoWFLawOOf/8DZo072o4n4ocUhH3NoSNBWJtpiIjI2RZVC/qPg2vvtLZqTpsPL/eHXVvtrkzknFAQ9jWlrRHqDxYRkXMhMBCuv8fakS78PMjcAeMfgkWzteaw+B0FYV+Tf9C6VRAWEZFzqUkbeGy61TJRUgxzZsCUx7Q9s/gVBWFf49lMQ60RIiJyjlWPgF4j4PbB1prDP66DMX0hfZHdlYmcFQrCvkbbK4uISEVyuSDhemt2uP6FcPgg/GeUdTHdsUt6ivggBWFfoyAsIiJ2iI6Dh8bDdXdDQIC1vNqYPtbOdCI+SkHY1xzSxXIiImKTwE
BIvgsengDRdSB7r9U3/PE0KCywuzqRclMQ9jX5Wj5NRERsVv9CeHQaXH4DGANL3ocX74ftm+yuTKRcFIR9jVojRESkMggJg24pcN9Ia5m1vbtg4iD4ZDoUFdpdncgpURD2NQrCIiJSmbS4HIa8Bu2SwLjhi3dh3APw83d2VyZyUgrCvkbLp4mISGVTtQb0eAJ6Pws1alqbcEx4GOa8BsWaHZbKS0HYlxhzzIYa1eytRURE5PdaXmHNDrftbM0OL5oFL/aDHZodlspJQdiXFByy/mIBCNOMsIiIVELVwuHOoXDv36F6JGT+bM0Of/IqFB62uzoRLwrCvuTQkf7goCoQHGJvLSIiIifS6kp44nW49Gpwu+GL9+CF+2BLut2ViXicVhCePHkyDRo0IDQ0lISEBJYvX37C8e+99x7NmzcnNDSUVq1aMW/ePK/njTEMGzaM2rVrExYWRlJSElu3bvUak5WVRY8ePQgPDycyMpLevXtz4ID3jjbr1q2jY8eOhIaGEh8fz5gxY7yenzFjBh07diQqKoqoqCiSkpJOWnulogvlRETEl1SLgLueslaWiDwfsjJg2hB4azQcyLG7OpHyB+HZs2eTkpLC8OHDWbVqFa1btyY5OZk9e/aUOf6bb76he/fu9O7dm9WrV9O1a1e6du3Khg0bPGPGjBnDxIkTmTZtGmlpaVSrVo3k5GQOHz76K5QePXqwceNGFi5cyJw5c1i6dCl9+/b1PJ+bm8u1115L/fr1SU9PZ+zYsYwYMYLp06d7xixZsoTu3bvzxRdfkJqaSnx8PNdeey27d+8u74/BHvmlm2moLUJERHxIi8thyOvQ8RZry+aV/4PR91q3xthdnTiZKaf27dub/v37e+6XlJSYuLg4M2rUqDLH33bbbaZLly5ejyUkJJj777/fGGOM2+02sbGxZuzYsZ7ns7OzTUhIiHnnnXeMMcZs2rTJAGbFihWeMfPnzzcul8vs3r3bGGPMlClTTFRUlCkoKPCMGTJkiGnWrNlxP0txcbGpUaOGefPNN0/145ucnBwDmJycnFN+zVmz9itjBnU2ZvxDFf/eIiIiZ8P2Tca8cJ/179mgzsZMG2LMvl/srkr8zKnmtXLNCBcWFpKenk5SUpLnsYCAAJKSkkhNTS3zNampqV7jAZKTkz3jt23bRkZGhteYiIgIEhISPGNSU1OJjIykXbt2njFJSUkEBASQlpbmGdOpUyeCg4O93mfLli3s37+/zNoOHTpEUVERNWvWLM+PwT5qjRAREV9X/0IYPBVuuNe65uW7lVbv8MK3tdSaVLhyBeF9+/ZRUlJCTEyM1+MxMTFkZGSU+ZqMjIwTji+9PdmYWrVqeT0fFBREzZo1vcaU9T2OfY/fGzJkCHFxcX8I6scqKCggNzfX67CNgrCIiPiDwCC45g54bAY0bg1FBTDvnzCmrxWMRSqIY1eNGD16NLNmzeLDDz8kNDT0uONGjRpFRESE54iPj6/AKn9Hm2mIiIg/qVUXHnzRWm6tRk1rm+ZXn4CZz0L2XrurEwcoVxCOjo4mMDCQzMxMr8czMzOJjY0t8zWxsbEnHF96e7Ixv78Yr7i4mKysLK8xZX2PY9+j1Isvvsjo0aP5/PPPufjii0/4mYcOHUpOTo7n2Llz5wnHn1OaERYREX/jclkbcAz9J3T6KwQEwNqlMKoXLJoNxUV2Vyh+rFxBODg4mLZt27Jo0SLPY263m0WLFpGYmFjmaxITE73GAyxcuNAzvmHDhsTGxnqNyc3NJS0tzTMmMTGR7Oxs0tOPrj24ePFi3G43CQkJnjFLly6lqKjI632aNWtGVFSU57ExY8bw3HPPsWDBAq+e4+MJCQkhPDzc67CNgrCIiPirsOpwy4OQMhUatrA235gzA8beD9+vsrs68VflvQpv1qxZJiQkxMycOdNs2rTJ9O3b10RGRpqMjAxjjDF33XWXeeKJJzzjly1bZoKCgsyLL75oNm/ebIYPH26qVKli1q9f7x
kzevRoExkZaT7++GOzbt06c/PNN5uGDRua/Px8z5jrrrvOXHLJJSYtLc18/fXXpkmTJqZ79+6e57Ozs01MTIy56667zIYNG8ysWbNM1apVzauvvur1PsHBweb99983v/76q+fIy8s75c9v66oRrw61rrD9dl7Fv7eIiEhFcbuNSfvMmKdvPbq6xOvDjdm72+7KxEecal4rdxA2xphJkyaZevXqmeDgYNO+fXvz7bffep676qqrzN133+01/t133zVNmzY1wcHBpkWLFmbu3Llez7vdbvPMM8+YmJgYExISYjp37my2bNniNea3334z3bt3N9WrVzfh4eGmV69efwiwa9euNR06dDAhISGmTp06ZvTo0V7P169f3wB/OIYPH37Kn93WIDz+Yesvg7VfVfx7i4iIVLRDeca8P9GYlGusf/8GJxvz8avW4yIncKp5zWWMVrIuj9zcXCIiIsjJyan4NonR90LmDuvCgiZtKva9RURE7PLrdvh46tHtmatHwvX3QML1EBhoY2FSWZ1qXnPsqhE+6dCRHuGq6hEWEREHqd0A7h8Nff4BterBgWx4bzyMe0D9w3JGFIR9yeHSi+W0fJqIiDiMywUXJcDj0+GW/tZSor9ug6mPw/QnYfePdlcoPkhB2FcUFVoHQFg1e2sRERGxS2AQdLoFnnwTOt4CAYGwebk1O/yf0ZBV9iZaImVREPYVpZtpuAIgpKq9tYiIiNitWjj8tT888U+45E9gDKT/D56/Bz6cYrVPiJyEgrCv8LRFVLMWGxcRERE4vw70fBpSpkDTS6GkGJZ+ACN7wmf/hoJ8uyuUSkyJylcc0mYaIiIixxXfFPqNgQdegLpNoOAQLHgTRt4Fi2crEEuZguwuQE5RaWuELpQTERE5vmZtocklsPZLmPcG7PsFPp0BX7wHf+4GV94EwaF2VymVhGaEfcXhg9atlk4TERE5sYAAuORqq3/49kfhvNpWz/Anr8Jzd8KS960tnMXxFIR9hWdGWEFYRETklAQGQcJ1MPQNuH0w1Iy1AvHH06yWiSXvq2XC4RSEfUW+eoRFREROS2CQtQvdkzOh25FAnLffCsTP9YAF/4KDOXZXKTZQj7CvKA3CVdUjLCIicloCg+Dy6+Gya2DF57BoltVD/Nm/4It34fIb4E+3QlSM3ZVKBVEQ9hWe1ghtpiEiInJGAoOs0Ns+GdZ+ZQXi3T9Yy659/TFc+mfrwrraDeyuVM4xBWFfka/tlUVERM6qgEBrM442V8GWdGuZta2rYeVC67iwPXT6q7UShctld7VyDigI+4rSGWGtGiEiInJ2uVzQvJ11/PwdLJ4F65dZWzdvXg4x9aztnNslQUiY3dXKWaQg7CvyjyyfpovlREREzp36zaHXCNi7C776CJZ/Bpk74P0JMPd1q6Wiw81QU33E/kBB2Ffka0MNERGRCnN+XfjrALj+HisMf/UR/PardVHdkvehRQJc3gUuvMxqsRCfpCDsK0q3WFZrhIiISMUJqw5X3Qodu8Km5dYFdVtXw4ZU64g835olTrjO+lp8ioKwLygpsfZMB7VGiIiI2CEgEFomWkfmz5A6z1qCLXsvLHgTPvs3XJQAiV2g+WUQqFliX6Ag7AsOHzj6tYKwiIiIvWLqQ9d+0KU3rPsKUufCj+tgY6p11IiylmBrlwR1GmvFiUpMQdgXlLZFhIRZax+KiIiI/aoEQ9vO1pG5A76db80S5+2HL/9rHbUbQNtrrDGR0XZXLL+jVOULtL2yiIhI5RZTD26+H27sDd+ttALxxlT4dTvMmQFzX4PGbax1i1tdCdUjbS1XLArCvkBBWERExDcEBkGLy60j/wCs+RJW/g9+Wm9dZLd1tbUU2wWtoXUnuLiD1UohtlAQ9gWezTS0dJqIiIjPCKtuXTyX2AX2/WKF4rVLYdfWo6H4v5PgglZwcUcrPNeMtbtqR1EQ9gWeGeFq9tYhIiIipyc6DpK6W8e+X2DtV7BuKezYAj+stY4PXo
HYBtbqExddDg0u0uoT55iCsC/wBGHNCIuIiPi86Djo3M06sjKsULwxFbZtgIzt1rF4tvWb4ObtrOXYmlyqi+3OAQVhX1DaGqEeYREREf9SMxau/pt1HMqzLrTb9C1sXm7dX/WFdQDUiocmbaDJJdC4NVSLsLV0f6Ag7AvyD1q3CsIiIiL+q2oNuPRq63CXwPbNsDkNvl8FO7fCnp3WsexTa23iuEbQ6GKrhaLBRRBVS2sWl5OCsC/I18VyIiIijhIQCI1aWkeX3lab5A9rYesa6yK7jO2w+0fr+OpD6zXh50GDC61QXP8iqNsYgkPt/BSVnoKwLzik5dNEREQcLay6tf5wqyut+3n7rVC8fSNs32QF4tzfYN3X1gHgCoDz60LdCyCuMdS5wDq0XJuHgrAvOKwgLCIiIseoEXW0jQKg8DDs/N4Kxds3wc+brbC8Z4d1lPYZg7WZR61464ipZ4XlmHpQM8aaiXYQBWFfUDojXFVBWERERMoQHAoXXGwdpXKzYPcPR44jbRT7dsOBbOv4ab339wgIhMjzrUAcdeSoWQsia0F4TStAVwv3q7CsIFyZFR6GbRut/2hBy6eJiIjIqQuvCeHt4cL2Rx8ryD960d2enZC5A/buso6iQms5t6yM439Pl8taraJGJFSPsvY4CKkKoVWPuQ2zgnlgEARVgcAqEBRkBeuYeuf8Y5eHgnBllpsF04Ycva8ZYRERETkTIWEQ39Q6juV2Q84+2L8H9mdaR9aRr7P3Wm0Wh3LBmKMzymwv33tfeRP838Cz8znOEgXhyiwwCGo3tL5u2MK6GlRERETkbAsIsJZfi6oFtCx7TEmJFYbz9lvHgWw4fBAOH4KCQ0du863HigqhuAhKiqGkCIqLrRaLSkZBuDKLqgWPz7C7ChERERFru+caUX616kSA3QWIiIiIiNhBQVhEREREHElBWEREREQcSUFYRERERBxJQVhEREREHElBWEREREQcSUFYRERERBxJQVhEREREHElBWEREREQcSUFYRERERBxJQVhEREREHElBWEREREQcSUFYRERERBxJQVhEREREHCnI7gJ8jTEGgNzcXJsrEREREZGylOa00tx2PArC5ZSXlwdAfHy8zZWIiIiIyInk5eURERFx3Odd5mRRWby43W5++eUXatSogcvlOqfvlZubS3x8PDt37iQ8PPycvpecOzqP/kHn0T/oPPoHnUf/cC7PozGGvLw84uLiCAg4fiewZoTLKSAggLp161boe4aHh+sPuh/QefQPOo/+QefRP+g8+odzdR5PNBNcShfLiYiIiIgjKQiLiIiIiCMpCFdiISEhDB8+nJCQELtLkTOg8+gfdB79g86jf9B59A+V4TzqYjkRERERcSTNCIuIiIiIIykIi4iIiIgjKQiLiIiIiCMpCIuIiIiIIykIV2KTJ0+mQYMGhIaGkpCQwPLly+0uybGWLl3KTTfdRFxcHC6Xi48++sjreWMMw4YNo3bt2oSFhZGUlMTWrVu9xmRlZdGjRw/Cw8OJjIykd+/eHDhwwGvMunXr6NixI6GhocTHxzNmzJhz/dEcZdSoUVx22WXUqFGDWrVq0bVrV7Zs2eI15vDhw/Tv35/zzjuP6tWrc+utt5KZmek1ZseOHXTp0oWqVatSq1YtHnvsMYqLi73GLFmyhEsvvZSQkBAaN27MzJkzz/XHc4ypU6dy8cUXexbhT0xMZP78+Z7ndQ59z+jRo3G5XAwaNMjzmM6jbxgxYgQul8vraN68uef5Sn8ejVRKs2bNMsHBweaf//yn2bhxo+nTp4+JjIw0mZmZdpfmSPPmzTNPPfWU+eCDDwxgPvzwQ6/nR48ebSIiIsxHH31k1q5da/7yl7+Yhg0bmvz8fM+Y6667zrRu3dp8++235quvvjKNGzc23bt39zyfk5NjYmJiTI8ePcyGDRvMO++8Y8LCwsyrr75aUR/T7yUnJ5s33njDbNiwwaxZs8bccMMNpl69eubAgQOeMQ
888ICJj483ixYtMitXrjSXX365ueKKKzzPFxcXm5YtW5qkpCSzevVqM2/ePBMdHW2GDh3qGfPTTz+ZqlWrmpSUFLNp0yYzadIkExgYaBYsWFChn9dfffLJJ2bu3Lnm+++/N1u2bDFPPvmkqVKlitmwYYMxRufQ1yxfvtw0aNDAXHzxxWbgwIGex3UefcPw4cNNixYtzK+//uo59u7d63m+sp9HBeFKqn379qZ///6e+yUlJSYuLs6MGjXKxqrEGPOHIOx2u01sbKwZO3as57Hs7GwTEhJi3nnnHWOMMZs2bTKAWbFihWfM/PnzjcvlMrt37zbGGDNlyhQTFRVlCgoKPGOGDBlimjVrdo4/kXPt2bPHAObLL780xljnrUqVKua9997zjNm8ebMBTGpqqjHG+p+igIAAk5GR4RkzdepUEx4e7jl3jz/+uGnRooXXe3Xr1s0kJyef64/kWFFRUea1117TOfQxeXl5pkmTJmbhwoXmqquu8gRhnUffMXz4cNO6desyn/OF86jWiEqosLCQ9PR0kpKSPI8FBASQlJREamqqjZVJWbZt20ZGRobX+YqIiCAhIcFzvlJTU4mMjKRdu3aeMUlJSQQEBJCWluYZ06lTJ4KDgz1jkpOT2bJlC/v376+gT+MsOTk5ANSsWROA9PR0ioqKvM5l8+bNqVevnte5bNWqFTExMZ4xycnJ5ObmsnHjRs+YY79H6Rj9+T37SkpKmDVrFgcPHiQxMVHn0Mf079+fLl26/OFnrfPoW7Zu3UpcXByNGjWiR48e7NixA/CN86ggXAnt27ePkpISr/8oAGJiYsjIyLCpKjme0nNyovOVkZFBrVq1vJ4PCgqiZs2aXmPK+h7HvoecPW63m0GDBnHllVfSsmVLwPo5BwcHExkZ6TX29+fyZOfpeGNyc3PJz88/Fx/HcdavX0/16tUJCQnhgQce4MMPP+Siiy7SOfQhs2bNYtWqVYwaNeoPz+k8+o6EhARmzpzJggULmDp1Ktu2baNjx47k5eX5xHkMOqNXi4j4qP79+7Nhwwa+/vpru0uR09CsWTPWrFlDTk4O77//PnfffTdffvml3WXJKdq5cycDBw5k4cKFhIaG2l2OnIHrr7/e8/XFF19MQkIC9evX59133yUsLMzGyk6NZoQroejoaAIDA/9wVWVmZiaxsbE2VSXHU3pOTnS+YmNj2bNnj9fzxcXFZGVleY0p63sc+x5ydgwYMIA5c+bwxRdfULduXc/jsbGxFBYWkp2d7TX+9+fyZOfpeGPCw8N94h8GXxAcHEzjxo1p27Yto0aNonXr1kyYMEHn0Eekp6ezZ88eLr30UoKCgggKCuLLL79k4sSJBAUFERMTo/PooyIjI2natCk//PCDT/x5VBCuhIKDg2nbti2LFi3yPOZ2u1m0aBGJiYk2ViZladiwIbGxsV7nKzc3l7S0NM/5SkxMJDs7m/T0dM+YxYsX43a7SUhI8IxZunQpRUVFnjELFy6kWbNmREVFVdCn8W/GGAYMGMCHH37I4sWLadiwodfzbdu2pUqVKl7ncsuWLezYscPrXK5fv97rf2wWLlxIeHg4F110kWfMsd+jdIz+/J47brebgoICnUMf0blzZ9avX8+aNWs8R7t27ejRo4fna51H33TgwAF+/PFHateu7Rt/Hs/4cjs5J2bNmmVCQkLMzJkzzaZNm0zfvn1NZGSk11WVUnHy8vLM6tWrzerVqw1gXnrpJbN69Wrz888/G2Os5dMiIyPNxx9/bNatW2duvvnmMpdPu+SSS0xaWpr5+uuvTZMmTbyWT8vOzjYxMTHmrrvuMhs2bDCzZs0yVatW1fJpZ1G/fv1MRESEWbJkiddSP4cOHfKMeeCBB0y9evXM4sWLzcqVK01iYqJJTEz0PF+61M+1115r1qxZYxYsWGDOP//8Mpf6eeyxx8zmzZvN5MmTtWTTWfTEE0+YL7/80mzbts2sW7fOPPHEE8blcpnPP//cGKNz6KuOXT
XCGJ1HXzF48GCzZMkSs23bNrNs2TKTlJRkoqOjzZ49e4wxlf88KghXYpMmTTL16tUzwcHBpn379ubbb7+1uyTH+uKLLwzwh+Puu+82xlhLqD3zzDMmJibGhISEmM6dO5stW7Z4fY/ffvvNdO/e3VSvXt2Eh4ebXr16mby8PK8xa9euNR06dDAhISGmTp06ZvTo0RX1ER2hrHMImDfeeMMzJj8/3zz44IMmKirKVK1a1dxyyy3m119/9fo+27dvN9dff70JCwsz0dHRZvDgwaaoqMhrzBdffGHatGljgoODTaNGjbzeQ87Mvffea+rXr2+Cg4PN+eefbzp37uwJwcboHPqq3wdhnUff0K1bN1O7dm0THBxs6tSpY7p162Z++OEHz/OV/Ty6jDHmzOeVRURERER8i3qERURERMSRFIRFRERExJEUhEVERETEkRSERURERMSRFIRFRERExJEUhEVERETEkRSERURERMSRFIRFRERExJEUhEVERETEkRSERURERMSRFIRFRERExJEUhEVERETEkf4ftmKsAZ2XwyAAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "tb_dir = os.path.join(WORK_DIR, 'tensorboard_output')\n", + "fname = os.listdir(tb_dir)[0]\n", + "tb_path = os.path.join(tb_dir, fname)\n", + "#\n", + "data = read_tensorboard_file(tb_path)\n", + "print(data.keys())\n", + "_ = plot_image(data, 'loss', 0.9)\n", + "_ = plot_image(data, 'lr', 0)\n", + "_ = plot_image(data, 'evaluation/acc', 0)\n", + "_ = plot_image(data, 'evaluation/loss', 0)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 推理\n", + "推理部分见baichuan_infer.ipynb" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/pytorch/llm_agent/chatglm2_infer.ipynb b/examples/pytorch/llm_agent/chatglm2_infer.ipynb new file mode 100644 index 00000000..821da5e6 --- /dev/null +++ b/examples/pytorch/llm_agent/chatglm2_infer.ipynb @@ -0,0 +1,514 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ChatGLM2 推理" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 配置实验环境\n", + "The following code is copied from baichuan_infer.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-07-02 21:48:47,527] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 21:48:48,006 - modelscope - INFO - 
PyTorch version 2.0.1 Found.\n", + "2023-07-02 21:48:48,007 - modelscope - INFO - Loading ast index from /home/hackathon/.cache/modelscope/ast_indexer\n", + "2023-07-02 21:48:48,032 - modelscope - INFO - Loading done! Current index file version is 1.6.2, with md5 ddf811ee982377c1357284a2bfda3dec and a total number of 861 components indexed\n", + "2023-07-02 21:48:48,708 - modelscope - INFO - [0, 1]\n", + "2023-07-02 21:48:48,848 - modelscope - INFO - Using device: cuda:0,1\n" + ] + }, + { + "data": { + "text/plain": [ + "device(type='cuda', index=0)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from _common import *\n", + "from transformers import TextStreamer\n", + "device_ids = [0, 1]\n", + "select_device(device_ids)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 导入Model, Tokenizer\n", + "Note: 你需要设置CKPT_FPATH的内容, 指向`.bin`文件, 或`.pth`文件" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 21:48:49,227 - modelscope - INFO - Development mode use revision: v1.0.3\n", + "The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. \n", + "The tokenizer class you load from this checkpoint is 'ChatGLMTokenizer'. \n", + "The class this function is called from is 'ChatGLM2Tokenizer'.\n", + "2023-07-02 21:48:49,572 - modelscope - INFO - initialize model from /home/hackathon/.cache/modelscope/hub/ZhipuAI/chatglm2-6b\n", + "Failed to load cpm_kernels:No module named 'cpm_kernels'\n", + "The model weights are not tied. 
Please use the `tie_weights` method before using the `infer_auto_device` function.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b72b43e11bec49c78c8097deaffea8a7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/7 [00:00```JSON\n", + "{\"api_name\": \"modelscope_speech-generation\", \"url\": \"http://90.49.118.175:2603/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"parameters\": {\"text\": \"秋树红叶舞飘零,\n", + "山间小溪水潺潺。\n", + "微风拂面感清凉,\n", + "散步赏景心旷神怡。\", \"gender\": \"woman\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"result\": \"\"}\n", + "```<|endofexec|>\n", + "\n", + "-----------------------------------------------------------------------------------\n", + "[TEST]你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://159.1.4.174:3210/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n", + "\n", + "2. 
{\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://172.163.158.154:5325/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}}\n", + "\n", + "3. {\"plugin_name\": \"modelscope_text-address\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-address\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"url\": \"http://133.94.12.37:3160/\", \"paths\": [{\"name\": \"modelscope_text-address\", \"model_id\": \"/damo/mgeo_geographic_elements_tagging_chinese_base\", \"method\": \"post\", \"description\": \"针对中文的地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的地址信息\", \"required\": \"True\"}]}]}} \n", + "\n", + "### 用户\n", + "现在我给你另一条地址,请识别出里面的元素。输入地址:广东省深圳市南山区科技园北区 \n", + "\n", + "### 助手\n", + "<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-address\", \"url\": \"http://133.94.12.37:3160/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"科技园北区\"}\n", + "```<|endofexec|>\n", + "地址识别结果为:{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"科技园北区\"}。我识别出的元素包括:prov、city、district、town。\n", + "\n", + "[LABELS]<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-address\", \"url\": 
\"http://159.1.4.174:3210/damo/mgeo_geographic_elements_tagging_chinese_base\", \"parameters\": {\"text\": \"广东省深圳市南山区科技园北区\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}\n", + "```<|endofexec|>\n", + "地址识别json表示:{\"prov\": \"广东省\", \"city\": \"深圳市\", \"district\": \"南山区\", \"town\": \"\", \"community\": \"科技园北区\", \"poi\": \"\"}。我使用的模型是ModelScope的'damo/mgeo_geographic_elements_tagging_chinese_base'模型。这是基于达摩院联合高德发布的多任务多模态地址预训练底座MGeo模型微调得到的。\n", + "-----------------------------------------------------------------------------------\n", + "[TEST]你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。目前支持的插件信息如下,请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件,则需要将插件调用请求按照json格式给出,必须包含api_name、url、parameters字段,并在其前后使用<|startofthink|>和<|endofthink|>作为标志。然后你需要根据插件API调用结果生成合理的答复;若无需调用插件,则直接给出对应回复即可:\n", + "\n", + "1. {\"name\": \"modelscope_text-translation-zh2en\", \"description\": \"将输入的中文文本翻译成英文\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_text-translation-zh2en\", \"model_id\": \"/damo/nlp_csanmt_translation_zh2en\", \"method\": \"post\", \"description\": \"将输入的中文文本翻译成英文\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的中文文本\", \"required\": \"True\"}]}]}\n", + "\n", + "2. 
{\"name\": \"modelscope_speech-generation\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_speech-generation\", \"model_id\": \"/damo/speech_sambert-hifigan_tts_zh-cn_16k\", \"method\": \"post\", \"description\": \"针对回复的内容,用语音表示,同时可以选择是男声或者女声\", \"parameters\": [{\"name\": \"text\", \"description\": \"要转成语音的文本\", \"required\": \"True\"}, {\"name\": \"gender\", \"description\": \"用户身份\", \"required\": \"True\"}]}]}\n", + "\n", + "3. {\"name\": \"modelscope_image-generation\", \"description\": \"针对文本输入,生成对应的图片\", \"url\": \"http://api-inference.modelscope.cn/api-inference/v1/models\", \"paths\": [{\"name\": \"modelscope_image-generation\", \"model_id\": \"/damo/image_generation\", \"method\": \"post\", \"description\": \"针对文本输入,生成对应的图片\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本信息\", \"required\": \"True\"}]}]} \n", + "\n", + "### 用户\n", + "歌手:古巨基\n", + "歌曲名:爱情马戏班\n", + "经典歌词:情是何等诡秘能令人使出看家把戏;恋爱就像走纲线般惊险;为你献技 像马戏班\n", + "请结合以上信息,编写一个智能音响的播放导语,需要有文采,字数30字以内,凸显一下即将播放该歌曲 \n", + "\n", + "### 助手\n", + "爱情马戏班,由古巨基演唱,是一首充满马戏班元素的浪漫歌曲,歌词中描述了爱情的神秘和危险,是一首值得听一听的浪漫歌曲。\n", + "\n", + "[LABELS]亲爱的主人,今天我为您带来的是古巨基的经典之作——《爱情马戏班》。这首歌曲描绘了情与爱的神秘和惊险,让人们为之倾倒。让我们一起享受这场爱情的马戏表演吧!\n", + "-----------------------------------------------------------------------------------\n", + "[TEST]你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. 
{\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://114.42.178.183:8005/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://93.82.87.89:6631/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "3. 
{\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://4.105.93.165:8143/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n", + "\n", + "### 用户\n", + "按照给定的schema抽取出下面文本对应的信息\n", + "schema:{\"动物\": null, \"食物\": null, \"颜色\": null}\n", + "这只棕色的狗狗很喜欢吃狗粮。 \n", + "\n", + "### 助手\n", + "<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n", + "```<|endofexec|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n", + "```<|endofexec|>\n", + "<|startofexec|>```JSON\n", + "{\"prov\": \"http://4.105.93.165:8143/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n", + "```<|endofexec|>\n", + "<|startofexec|>```JSON\n", + "{\"prov\": 
\"http://4.105.93.165:8143/damo/nlp_structbert_siames\n", + "\n", + "[LABELS]<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://114.42.178.183:8005/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"这只棕色的狗狗很喜欢吃狗粮。\", \"schema\": \"{\\\"动物\\\": null, \\\"食物\\\": null, \\\"颜色\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"动物\": [\"狗狗\"], \"食物\": [\"狗粮\"], \"颜色\": [\"棕色\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n", + "-----------------------------------------------------------------------------------\n", + "[TEST]你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://28.179.171.5:6428/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "2. 
{\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://100.111.18.38:6408/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "3. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://144.67.18.142:6381/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n", + "\n", + "### 用户\n", + "按照给定的schema抽取出下面文本对应的信息\n", + "schema:{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n", + "谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。 \n", + "\n", + "### 助手\n", + "<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://100.111.18.38:6408/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"人物\": null, \"地理位置\": 
null, \"组织机构\": null}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调的通用信息抽取模型。\n", + "\n", + "[LABELS]<|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://100.111.18.38:6408/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"谷歌公司是一家全球知名的科技公司,总部位于美国的加利福尼亚州山景市。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"人物\": [], \"地理位置\": [\"美国\", \"加利福尼亚州山景市\"], \"组织机构\": [\"谷歌公司\"]}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n", + "-----------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n", + "for d in test_dataset[:5]:\n", + " system = d['system']\n", + " user = d['user']\n", + " assistant = d['assistant']\n", + " input_ids = tokenize_function(system, user, None, tokenizer)['input_ids']\n", + " print(f'[TEST]{tokenizer.decode(input_ids)}', end='')\n", + " input_ids = torch.tensor(input_ids)[None].cuda()\n", + " attention_mask = torch.ones_like(input_ids)\n", + " generate_ids = model.generate(input_ids=input_ids, max_new_tokens=512,\n", + " attention_mask=attention_mask,\n", + " streamer=streamer, pad_token_id=tokenizer.eos_token_id, \n", + " temperature=0.7, top_k=50, top_p=0.7, do_sample=True)\n", + " print()\n", + " print(f'[LABELS]{assistant}')\n", + " print('-----------------------------------------------------------------------------------')\n", + " # input('next[ENTER]')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hackathon", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", 
+ "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/pytorch/llm_agent/chatglm2_sft.ipynb b/examples/pytorch/llm_agent/chatglm2_sft.ipynb new file mode 100644 index 00000000..f1943086 --- /dev/null +++ b/examples/pytorch/llm_agent/chatglm2_sft.ipynb @@ -0,0 +1,1917 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ChatGLM2 + Lora + Agent\n", + "ChatGLM2-6B 是开源中英双语对话模型 ChatGLM-6B 的第二代版本,在保留了初代模型对话流畅、部署门槛较低等众多优秀特性的基础之上,ChatGLM2-6B 引入了如下新特性:\n", + "\n", + "1. 更强大的性能:基于 ChatGLM 初代模型的开发经验,我们全面升级了 ChatGLM2-6B 的基座模型。ChatGLM2-6B 使用了 GLM 的混合目标函数,经过了 1.4T 中英标识符的预训练与人类偏好对齐训练,评测结果显示,相比于初代模型,ChatGLM2-6B 在 MMLU(+23%)、CEval(+33%)、GSM8K(+571%) 、BBH(+60%)等数据集上的性能取得了大幅度的提升,在同尺寸开源模型中具有较强的竞争力。\n", + "\n", + "2. 更长的上下文:基于 FlashAttention 技术,我们将基座模型的上下文长度(Context Length)由 ChatGLM-6B 的 2K 扩展到了 32K,并在对话阶段使用 8K 的上下文长度训练,允许更多轮次的对话。但当前版本的 ChatGLM2-6B 对单轮超长文档的理解能力有限,我们会在后续迭代升级中着重进行优化。\n", + "\n", + "3. 更高效的推理:基于 Multi-Query Attention 技术,ChatGLM2-6B 有更高效的推理速度和更低的显存占用:在官方的模型实现下,推理速度相比初代提升了 42%,INT4 量化下,6G 显存支持的对话长度由 1K 提升到了 8K。" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. Ref: https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary\n", + "2. 以下脚本可以在2*A10环境下正常运行, 大概占用40G显存\n", + "3. 
python>=3.8" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 配置实验环境\n", + "The following code is copied from baichuan_sft.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install modelscope\n", + "# !pip install numpy pandas matplotlib scikit-learn\n", + "# !pip install transformers datasets\n", + "# !conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia\n", + "# !pip install tqdm tensorboard torchmetrics sentencepiece charset_normalizer accelerate\n", + "\n", + "# !pip install numpy -U # Resolve torchmetrics dependencies and update numpy" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-07-02 20:34:35,987] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 20:34:36,464 - modelscope - INFO - PyTorch version 2.0.1 Found.\n", + "2023-07-02 20:34:36,465 - modelscope - INFO - Loading ast index from /home/hackathon/.cache/modelscope/ast_indexer\n", + "2023-07-02 20:34:36,489 - modelscope - INFO - Loading done! 
Current index file version is 1.6.2, with md5 ddf811ee982377c1357284a2bfda3dec and a total number of 861 components indexed\n", + "2023-07-02 20:34:37,158 - modelscope - INFO - [0, 1]\n", + "2023-07-02 20:34:37,324 - modelscope - INFO - Using device: cuda:0,1\n", + "2023-07-02 20:34:37,326 - modelscope - INFO - Global seed set to 42\n" + ] + } + ], + "source": [ + "from _common import *\n", + "device_ids = [0, 1]\n", + "select_device(device_ids)\n", + "_ = seed_everything(42)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 导入Model, Tokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 20:34:37,660 - modelscope - INFO - Development mode use revision: v1.0.3\n", + "The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. \n", + "The tokenizer class you load from this checkpoint is 'ChatGLMTokenizer'. \n", + "The class this function is called from is 'ChatGLM2Tokenizer'.\n", + "2023-07-02 20:34:38,020 - modelscope - INFO - initialize model from /home/hackathon/.cache/modelscope/hub/ZhipuAI/chatglm2-6b\n", + "Failed to load cpm_kernels:No module named 'cpm_kernels'\n", + "The model weights are not tied. 
Please use the `tie_weights` method before using the `infer_auto_device` function.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "51826d090fb740e0a7d514e543af843b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/7 [00:00': 1, '': 2, '': 2}\n", + "2023-07-02 20:34:45,152 - modelscope - INFO - bos_token_id: 1, eos_token_id: 2, pad_token_id: 2\n" + ] + } + ], + "source": [ + "WORK_DIR = 'runs/chatglm2'\n", + "LORA_TARGET_MODULES = ['query_key_value']\n", + "#\n", + "model_dir = snapshot_download('ZhipuAI/chatglm2-6b', 'v1.0.6')\n", + "model, tokenizer = get_chatglm2_model_tokenizer(model_dir)\n", + "#\n", + "GRADIENT_CHECKPOINTING = True\n", + "if GRADIENT_CHECKPOINTING:\n", + " model.gradient_checkpointing_enable()\n", + " model.enable_input_require_grads()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 准备Lora\n", + "The following code is copied from baichuan_sft.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 20:34:45,215 - modelscope - INFO - lora_config: LoRAConfig(rank=8, replace_modules=['query_key_value'], lora_alpha=32, lora_dropout=0.1, merge_weights=True, use_merged_linear=False, enable_lora=None, fan_in_fan_out=False, bias='none', only_lora_trainable=True, pretrained_weights=None)\n", + "2023-07-02 20:34:49,932 - modelscope - INFO - transformer.embedding.word_embeddings.weight: requires_grad=False\n", + "2023-07-02 20:34:49,933 - modelscope - INFO - transformer.encoder.layers.0.input_layernorm.weight: requires_grad=False\n", + "2023-07-02 20:34:49,933 - modelscope - INFO - transformer.encoder.layers.0.self_attention.query_key_value.weight: requires_grad=False\n", + "2023-07-02 20:34:49,933 - modelscope - INFO - transformer.encoder.layers.0.self_attention.query_key_value.bias:
requires_grad=False\n", + "2023-07-02 20:34:49,934 - modelscope - INFO - transformer.encoder.layers.0.self_attention.query_key_value.lora_A: requires_grad=True\n", + "2023-07-02 20:34:49,934 - modelscope - INFO - transformer.encoder.layers.0.self_attention.query_key_value.lora_B: requires_grad=True\n", + "2023-07-02 20:34:49,934 - modelscope - INFO - transformer.encoder.layers.0.self_attention.dense.weight: requires_grad=False\n", + "2023-07-02 20:34:49,934 - modelscope - INFO - transformer.encoder.layers.0.post_attention_layernorm.weight: requires_grad=False\n", + "2023-07-02 20:34:49,935 - modelscope - INFO - transformer.encoder.layers.0.mlp.dense_h_to_4h.weight: requires_grad=False\n", + "2023-07-02 20:34:49,935 - modelscope - INFO - transformer.encoder.layers.0.mlp.dense_4h_to_h.weight: requires_grad=False\n", + "2023-07-02 20:34:49,936 - modelscope - INFO - transformer.encoder.layers.1.input_layernorm.weight: requires_grad=False\n", + "2023-07-02 20:34:49,936 - modelscope - INFO - transformer.encoder.layers.1.self_attention.query_key_value.weight: requires_grad=False\n", + "2023-07-02 20:34:49,936 - modelscope - INFO - transformer.encoder.layers.1.self_attention.query_key_value.bias: requires_grad=False\n", + "2023-07-02 20:34:49,937 - modelscope - INFO - transformer.encoder.layers.1.self_attention.query_key_value.lora_A: requires_grad=True\n", + "2023-07-02 20:34:49,937 - modelscope - INFO - transformer.encoder.layers.1.self_attention.query_key_value.lora_B: requires_grad=True\n", + "2023-07-02 20:34:49,937 - modelscope - INFO - transformer.encoder.layers.1.self_attention.dense.weight: requires_grad=False\n", + "2023-07-02 20:34:49,938 - modelscope - INFO - transformer.encoder.layers.1.post_attention_layernorm.weight: requires_grad=False\n", + "2023-07-02 20:34:49,938 - modelscope - INFO - transformer.encoder.layers.1.mlp.dense_h_to_4h.weight: requires_grad=False\n", + "2023-07-02 20:34:49,938 - modelscope - INFO - 
transformer.encoder.layers.1.mlp.dense_4h_to_h.weight: requires_grad=False\n", + "2023-07-02 20:34:49,938 - modelscope - INFO - transformer.encoder.layers.2.input_layernorm.weight: requires_grad=False\n", + "2023-07-02 20:34:49,939 - modelscope - INFO - ...\n", + "2023-07-02 20:34:49,941 - modelscope - INFO - ChatGLM2ForConditionalGeneration: 6245.5337M Params (1.9497M Trainable), 0.0000M Buffers.\n", + "2023-07-02 20:34:49,942 - modelscope - INFO - device: cuda:0, dtype: torch.float16\n" + ] + }, + { + "data": { + "text/plain": [ + "ChatGLM2ForConditionalGeneration(\n", + " (transformer): ChatGLMModel(\n", + " (embedding): Embedding(\n", + " (word_embeddings): Embedding(65024, 4096)\n", + " )\n", + " (rotary_pos_emb): RotaryEmbedding()\n", + " (encoder): GLMTransformer(\n", + " (layers): ModuleList(\n", + " (0-27): 28 x GLMBlock(\n", + " (input_layernorm): RMSNorm()\n", + " (self_attention): SelfAttention(\n", + " (query_key_value): Linear(\n", + " in_features=4096, out_features=4608, bias=True\n", + " (lora_dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (core_attention): CoreAttention(\n", + " (attention_dropout): Dropout(p=0.0, inplace=False)\n", + " )\n", + " (dense): Linear(in_features=4096, out_features=4096, bias=False)\n", + " )\n", + " (post_attention_layernorm): RMSNorm()\n", + " (mlp): MLP(\n", + " (dense_h_to_4h): Linear(in_features=4096, out_features=27392, bias=False)\n", + " (dense_4h_to_h): Linear(in_features=13696, out_features=4096, bias=False)\n", + " )\n", + " )\n", + " )\n", + " (final_layernorm): RMSNorm()\n", + " )\n", + " (output_layer): Linear(in_features=4096, out_features=65024, bias=False)\n", + " )\n", + ")" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "LORA_RANK = 8\n", + "LORA_ALPHA = 32\n", + "LORA_DROPOUT_P = 0.1\n", + "lora_config = LoRAConfig(\n", + " replace_modules=LORA_TARGET_MODULES,\n", + " rank=LORA_RANK,\n", + " lora_alpha=LORA_ALPHA,\n", + " 
lora_dropout=LORA_DROPOUT_P)\n", + "logger.info(f'lora_config: {lora_config}')\n", + "Swift.prepare_model(model, lora_config)\n", + "#\n", + "show_freeze_layers(model)\n", + "print_model_info(model)\n", + "_p = list(model.parameters())[100]\n", + "logger.info(f'device: {_p.device}, dtype: {_p.dtype}')\n", + "model.bfloat16()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 导入Dataset\n", + "The following code is copied from baichuan_sft.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 20:34:50,040 - modelscope - INFO - No subset_name specified, defaulting to the default\n", + "2023-07-02 20:34:50,479 - modelscope - WARNING - Reusing dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n", + "2023-07-02 20:34:50,479 - modelscope - INFO - Generating dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n", + "2023-07-02 20:34:50,480 - modelscope - INFO - Reusing cached meta-data file: /home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files/8c9e7b1aa666c8840cb938d877f2b99f\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "dac0fb3841854f6f867f0c639c6b2176", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading data files: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "beada7f3eb734a6485034e666e60285f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Extracting data files: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + 
}, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 5036/5036 [00:12<00:00, 403.83it/s]\n", + "2023-07-02 20:35:03,823 - modelscope - INFO - No subset_name specified, defaulting to the default\n", + "2023-07-02 20:35:04,269 - modelscope - WARNING - Reusing dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n", + "2023-07-02 20:35:04,270 - modelscope - INFO - Generating dataset ms_hackathon_23_agent_train_dev (/home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files)\n", + "2023-07-02 20:35:04,270 - modelscope - INFO - Reusing cached meta-data file: /home/hackathon/.cache/modelscope/hub/datasets/modelscope/ms_hackathon_23_agent_train_dev/master/data_files/941b733ec0354c2172a3386d8788bb37\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "82cacd1b06864eabb4e320a93d41691c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading data files: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "37d5dbf851b745fb90b12cb1e4167732", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Extracting data files: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 285/285 [00:00<00:00, 380.76it/s]\n", + "2023-07-02 20:35:05,192 - modelscope - INFO - Dataset Token Length: 888.357487±349.060492, min=48.000000, max=2039.000000, size=4982\n", + "2023-07-02 20:35:05,192 - modelscope - INFO - Dataset Token Length: 928.654804±330.133929, min=74.000000, max=1959.000000, size=281\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INPUT_IDS] 你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 
是2023年达摩院的工程师训练得到的。你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。1. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://109.199.101.10:1485/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "2. {\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://9.32.64.200:5873/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}}\n", + "\n", + "3. 
{\"plugin_name\": \"modelscope_text-ie\", \"plugin_owner\": \"ModelScopeGPT\", \"plugin_type\": \"default\", \"plugin_schema_for_model\": {\"name\": \"modelscope_text-ie\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"url\": \"http://54.149.78.185:3979/\", \"paths\": [{\"name\": \"modelscope_text-ie\", \"model_id\": \"/damo/nlp_structbert_siamese-uie_chinese-base\", \"method\": \"post\", \"description\": \"针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示\", \"parameters\": [{\"name\": \"text\", \"description\": \"用户输入的文本\", \"required\": \"True\"}, {\"name\": \"schema\", \"description\": \"要抽取信息的json表示\", \"required\": \"True\"}]}]}} \n", + "\n", + "### 用户\n", + "按照给定的schema抽取出下面文本对应的信息\n", + "schema:{\"人物\": null, \"地理位置\": null, \"组织机构\": null}\n", + "近日,美国政府宣布将对中国1000多种商品加征关税,并威胁进一步加征关税。 \n", + "\n", + "### 助手\n", + " <|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://9.32.64.200:5873/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"近日,美国政府宣布将对中国1000多种商品加征关税,并威胁进一步加征关税。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"人物\": [], \"地理位置\": [\"中国\", \"美国\"], \"组织机构\": []}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"人物\": [], \"地理位置\": [\"中国\", \"美国\"], \"组织机构\": []}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n", + "\n", + "[LABLES] ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ 
⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ ⁇ <|startofthink|>```JSON\n", + "{\"api_name\": \"modelscope_text-ie\", \"url\": \"http://9.32.64.200:5873/damo/nlp_structbert_siamese-uie_chinese-base\", \"parameters\": {\"text\": \"近日,美国政府宣布将对中国1000多种商品加征关税,并威胁进一步加征关税。\", \"schema\": \"{\\\"人物\\\": null, \\\"地理位置\\\": null, \\\"组织机构\\\": null}\"}}\n", + "```<|endofthink|>\n", + "\n", + "<|startofexec|>```JSON\n", + "{\"人物\": [], \"地理位置\": [\"中国\", \"美国\"], \"组织机构\": []}\n", + "```<|endofexec|>\n", + "信息抽取结果:{\"人物\": [], \"地理位置\": [\"中国\", \"美国\"], \"组织机构\": []}。我使用的模型是ModelScope的'damo/nlp_structbert_siamese-uie_chinese-base'模型。这是一个基于StructBERT预训练模型微调训练的通用信息抽取模型。\n" + ] + } + ], + "source": [ + "tokenize_function = partial(tokenize_function, tokenizer=tokenizer)\n", + "train_dataset = make_dataset('train', tokenize_function)\n", + "val_dataset = make_dataset('validation', 
tokenize_function)\n", + "# Data analysis\n", + "stat_dataset(train_dataset)\n", + "stat_dataset(val_dataset)\n", + "data_collate_fn = partial(data_collate_fn, tokenizer=tokenizer)\n", + "print_examples(train_dataset[0], tokenizer)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 配置Config\n", + "The following code is copied from baichuan_sft.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 20:35:05,244 - modelscope - INFO - work_dir: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505\n" + ] + } + ], + "source": [ + "cfg_file = os.path.join(model_dir, 'configuration.json')\n", + "#\n", + "BATCH_SIZE = 1\n", + "MAX_EPOCHS = 1\n", + "T_max = get_T_max(len(train_dataset), BATCH_SIZE, MAX_EPOCHS, True)\n", + "WORK_DIR = get_work_dir(WORK_DIR)\n", + "EVAL_INTERVAL = 200\n", + "CONFIG = Config({\n", + " 'train': {\n", + " 'dataloader': {\n", + " 'batch_size_per_gpu': BATCH_SIZE,\n", + " 'workers_per_gpu': 1,\n", + " 'shuffle': True,\n", + " 'drop_last': True,\n", + " 'pin_memory': True\n", + " },\n", + " 'max_epochs': MAX_EPOCHS,\n", + " 'work_dir': WORK_DIR,\n", + " 'optimizer': {\n", + " 'type': 'AdamW',\n", + " 'lr': 1e-4,\n", + " 'weight_decay': 0.01,\n", + " 'options': {\n", + " 'cumulative_iters': 16, 'grad_clip': {\n", + " 'norm_type': 2,\n", + " 'max_norm': 2.0\n", + " }\n", + " }\n", + " },\n", + " 'lr_scheduler': {\n", + " 'type': 'CosineAnnealingLR',\n", + " 'T_max': T_max,\n", + " 'eta_min': 1e-5,\n", + " 'options': {\n", + " 'by_epoch': False,\n", + " 'warmup': {\n", + " 'type': 'LinearWarmup',\n", + " 'warmup_ratio': 0.1,\n", + " 'warmup_iters': 200\n", + " }\n", + " }\n", + " },\n", + " 'hooks': [\n", + " {'type': 'CheckpointHook', 'by_epoch': False, 'interval': EVAL_INTERVAL, 'max_checkpoint_num': 1},\n", + " {'type': 'EvaluationHook', 'by_epoch': False, 'interval': 
EVAL_INTERVAL},\n", + " {'type': 'BestCkptSaverHook',\n", + " 'metric_key': 'acc',\n", + " 'save_best': True, 'rule': 'max', 'max_checkpoint_num': 1},\n", + " {'type': 'TextLoggerHook',\n", + " 'by_epoch': True, # Whether EpochBasedTrainer is used\n", + " 'interval': 5},\n", + " {'type': 'TensorboardHook', 'by_epoch': False, 'interval': 5}\n", + " ]\n", + " },\n", + " 'evaluation': {\n", + " 'dataloader': {\n", + " 'batch_size_per_gpu': BATCH_SIZE,\n", + " 'workers_per_gpu': 1,\n", + " 'shuffle': False,\n", + " 'drop_last': False,\n", + " 'pin_memory': True\n", + " },\n", + " 'metrics': [\n", + " {'type': 'my_metric', 'vocab_size': tokenizer.vocab_size}\n", + " ]\n", + " }\n", + "})" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 微调\n", + "The following code is copied from baichuan_sft.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-02 20:35:05,284 - modelscope - INFO - ==========================Training Config Start==========================\n", + "2023-07-02 20:35:05,285 - modelscope - INFO - {\n", + " \"framework\": \"pytorch\",\n", + " \"task\": \"chat\",\n", + " \"pipeline\": {\n", + " \"type\": \"chatglm26b-text-generation\"\n", + " },\n", + " \"allow_remote\": true,\n", + " \"train\": {\n", + " \"hooks\": [\n", + " {\n", + " \"type\": \"TensorboardHook\",\n", + " \"by_epoch\": false,\n", + " \"interval\": 5\n", + " }\n", + " ],\n", + " \"dataloader\": {\n", + " \"batch_size_per_gpu\": 1,\n", + " \"workers_per_gpu\": 1,\n", + " \"shuffle\": true,\n", + " \"drop_last\": true,\n", + " \"pin_memory\": true\n", + " },\n", + " \"max_epochs\": 1,\n", + " \"work_dir\": \"/home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505\",\n", + " \"optimizer\": {\n", + " \"type\": \"AdamW\",\n", + " \"lr\": 0.0001,\n", + " \"weight_decay\": 0.01,\n", + " \"options\": {\n", + " 
\"cumulative_iters\": 16,\n", + " \"grad_clip\": {\n", + " \"norm_type\": 2,\n", + " \"max_norm\": 2.0\n", + " }\n", + " }\n", + " },\n", + " \"lr_scheduler\": {\n", + " \"type\": \"CosineAnnealingLR\",\n", + " \"T_max\": 4982,\n", + " \"eta_min\": 1e-05,\n", + " \"options\": {\n", + " \"by_epoch\": false,\n", + " \"warmup\": {\n", + " \"type\": \"LinearWarmup\",\n", + " \"warmup_ratio\": 0.1,\n", + " \"warmup_iters\": 200\n", + " }\n", + " }\n", + " },\n", + " \"checkpoint\": {\n", + " \"period\": {\n", + " \"by_epoch\": false,\n", + " \"interval\": 200,\n", + " \"max_checkpoint_num\": 1\n", + " },\n", + " \"best\": {\n", + " \"metric_key\": \"acc\",\n", + " \"save_best\": true,\n", + " \"rule\": \"max\",\n", + " \"max_checkpoint_num\": 1\n", + " }\n", + " },\n", + " \"logging\": {\n", + " \"by_epoch\": true,\n", + " \"interval\": 5\n", + " }\n", + " },\n", + " \"evaluation\": {\n", + " \"dataloader\": {\n", + " \"batch_size_per_gpu\": 1,\n", + " \"workers_per_gpu\": 1,\n", + " \"shuffle\": false,\n", + " \"drop_last\": false,\n", + " \"pin_memory\": true\n", + " },\n", + " \"metrics\": [\n", + " {\n", + " \"type\": \"my_metric\",\n", + " \"vocab_size\": 64794\n", + " }\n", + " ],\n", + " \"period\": {\n", + " \"by_epoch\": false,\n", + " \"interval\": 200\n", + " }\n", + " }\n", + "}\n", + "2023-07-02 20:35:05,285 - modelscope - INFO - ===========================Training Config End===========================\n", + "2023-07-02 20:35:05,286 - modelscope - WARNING - ('OPTIMIZER', 'default', 'AdamW') not found in ast index file\n", + "2023-07-02 20:35:05,287 - modelscope - WARNING - ('LR_SCHEDULER', 'default', 'CosineAnnealingLR') not found in ast index file\n", + "2023-07-02 20:35:05,289 - modelscope - INFO - Stage: before_run:\n", + " (ABOVE_NORMAL) OptimizerHook \n", + " (LOW ) LrSchedulerHook \n", + " (LOW ) BestCkptSaverHook \n", + " (LOW ) CheckpointHook \n", + " (VERY_LOW ) TextLoggerHook \n", + " (VERY_LOW ) TensorboardHook \n", + " -------------------- \n", 
+ "Stage: before_train_epoch:\n", + " (LOW ) LrSchedulerHook \n", + " -------------------- \n", + "Stage: before_train_iter:\n", + " (ABOVE_NORMAL) OptimizerHook \n", + " -------------------- \n", + "Stage: after_train_iter:\n", + " (ABOVE_NORMAL) OptimizerHook \n", + " (NORMAL ) EvaluationHook \n", + " (LOW ) LrSchedulerHook \n", + " (LOW ) BestCkptSaverHook \n", + " (LOW ) CheckpointHook \n", + " (VERY_LOW ) TextLoggerHook \n", + " (VERY_LOW ) TensorboardHook \n", + " -------------------- \n", + "Stage: after_train_epoch:\n", + " (NORMAL ) EvaluationHook \n", + " (LOW ) LrSchedulerHook \n", + " (LOW ) BestCkptSaverHook \n", + " (LOW ) CheckpointHook \n", + " (VERY_LOW ) TextLoggerHook \n", + " (VERY_LOW ) TensorboardHook \n", + " -------------------- \n", + "Stage: after_val_epoch:\n", + " (VERY_LOW ) TextLoggerHook \n", + " (VERY_LOW ) TensorboardHook \n", + " -------------------- \n", + "Stage: after_run:\n", + " (LOW ) BestCkptSaverHook \n", + " (LOW ) CheckpointHook \n", + " (VERY_LOW ) TensorboardHook \n", + " -------------------- \n", + "2023-07-02 20:35:05,293 - modelscope - INFO - Checkpoints will be saved to /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505\n", + "2023-07-02 20:35:05,296 - modelscope - INFO - Checkpoints will be saved to /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505\n", + "2023-07-02 20:35:05,296 - modelscope - INFO - Text logs will be saved to /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505\n", + "2023-07-02 20:35:05,296 - modelscope - INFO - tensorboard files will be saved to /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/tensorboard_output\n", + "2023-07-02 20:35:09,665 - modelscope - INFO - epoch [1][5/4982]\tlr: 1.000e-05, memory: 9310, loss: 4.4797\n", + "2023-07-02 20:35:11,753 - modelscope - INFO - epoch [1][10/4982]\tlr: 1.000e-05, memory: 9653, loss: 4.4281\n", + "2023-07-02 20:35:15,111 - modelscope - INFO - epoch [1][15/4982]\tlr: 1.000e-05, memory: 11498, loss: 
5.4297\n", + "2023-07-02 20:35:18,142 - modelscope - INFO - epoch [1][20/4982]\tlr: 1.225e-05, memory: 12041, loss: 2.6703\n", + "2023-07-02 20:35:21,335 - modelscope - INFO - epoch [1][25/4982]\tlr: 1.450e-05, memory: 12041, loss: 2.5969\n", + "2023-07-02 20:35:24,524 - modelscope - INFO - epoch [1][30/4982]\tlr: 1.675e-05, memory: 12180, loss: 2.7797\n", + "2023-07-02 20:35:27,061 - modelscope - INFO - epoch [1][35/4982]\tlr: 1.900e-05, memory: 12180, loss: 5.0344\n", + "2023-07-02 20:35:29,749 - modelscope - INFO - epoch [1][40/4982]\tlr: 2.125e-05, memory: 12180, loss: 6.1875\n", + "2023-07-02 20:35:32,140 - modelscope - INFO - epoch [1][45/4982]\tlr: 2.350e-05, memory: 12180, loss: 4.5844\n", + "2023-07-02 20:35:35,367 - modelscope - INFO - epoch [1][50/4982]\tlr: 2.575e-05, memory: 12180, loss: 3.3578\n", + "2023-07-02 20:35:37,739 - modelscope - INFO - epoch [1][55/4982]\tlr: 2.800e-05, memory: 12180, loss: 3.0375\n", + "2023-07-02 20:35:41,595 - modelscope - INFO - epoch [1][60/4982]\tlr: 3.025e-05, memory: 12180, loss: 2.7219\n", + "2023-07-02 20:35:44,105 - modelscope - INFO - epoch [1][65/4982]\tlr: 3.250e-05, memory: 12180, loss: 4.8016\n", + "2023-07-02 20:35:46,069 - modelscope - INFO - epoch [1][70/4982]\tlr: 3.475e-05, memory: 12180, loss: 6.9406\n", + "2023-07-02 20:35:48,149 - modelscope - INFO - epoch [1][75/4982]\tlr: 3.700e-05, memory: 12180, loss: 3.2133\n", + "2023-07-02 20:35:50,371 - modelscope - INFO - epoch [1][80/4982]\tlr: 3.925e-05, memory: 12180, loss: 4.3719\n", + "2023-07-02 20:35:53,531 - modelscope - INFO - epoch [1][85/4982]\tlr: 4.150e-05, memory: 12180, loss: 5.8875\n", + "2023-07-02 20:35:55,682 - modelscope - INFO - epoch [1][90/4982]\tlr: 4.375e-05, memory: 12180, loss: 4.9297\n", + "2023-07-02 20:35:57,349 - modelscope - INFO - epoch [1][95/4982]\tlr: 4.600e-05, memory: 12180, loss: 5.8781\n", + "2023-07-02 20:36:00,218 - modelscope - INFO - epoch [1][100/4982]\tlr: 4.825e-05, memory: 12180, loss: 2.4125\n", + "2023-07-02 
20:36:02,674 - modelscope - INFO - epoch [1][105/4982]\tlr: 5.050e-05, memory: 12180, loss: 6.7234\n", + "2023-07-02 20:36:05,443 - modelscope - INFO - epoch [1][110/4982]\tlr: 5.275e-05, memory: 12180, loss: 3.7437\n", + "2023-07-02 20:36:08,231 - modelscope - INFO - epoch [1][115/4982]\tlr: 5.500e-05, memory: 12180, loss: 4.5187\n", + "2023-07-02 20:36:10,992 - modelscope - INFO - epoch [1][120/4982]\tlr: 5.725e-05, memory: 12180, loss: 4.3281\n", + "2023-07-02 20:36:12,907 - modelscope - INFO - epoch [1][125/4982]\tlr: 5.950e-05, memory: 12180, loss: 4.4422\n", + "2023-07-02 20:36:16,210 - modelscope - INFO - epoch [1][130/4982]\tlr: 6.175e-05, memory: 12992, loss: 5.8688\n", + "2023-07-02 20:36:18,791 - modelscope - INFO - epoch [1][135/4982]\tlr: 6.400e-05, memory: 12992, loss: 3.2531\n", + "2023-07-02 20:36:19,911 - modelscope - INFO - epoch [1][140/4982]\tlr: 6.625e-05, memory: 12992, loss: 5.1781\n", + "2023-07-02 20:36:22,445 - modelscope - INFO - epoch [1][145/4982]\tlr: 6.850e-05, memory: 12992, loss: 3.4523\n", + "2023-07-02 20:36:24,826 - modelscope - INFO - epoch [1][150/4982]\tlr: 7.075e-05, memory: 12992, loss: 4.6125\n", + "2023-07-02 20:36:26,567 - modelscope - INFO - epoch [1][155/4982]\tlr: 7.300e-05, memory: 12992, loss: 4.0859\n", + "2023-07-02 20:36:29,936 - modelscope - INFO - epoch [1][160/4982]\tlr: 7.525e-05, memory: 12992, loss: 3.4937\n", + "2023-07-02 20:36:32,253 - modelscope - INFO - epoch [1][165/4982]\tlr: 7.750e-05, memory: 12992, loss: 5.8266\n", + "2023-07-02 20:36:34,867 - modelscope - INFO - epoch [1][170/4982]\tlr: 7.975e-05, memory: 12992, loss: 2.7047\n", + "2023-07-02 20:36:38,118 - modelscope - INFO - epoch [1][175/4982]\tlr: 8.200e-05, memory: 12992, loss: 2.5844\n", + "2023-07-02 20:36:40,913 - modelscope - INFO - epoch [1][180/4982]\tlr: 8.425e-05, memory: 12992, loss: 3.9641\n", + "2023-07-02 20:36:43,807 - modelscope - INFO - epoch [1][185/4982]\tlr: 8.650e-05, memory: 12992, loss: 3.1375\n", + "2023-07-02 
20:36:46,624 - modelscope - INFO - epoch [1][190/4982]\tlr: 8.875e-05, memory: 12992, loss: 3.8813\n", + "2023-07-02 20:36:49,527 - modelscope - INFO - epoch [1][195/4982]\tlr: 9.100e-05, memory: 12992, loss: 3.6156\n", + "2023-07-02 20:36:51,833 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:05<00:00, 4.29it/s]\n", + "2023-07-02 20:37:57,381 - modelscope - INFO - Saving checkpoint at 200 iter\n", + "2023-07-02 20:37:57,410 - modelscope - INFO - Saving checkpoint at 200 iter\n", + "2023-07-02 20:37:57,436 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 12992, evaluation/acc: 0.6542, evaluation/loss: 3.4747, loss: 4.5406\n", + "2023-07-02 20:38:00,375 - modelscope - INFO - epoch [1][205/4982]\tlr: 9.550e-05, memory: 12992, loss: 3.8125\n", + "2023-07-02 20:38:03,071 - modelscope - INFO - epoch [1][210/4982]\tlr: 9.775e-05, memory: 12992, loss: 4.4109\n", + "2023-07-02 20:38:06,715 - modelscope - INFO - epoch [1][215/4982]\tlr: 1.000e-04, memory: 12992, loss: 2.2437\n", + "2023-07-02 20:38:09,499 - modelscope - INFO - epoch [1][220/4982]\tlr: 9.998e-05, memory: 12992, loss: 3.2750\n", + "2023-07-02 20:38:13,188 - modelscope - INFO - epoch [1][225/4982]\tlr: 9.996e-05, memory: 13730, loss: 3.2656\n", + "2023-07-02 20:38:15,237 - modelscope - INFO - epoch [1][230/4982]\tlr: 9.994e-05, memory: 13730, loss: 4.3750\n", + "2023-07-02 20:38:17,706 - modelscope - INFO - epoch [1][235/4982]\tlr: 9.992e-05, memory: 13730, loss: 3.2844\n", + "2023-07-02 20:38:20,429 - modelscope - INFO - epoch [1][240/4982]\tlr: 9.990e-05, memory: 13730, loss: 2.9766\n", + "2023-07-02 20:38:23,127 - modelscope - INFO - epoch [1][245/4982]\tlr: 9.988e-05, memory: 13730, loss: 4.4125\n", + "2023-07-02 20:38:26,058 - modelscope - INFO - epoch [1][250/4982]\tlr: 9.986e-05, memory: 13730, loss: 2.3047\n", + "2023-07-02 20:38:28,740 - modelscope - INFO - epoch [1][255/4982]\tlr: 9.984e-05, memory: 
13730, loss: 3.5484\n", + "2023-07-02 20:38:31,332 - modelscope - INFO - epoch [1][260/4982]\tlr: 9.982e-05, memory: 13730, loss: 4.4297\n", + "2023-07-02 20:38:33,632 - modelscope - INFO - epoch [1][265/4982]\tlr: 9.980e-05, memory: 13730, loss: 5.1078\n", + "2023-07-02 20:38:35,634 - modelscope - INFO - epoch [1][270/4982]\tlr: 9.977e-05, memory: 13730, loss: 4.2250\n", + "2023-07-02 20:38:37,731 - modelscope - INFO - epoch [1][275/4982]\tlr: 9.975e-05, memory: 13730, loss: 4.5984\n", + "2023-07-02 20:38:39,950 - modelscope - INFO - epoch [1][280/4982]\tlr: 9.973e-05, memory: 13730, loss: 4.0594\n", + "2023-07-02 20:38:42,470 - modelscope - INFO - epoch [1][285/4982]\tlr: 9.970e-05, memory: 13730, loss: 2.6523\n", + "2023-07-02 20:38:45,483 - modelscope - INFO - epoch [1][290/4982]\tlr: 9.968e-05, memory: 13730, loss: 2.5766\n", + "2023-07-02 20:38:47,773 - modelscope - INFO - epoch [1][295/4982]\tlr: 9.965e-05, memory: 13730, loss: 2.7078\n", + "2023-07-02 20:38:51,126 - modelscope - INFO - epoch [1][300/4982]\tlr: 9.963e-05, memory: 13730, loss: 5.0844\n", + "2023-07-02 20:38:53,948 - modelscope - INFO - epoch [1][305/4982]\tlr: 9.960e-05, memory: 13730, loss: 3.3844\n", + "2023-07-02 20:38:56,666 - modelscope - INFO - epoch [1][310/4982]\tlr: 9.958e-05, memory: 13730, loss: 3.1812\n", + "2023-07-02 20:38:59,269 - modelscope - INFO - epoch [1][315/4982]\tlr: 9.955e-05, memory: 13730, loss: 3.3219\n", + "2023-07-02 20:39:02,576 - modelscope - INFO - epoch [1][320/4982]\tlr: 9.952e-05, memory: 13730, loss: 2.0031\n", + "2023-07-02 20:39:04,494 - modelscope - INFO - epoch [1][325/4982]\tlr: 9.949e-05, memory: 13730, loss: 3.7469\n", + "2023-07-02 20:39:07,068 - modelscope - INFO - epoch [1][330/4982]\tlr: 9.947e-05, memory: 13730, loss: 3.0187\n", + "2023-07-02 20:39:09,719 - modelscope - INFO - epoch [1][335/4982]\tlr: 9.944e-05, memory: 13730, loss: 2.5828\n", + "2023-07-02 20:39:11,755 - modelscope - INFO - epoch [1][340/4982]\tlr: 9.941e-05, memory: 13730, 
loss: 4.1156\n", + "2023-07-02 20:39:14,258 - modelscope - INFO - epoch [1][345/4982]\tlr: 9.938e-05, memory: 13730, loss: 5.1594\n", + "2023-07-02 20:39:16,436 - modelscope - INFO - epoch [1][350/4982]\tlr: 9.935e-05, memory: 13730, loss: 4.0859\n", + "2023-07-02 20:39:19,643 - modelscope - INFO - epoch [1][355/4982]\tlr: 9.932e-05, memory: 13730, loss: 1.8391\n", + "2023-07-02 20:39:22,779 - modelscope - INFO - epoch [1][360/4982]\tlr: 9.929e-05, memory: 13730, loss: 2.0641\n", + "2023-07-02 20:39:25,402 - modelscope - INFO - epoch [1][365/4982]\tlr: 9.926e-05, memory: 13730, loss: 1.9453\n", + "2023-07-02 20:39:27,813 - modelscope - INFO - epoch [1][370/4982]\tlr: 9.923e-05, memory: 13730, loss: 3.8641\n", + "2023-07-02 20:39:30,315 - modelscope - INFO - epoch [1][375/4982]\tlr: 9.920e-05, memory: 13730, loss: 3.0281\n", + "2023-07-02 20:39:33,075 - modelscope - INFO - epoch [1][380/4982]\tlr: 9.916e-05, memory: 13730, loss: 1.9109\n", + "2023-07-02 20:39:35,539 - modelscope - INFO - epoch [1][385/4982]\tlr: 9.913e-05, memory: 13730, loss: 3.9797\n", + "2023-07-02 20:39:37,804 - modelscope - INFO - epoch [1][390/4982]\tlr: 9.910e-05, memory: 13730, loss: 4.4547\n", + "2023-07-02 20:39:40,277 - modelscope - INFO - epoch [1][395/4982]\tlr: 9.906e-05, memory: 13730, loss: 2.4516\n", + "2023-07-02 20:39:43,900 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.25it/s]\n", + "2023-07-02 20:40:50,049 - modelscope - INFO - Saving checkpoint at 400 iter\n", + "2023-07-02 20:40:50,080 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter200_acc0.6542276740074158\n", + "2023-07-02 20:40:50,083 - modelscope - INFO - Saving checkpoint at 400 iter\n", + "2023-07-02 20:40:50,113 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_200\n", + "2023-07-02 
20:40:50,115 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 13730, evaluation/acc: 0.6604, evaluation/loss: 3.0119, loss: 2.8062\n", + "2023-07-02 20:40:53,254 - modelscope - INFO - epoch [1][405/4982]\tlr: 9.900e-05, memory: 13730, loss: 3.2422\n", + "2023-07-02 20:40:55,618 - modelscope - INFO - epoch [1][410/4982]\tlr: 9.896e-05, memory: 13730, loss: 4.2297\n", + "2023-07-02 20:40:58,448 - modelscope - INFO - epoch [1][415/4982]\tlr: 9.893e-05, memory: 13730, loss: 3.6063\n", + "2023-07-02 20:41:00,872 - modelscope - INFO - epoch [1][420/4982]\tlr: 9.889e-05, memory: 13730, loss: 4.6141\n", + "2023-07-02 20:41:02,997 - modelscope - INFO - epoch [1][425/4982]\tlr: 9.885e-05, memory: 13730, loss: 5.2875\n", + "2023-07-02 20:41:06,866 - modelscope - INFO - epoch [1][430/4982]\tlr: 9.882e-05, memory: 13730, loss: 2.2109\n", + "2023-07-02 20:41:09,155 - modelscope - INFO - epoch [1][435/4982]\tlr: 9.878e-05, memory: 13730, loss: 2.5969\n", + "2023-07-02 20:41:11,158 - modelscope - INFO - epoch [1][440/4982]\tlr: 9.874e-05, memory: 13730, loss: 3.1453\n", + "2023-07-02 20:41:13,695 - modelscope - INFO - epoch [1][445/4982]\tlr: 9.870e-05, memory: 13730, loss: 4.1219\n", + "2023-07-02 20:41:16,481 - modelscope - INFO - epoch [1][450/4982]\tlr: 9.867e-05, memory: 13730, loss: 3.0016\n", + "2023-07-02 20:41:19,595 - modelscope - INFO - epoch [1][455/4982]\tlr: 9.863e-05, memory: 13730, loss: 2.0086\n", + "2023-07-02 20:41:22,798 - modelscope - INFO - epoch [1][460/4982]\tlr: 9.859e-05, memory: 13730, loss: 1.6477\n", + "2023-07-02 20:41:24,516 - modelscope - INFO - epoch [1][465/4982]\tlr: 9.855e-05, memory: 13730, loss: 5.0250\n", + "2023-07-02 20:41:26,807 - modelscope - INFO - epoch [1][470/4982]\tlr: 9.851e-05, memory: 13730, loss: 5.0906\n", + "2023-07-02 20:41:29,550 - modelscope - INFO - epoch [1][475/4982]\tlr: 9.847e-05, memory: 13730, loss: 3.1719\n", + "2023-07-02 20:41:31,558 - modelscope - INFO - epoch [1][480/4982]\tlr: 9.843e-05, memory: 13730, loss: 
3.0094\n", + "2023-07-02 20:41:34,367 - modelscope - INFO - epoch [1][485/4982]\tlr: 9.839e-05, memory: 13730, loss: 1.8000\n", + "2023-07-02 20:41:37,084 - modelscope - INFO - epoch [1][490/4982]\tlr: 9.834e-05, memory: 13730, loss: 3.2406\n", + "2023-07-02 20:41:39,602 - modelscope - INFO - epoch [1][495/4982]\tlr: 9.830e-05, memory: 13730, loss: 2.9141\n", + "2023-07-02 20:41:42,010 - modelscope - INFO - epoch [1][500/4982]\tlr: 9.826e-05, memory: 13730, loss: 3.1969\n", + "2023-07-02 20:41:44,328 - modelscope - INFO - epoch [1][505/4982]\tlr: 9.822e-05, memory: 13730, loss: 2.4125\n", + "2023-07-02 20:41:47,138 - modelscope - INFO - epoch [1][510/4982]\tlr: 9.817e-05, memory: 13730, loss: 2.3031\n", + "2023-07-02 20:41:50,494 - modelscope - INFO - epoch [1][515/4982]\tlr: 9.813e-05, memory: 13730, loss: 2.2938\n", + "2023-07-02 20:41:52,746 - modelscope - INFO - epoch [1][520/4982]\tlr: 9.808e-05, memory: 13730, loss: 3.8672\n", + "2023-07-02 20:41:54,958 - modelscope - INFO - epoch [1][525/4982]\tlr: 9.804e-05, memory: 13730, loss: 3.2156\n", + "2023-07-02 20:41:57,466 - modelscope - INFO - epoch [1][530/4982]\tlr: 9.799e-05, memory: 13730, loss: 3.0344\n", + "2023-07-02 20:42:00,137 - modelscope - INFO - epoch [1][535/4982]\tlr: 9.795e-05, memory: 13730, loss: 4.9406\n", + "2023-07-02 20:42:02,774 - modelscope - INFO - epoch [1][540/4982]\tlr: 9.790e-05, memory: 13730, loss: 3.3563\n", + "2023-07-02 20:42:05,715 - modelscope - INFO - epoch [1][545/4982]\tlr: 9.786e-05, memory: 13730, loss: 1.4797\n", + "2023-07-02 20:42:07,960 - modelscope - INFO - epoch [1][550/4982]\tlr: 9.781e-05, memory: 13730, loss: 3.8781\n", + "2023-07-02 20:42:11,011 - modelscope - INFO - epoch [1][555/4982]\tlr: 9.776e-05, memory: 13730, loss: 2.9297\n", + "2023-07-02 20:42:13,456 - modelscope - INFO - epoch [1][560/4982]\tlr: 9.771e-05, memory: 13730, loss: 3.8203\n", + "2023-07-02 20:42:15,443 - modelscope - INFO - epoch [1][565/4982]\tlr: 9.767e-05, memory: 13730, loss: 2.0219\n", 
+ "2023-07-02 20:42:18,846 - modelscope - INFO - epoch [1][570/4982]\tlr: 9.762e-05, memory: 13730, loss: 1.9281\n", + "2023-07-02 20:42:22,121 - modelscope - INFO - epoch [1][575/4982]\tlr: 9.757e-05, memory: 13730, loss: 2.6750\n", + "2023-07-02 20:42:25,145 - modelscope - INFO - epoch [1][580/4982]\tlr: 9.752e-05, memory: 13730, loss: 1.7852\n", + "2023-07-02 20:42:27,316 - modelscope - INFO - epoch [1][585/4982]\tlr: 9.747e-05, memory: 13730, loss: 2.8047\n", + "2023-07-02 20:42:29,441 - modelscope - INFO - epoch [1][590/4982]\tlr: 9.742e-05, memory: 13730, loss: 2.6773\n", + "2023-07-02 20:42:32,360 - modelscope - INFO - epoch [1][595/4982]\tlr: 9.737e-05, memory: 13730, loss: 1.9812\n", + "2023-07-02 20:42:35,221 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.24it/s]\n", + "2023-07-02 20:43:41,520 - modelscope - INFO - Saving checkpoint at 600 iter\n", + "2023-07-02 20:43:41,550 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter400_acc0.6604225635528564\n", + "2023-07-02 20:43:41,552 - modelscope - INFO - Saving checkpoint at 600 iter\n", + "2023-07-02 20:43:41,582 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_400\n", + "2023-07-02 20:43:41,584 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 13730, evaluation/acc: 0.6708, evaluation/loss: 2.5856, loss: 2.3328\n", + "2023-07-02 20:43:43,999 - modelscope - INFO - epoch [1][605/4982]\tlr: 9.726e-05, memory: 13730, loss: 2.6875\n", + "2023-07-02 20:43:47,119 - modelscope - INFO - epoch [1][610/4982]\tlr: 9.721e-05, memory: 13730, loss: 1.4031\n", + "2023-07-02 20:43:48,961 - modelscope - INFO - epoch [1][615/4982]\tlr: 9.716e-05, memory: 13730, loss: 2.9422\n", + "2023-07-02 20:43:51,931 - modelscope - INFO - epoch [1][620/4982]\tlr: 9.711e-05, memory: 13730, loss: 2.2016\n", 
+ "2023-07-02 20:43:55,085 - modelscope - INFO - epoch [1][625/4982]\tlr: 9.705e-05, memory: 13730, loss: 2.4344\n", + "2023-07-02 20:43:57,859 - modelscope - INFO - epoch [1][630/4982]\tlr: 9.700e-05, memory: 13730, loss: 1.9727\n", + "2023-07-02 20:44:00,652 - modelscope - INFO - epoch [1][635/4982]\tlr: 9.695e-05, memory: 13730, loss: 3.5047\n", + "2023-07-02 20:44:03,525 - modelscope - INFO - epoch [1][640/4982]\tlr: 9.689e-05, memory: 13730, loss: 2.3672\n", + "2023-07-02 20:44:06,457 - modelscope - INFO - epoch [1][645/4982]\tlr: 9.684e-05, memory: 13730, loss: 2.7797\n", + "2023-07-02 20:44:08,691 - modelscope - INFO - epoch [1][650/4982]\tlr: 9.678e-05, memory: 13730, loss: 1.9734\n", + "2023-07-02 20:44:11,608 - modelscope - INFO - epoch [1][655/4982]\tlr: 9.673e-05, memory: 13730, loss: 2.0531\n", + "2023-07-02 20:44:13,499 - modelscope - INFO - epoch [1][660/4982]\tlr: 9.667e-05, memory: 13730, loss: 2.8078\n", + "2023-07-02 20:44:15,767 - modelscope - INFO - epoch [1][665/4982]\tlr: 9.661e-05, memory: 13730, loss: 3.3703\n", + "2023-07-02 20:44:18,064 - modelscope - INFO - epoch [1][670/4982]\tlr: 9.656e-05, memory: 13730, loss: 3.2156\n", + "2023-07-02 20:44:20,955 - modelscope - INFO - epoch [1][675/4982]\tlr: 9.650e-05, memory: 13830, loss: 3.4172\n", + "2023-07-02 20:44:24,557 - modelscope - INFO - epoch [1][680/4982]\tlr: 9.644e-05, memory: 13830, loss: 1.4219\n", + "2023-07-02 20:44:27,433 - modelscope - INFO - epoch [1][685/4982]\tlr: 9.638e-05, memory: 13830, loss: 3.5094\n", + "2023-07-02 20:44:30,177 - modelscope - INFO - epoch [1][690/4982]\tlr: 9.632e-05, memory: 13830, loss: 2.3234\n", + "2023-07-02 20:44:32,790 - modelscope - INFO - epoch [1][695/4982]\tlr: 9.627e-05, memory: 13830, loss: 1.7906\n", + "2023-07-02 20:44:35,003 - modelscope - INFO - epoch [1][700/4982]\tlr: 9.621e-05, memory: 13830, loss: 3.4016\n", + "2023-07-02 20:44:38,237 - modelscope - INFO - epoch [1][705/4982]\tlr: 9.615e-05, memory: 13830, loss: 2.1484\n", + 
"2023-07-02 20:44:42,304 - modelscope - INFO - epoch [1][710/4982]\tlr: 9.609e-05, memory: 13830, loss: 1.9828\n", + "2023-07-02 20:44:45,293 - modelscope - INFO - epoch [1][715/4982]\tlr: 9.602e-05, memory: 13830, loss: 1.6828\n", + "2023-07-02 20:44:48,385 - modelscope - INFO - epoch [1][720/4982]\tlr: 9.596e-05, memory: 13830, loss: 2.0969\n", + "2023-07-02 20:44:50,846 - modelscope - INFO - epoch [1][725/4982]\tlr: 9.590e-05, memory: 13830, loss: 3.2031\n", + "2023-07-02 20:44:53,572 - modelscope - INFO - epoch [1][730/4982]\tlr: 9.584e-05, memory: 13830, loss: 2.8055\n", + "2023-07-02 20:44:54,918 - modelscope - INFO - epoch [1][735/4982]\tlr: 9.578e-05, memory: 13830, loss: 5.0641\n", + "2023-07-02 20:44:58,220 - modelscope - INFO - epoch [1][740/4982]\tlr: 9.572e-05, memory: 13830, loss: 2.5125\n", + "2023-07-02 20:45:01,363 - modelscope - INFO - epoch [1][745/4982]\tlr: 9.565e-05, memory: 13830, loss: 1.5758\n", + "2023-07-02 20:45:03,990 - modelscope - INFO - epoch [1][750/4982]\tlr: 9.559e-05, memory: 13830, loss: 2.3664\n", + "2023-07-02 20:45:06,603 - modelscope - INFO - epoch [1][755/4982]\tlr: 9.553e-05, memory: 13830, loss: 1.8188\n", + "2023-07-02 20:45:09,658 - modelscope - INFO - epoch [1][760/4982]\tlr: 9.546e-05, memory: 13830, loss: 2.6125\n", + "2023-07-02 20:45:12,102 - modelscope - INFO - epoch [1][765/4982]\tlr: 9.540e-05, memory: 13830, loss: 1.7031\n", + "2023-07-02 20:45:14,836 - modelscope - INFO - epoch [1][770/4982]\tlr: 9.533e-05, memory: 13830, loss: 1.7359\n", + "2023-07-02 20:45:17,436 - modelscope - INFO - epoch [1][775/4982]\tlr: 9.527e-05, memory: 13830, loss: 1.4336\n", + "2023-07-02 20:45:20,163 - modelscope - INFO - epoch [1][780/4982]\tlr: 9.520e-05, memory: 13830, loss: 2.5672\n", + "2023-07-02 20:45:23,429 - modelscope - INFO - epoch [1][785/4982]\tlr: 9.513e-05, memory: 13830, loss: 1.9164\n", + "2023-07-02 20:45:26,285 - modelscope - INFO - epoch [1][790/4982]\tlr: 9.507e-05, memory: 13830, loss: 2.3203\n", + 
"2023-07-02 20:45:28,656 - modelscope - INFO - epoch [1][795/4982]\tlr: 9.500e-05, memory: 13830, loss: 2.7672\n", + "2023-07-02 20:45:31,279 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.23it/s]\n", + "2023-07-02 20:46:37,656 - modelscope - INFO - Saving checkpoint at 800 iter\n", + "2023-07-02 20:46:37,685 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter600_acc0.6708211898803711\n", + "2023-07-02 20:46:37,687 - modelscope - INFO - Saving checkpoint at 800 iter\n", + "2023-07-02 20:46:37,715 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_600\n", + "2023-07-02 20:46:37,718 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 13830, evaluation/acc: 0.6881, evaluation/loss: 2.2625, loss: 2.6750\n", + "2023-07-02 20:46:40,639 - modelscope - INFO - epoch [1][805/4982]\tlr: 9.486e-05, memory: 13830, loss: 1.8695\n", + "2023-07-02 20:46:43,092 - modelscope - INFO - epoch [1][810/4982]\tlr: 9.480e-05, memory: 13830, loss: 2.8734\n", + "2023-07-02 20:46:46,484 - modelscope - INFO - epoch [1][815/4982]\tlr: 9.473e-05, memory: 13830, loss: 1.7906\n", + "2023-07-02 20:46:49,542 - modelscope - INFO - epoch [1][820/4982]\tlr: 9.466e-05, memory: 13830, loss: 2.6391\n", + "2023-07-02 20:46:52,581 - modelscope - INFO - epoch [1][825/4982]\tlr: 9.459e-05, memory: 13830, loss: 2.3250\n", + "2023-07-02 20:46:55,248 - modelscope - INFO - epoch [1][830/4982]\tlr: 9.452e-05, memory: 13830, loss: 2.3188\n", + "2023-07-02 20:46:58,323 - modelscope - INFO - epoch [1][835/4982]\tlr: 9.445e-05, memory: 13830, loss: 1.8852\n", + "2023-07-02 20:47:00,885 - modelscope - INFO - epoch [1][840/4982]\tlr: 9.438e-05, memory: 13830, loss: 2.5203\n", + "2023-07-02 20:47:03,739 - modelscope - INFO - epoch [1][845/4982]\tlr: 9.431e-05, memory: 13830, loss: 2.2563\n", + 
"2023-07-02 20:47:06,494 - modelscope - INFO - epoch [1][850/4982]\tlr: 9.424e-05, memory: 13830, loss: 2.4937\n", + "2023-07-02 20:47:08,653 - modelscope - INFO - epoch [1][855/4982]\tlr: 9.416e-05, memory: 13830, loss: 2.1844\n", + "2023-07-02 20:47:12,100 - modelscope - INFO - epoch [1][860/4982]\tlr: 9.409e-05, memory: 13830, loss: 2.6281\n", + "2023-07-02 20:47:14,954 - modelscope - INFO - epoch [1][865/4982]\tlr: 9.402e-05, memory: 13830, loss: 1.7703\n", + "2023-07-02 20:47:17,549 - modelscope - INFO - epoch [1][870/4982]\tlr: 9.395e-05, memory: 13830, loss: 3.3172\n", + "2023-07-02 20:47:20,094 - modelscope - INFO - epoch [1][875/4982]\tlr: 9.387e-05, memory: 13830, loss: 2.2594\n", + "2023-07-02 20:47:23,556 - modelscope - INFO - epoch [1][880/4982]\tlr: 9.380e-05, memory: 13830, loss: 2.6352\n", + "2023-07-02 20:47:25,327 - modelscope - INFO - epoch [1][885/4982]\tlr: 9.373e-05, memory: 13830, loss: 2.7180\n", + "2023-07-02 20:47:28,177 - modelscope - INFO - epoch [1][890/4982]\tlr: 9.365e-05, memory: 13830, loss: 2.3750\n", + "2023-07-02 20:47:30,955 - modelscope - INFO - epoch [1][895/4982]\tlr: 9.358e-05, memory: 13830, loss: 1.7266\n", + "2023-07-02 20:47:34,940 - modelscope - INFO - epoch [1][900/4982]\tlr: 9.350e-05, memory: 13830, loss: 2.1984\n", + "2023-07-02 20:47:37,402 - modelscope - INFO - epoch [1][905/4982]\tlr: 9.343e-05, memory: 13830, loss: 2.2336\n", + "2023-07-02 20:47:40,011 - modelscope - INFO - epoch [1][910/4982]\tlr: 9.335e-05, memory: 13830, loss: 2.7844\n", + "2023-07-02 20:47:42,601 - modelscope - INFO - epoch [1][915/4982]\tlr: 9.327e-05, memory: 13830, loss: 3.2297\n", + "2023-07-02 20:47:44,837 - modelscope - INFO - epoch [1][920/4982]\tlr: 9.320e-05, memory: 13830, loss: 2.4188\n", + "2023-07-02 20:47:47,897 - modelscope - INFO - epoch [1][925/4982]\tlr: 9.312e-05, memory: 13830, loss: 1.6863\n", + "2023-07-02 20:47:50,418 - modelscope - INFO - epoch [1][930/4982]\tlr: 9.304e-05, memory: 13830, loss: 3.9219\n", + 
"2023-07-02 20:47:52,672 - modelscope - INFO - epoch [1][935/4982]\tlr: 9.296e-05, memory: 13830, loss: 1.6926\n", + "2023-07-02 20:47:55,286 - modelscope - INFO - epoch [1][940/4982]\tlr: 9.289e-05, memory: 13830, loss: 1.7281\n", + "2023-07-02 20:47:59,111 - modelscope - INFO - epoch [1][945/4982]\tlr: 9.281e-05, memory: 13830, loss: 1.1969\n", + "2023-07-02 20:48:01,843 - modelscope - INFO - epoch [1][950/4982]\tlr: 9.273e-05, memory: 13830, loss: 1.6633\n", + "2023-07-02 20:48:04,387 - modelscope - INFO - epoch [1][955/4982]\tlr: 9.265e-05, memory: 13830, loss: 2.2094\n", + "2023-07-02 20:48:06,681 - modelscope - INFO - epoch [1][960/4982]\tlr: 9.257e-05, memory: 13830, loss: 2.1922\n", + "2023-07-02 20:48:09,850 - modelscope - INFO - epoch [1][965/4982]\tlr: 9.249e-05, memory: 13830, loss: 1.3594\n", + "2023-07-02 20:48:12,651 - modelscope - INFO - epoch [1][970/4982]\tlr: 9.241e-05, memory: 13830, loss: 1.7945\n", + "2023-07-02 20:48:15,819 - modelscope - INFO - epoch [1][975/4982]\tlr: 9.233e-05, memory: 13830, loss: 1.7203\n", + "2023-07-02 20:48:18,453 - modelscope - INFO - epoch [1][980/4982]\tlr: 9.225e-05, memory: 13830, loss: 1.8453\n", + "2023-07-02 20:48:20,628 - modelscope - INFO - epoch [1][985/4982]\tlr: 9.216e-05, memory: 13830, loss: 1.8086\n", + "2023-07-02 20:48:22,947 - modelscope - INFO - epoch [1][990/4982]\tlr: 9.208e-05, memory: 13830, loss: 2.6445\n", + "2023-07-02 20:48:25,309 - modelscope - INFO - epoch [1][995/4982]\tlr: 9.200e-05, memory: 13830, loss: 3.2172\n", + "2023-07-02 20:48:28,028 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.23it/s]\n", + "2023-07-02 20:49:34,496 - modelscope - INFO - Saving checkpoint at 1000 iter\n", + "2023-07-02 20:49:34,522 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter800_acc0.6881153583526611\n", + "2023-07-02 20:49:34,524 - 
modelscope - INFO - Saving checkpoint at 1000 iter\n", + "2023-07-02 20:49:34,548 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_800\n", + "2023-07-02 20:49:34,551 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 13830, evaluation/acc: 0.7003, evaluation/loss: 2.0893, loss: 2.7594\n", + "2023-07-02 20:49:37,631 - modelscope - INFO - epoch [1][1005/4982]\tlr: 9.183e-05, memory: 13830, loss: 1.3188\n", + "2023-07-02 20:49:40,106 - modelscope - INFO - epoch [1][1010/4982]\tlr: 9.175e-05, memory: 13830, loss: 2.3094\n", + "2023-07-02 20:49:42,559 - modelscope - INFO - epoch [1][1015/4982]\tlr: 9.167e-05, memory: 13830, loss: 2.4734\n", + "2023-07-02 20:49:44,919 - modelscope - INFO - epoch [1][1020/4982]\tlr: 9.158e-05, memory: 13830, loss: 2.0336\n", + "2023-07-02 20:49:49,264 - modelscope - INFO - epoch [1][1025/4982]\tlr: 9.150e-05, memory: 13861, loss: 1.0523\n", + "2023-07-02 20:49:51,204 - modelscope - INFO - epoch [1][1030/4982]\tlr: 9.141e-05, memory: 13861, loss: 3.1086\n", + "2023-07-02 20:49:53,066 - modelscope - INFO - epoch [1][1035/4982]\tlr: 9.133e-05, memory: 13861, loss: 2.3414\n", + "2023-07-02 20:49:56,035 - modelscope - INFO - epoch [1][1040/4982]\tlr: 9.124e-05, memory: 13861, loss: 2.2359\n", + "2023-07-02 20:49:59,351 - modelscope - INFO - epoch [1][1045/4982]\tlr: 9.116e-05, memory: 13861, loss: 1.9051\n", + "2023-07-02 20:50:01,989 - modelscope - INFO - epoch [1][1050/4982]\tlr: 9.107e-05, memory: 13861, loss: 1.5266\n", + "2023-07-02 20:50:04,982 - modelscope - INFO - epoch [1][1055/4982]\tlr: 9.098e-05, memory: 13861, loss: 2.5000\n", + "2023-07-02 20:50:07,348 - modelscope - INFO - epoch [1][1060/4982]\tlr: 9.090e-05, memory: 13861, loss: 2.9164\n", + "2023-07-02 20:50:10,149 - modelscope - INFO - epoch [1][1065/4982]\tlr: 9.081e-05, memory: 13861, loss: 2.1641\n", + "2023-07-02 20:50:13,289 - modelscope - INFO - epoch [1][1070/4982]\tlr: 9.072e-05, memory: 13861, loss: 
2.7469\n", + "2023-07-02 20:50:16,220 - modelscope - INFO - epoch [1][1075/4982]\tlr: 9.063e-05, memory: 13861, loss: 2.2922\n", + "2023-07-02 20:50:18,255 - modelscope - INFO - epoch [1][1080/4982]\tlr: 9.054e-05, memory: 13861, loss: 3.7016\n", + "2023-07-02 20:50:21,566 - modelscope - INFO - epoch [1][1085/4982]\tlr: 9.046e-05, memory: 13861, loss: 1.1164\n", + "2023-07-02 20:50:24,961 - modelscope - INFO - epoch [1][1090/4982]\tlr: 9.037e-05, memory: 13861, loss: 1.5523\n", + "2023-07-02 20:50:28,072 - modelscope - INFO - epoch [1][1095/4982]\tlr: 9.028e-05, memory: 13861, loss: 1.9781\n", + "2023-07-02 20:50:31,178 - modelscope - INFO - epoch [1][1100/4982]\tlr: 9.019e-05, memory: 13861, loss: 2.0867\n", + "2023-07-02 20:50:33,103 - modelscope - INFO - epoch [1][1105/4982]\tlr: 9.010e-05, memory: 13861, loss: 2.9258\n", + "2023-07-02 20:50:37,069 - modelscope - INFO - epoch [1][1110/4982]\tlr: 9.001e-05, memory: 14281, loss: 1.8297\n", + "2023-07-02 20:50:39,077 - modelscope - INFO - epoch [1][1115/4982]\tlr: 8.992e-05, memory: 14281, loss: 2.1539\n", + "2023-07-02 20:50:41,028 - modelscope - INFO - epoch [1][1120/4982]\tlr: 8.982e-05, memory: 14281, loss: 2.4891\n", + "2023-07-02 20:50:43,285 - modelscope - INFO - epoch [1][1125/4982]\tlr: 8.973e-05, memory: 14281, loss: 1.7930\n", + "2023-07-02 20:50:46,047 - modelscope - INFO - epoch [1][1130/4982]\tlr: 8.964e-05, memory: 14281, loss: 1.1984\n", + "2023-07-02 20:50:49,011 - modelscope - INFO - epoch [1][1135/4982]\tlr: 8.955e-05, memory: 14281, loss: 3.1102\n", + "2023-07-02 20:50:51,386 - modelscope - INFO - epoch [1][1140/4982]\tlr: 8.946e-05, memory: 14281, loss: 2.2969\n", + "2023-07-02 20:50:54,463 - modelscope - INFO - epoch [1][1145/4982]\tlr: 8.936e-05, memory: 14281, loss: 1.7891\n", + "2023-07-02 20:50:56,539 - modelscope - INFO - epoch [1][1150/4982]\tlr: 8.927e-05, memory: 14281, loss: 2.6641\n", + "2023-07-02 20:50:58,715 - modelscope - INFO - epoch [1][1155/4982]\tlr: 8.918e-05, memory: 14281, 
loss: 2.5141\n", + "2023-07-02 20:51:01,359 - modelscope - INFO - epoch [1][1160/4982]\tlr: 8.908e-05, memory: 14281, loss: 1.7031\n", + "2023-07-02 20:51:04,218 - modelscope - INFO - epoch [1][1165/4982]\tlr: 8.899e-05, memory: 14281, loss: 2.7891\n", + "2023-07-02 20:51:07,009 - modelscope - INFO - epoch [1][1170/4982]\tlr: 8.889e-05, memory: 14281, loss: 1.6977\n", + "2023-07-02 20:51:09,989 - modelscope - INFO - epoch [1][1175/4982]\tlr: 8.880e-05, memory: 14281, loss: 1.7984\n", + "2023-07-02 20:51:13,347 - modelscope - INFO - epoch [1][1180/4982]\tlr: 8.870e-05, memory: 14281, loss: 1.7750\n", + "2023-07-02 20:51:16,349 - modelscope - INFO - epoch [1][1185/4982]\tlr: 8.861e-05, memory: 14281, loss: 2.2219\n", + "2023-07-02 20:51:18,901 - modelscope - INFO - epoch [1][1190/4982]\tlr: 8.851e-05, memory: 14281, loss: 2.1070\n", + "2023-07-02 20:51:22,332 - modelscope - INFO - epoch [1][1195/4982]\tlr: 8.841e-05, memory: 14281, loss: 1.3805\n", + "2023-07-02 20:51:25,298 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.23it/s]\n", + "2023-07-02 20:52:31,792 - modelscope - INFO - Saving checkpoint at 1200 iter\n", + "2023-07-02 20:52:31,820 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter1000_acc0.7003207802772522\n", + "2023-07-02 20:52:31,822 - modelscope - INFO - Saving checkpoint at 1200 iter\n", + "2023-07-02 20:52:31,848 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_1000\n", + "2023-07-02 20:52:31,851 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14281, evaluation/acc: 0.7126, evaluation/loss: 1.9764, loss: 1.4297\n", + "2023-07-02 20:52:35,250 - modelscope - INFO - epoch [1][1205/4982]\tlr: 8.822e-05, memory: 14281, loss: 1.4805\n", + "2023-07-02 20:52:38,308 - modelscope - INFO - epoch [1][1210/4982]\tlr: 8.812e-05, 
memory: 14281, loss: 1.6289\n", + "2023-07-02 20:52:40,236 - modelscope - INFO - epoch [1][1215/4982]\tlr: 8.803e-05, memory: 14281, loss: 1.6109\n", + "2023-07-02 20:52:42,979 - modelscope - INFO - epoch [1][1220/4982]\tlr: 8.793e-05, memory: 14281, loss: 1.8672\n", + "2023-07-02 20:52:45,670 - modelscope - INFO - epoch [1][1225/4982]\tlr: 8.783e-05, memory: 14281, loss: 1.7875\n", + "2023-07-02 20:52:48,769 - modelscope - INFO - epoch [1][1230/4982]\tlr: 8.773e-05, memory: 14281, loss: 2.9453\n", + "2023-07-02 20:52:51,329 - modelscope - INFO - epoch [1][1235/4982]\tlr: 8.763e-05, memory: 14281, loss: 3.7453\n", + "2023-07-02 20:52:54,457 - modelscope - INFO - epoch [1][1240/4982]\tlr: 8.753e-05, memory: 14281, loss: 1.6602\n", + "2023-07-02 20:52:57,272 - modelscope - INFO - epoch [1][1245/4982]\tlr: 8.743e-05, memory: 14281, loss: 1.9398\n", + "2023-07-02 20:52:59,875 - modelscope - INFO - epoch [1][1250/4982]\tlr: 8.733e-05, memory: 14281, loss: 2.6437\n", + "2023-07-02 20:53:03,234 - modelscope - INFO - epoch [1][1255/4982]\tlr: 8.723e-05, memory: 14281, loss: 1.9438\n", + "2023-07-02 20:53:05,817 - modelscope - INFO - epoch [1][1260/4982]\tlr: 8.713e-05, memory: 14281, loss: 2.0344\n", + "2023-07-02 20:53:07,576 - modelscope - INFO - epoch [1][1265/4982]\tlr: 8.703e-05, memory: 14281, loss: 3.1516\n", + "2023-07-02 20:53:10,222 - modelscope - INFO - epoch [1][1270/4982]\tlr: 8.693e-05, memory: 14281, loss: 1.7117\n", + "2023-07-02 20:53:14,014 - modelscope - INFO - epoch [1][1275/4982]\tlr: 8.683e-05, memory: 14281, loss: 1.1664\n", + "2023-07-02 20:53:16,657 - modelscope - INFO - epoch [1][1280/4982]\tlr: 8.673e-05, memory: 14281, loss: 2.4438\n", + "2023-07-02 20:53:19,474 - modelscope - INFO - epoch [1][1285/4982]\tlr: 8.663e-05, memory: 14281, loss: 1.6219\n", + "2023-07-02 20:53:22,505 - modelscope - INFO - epoch [1][1290/4982]\tlr: 8.652e-05, memory: 14281, loss: 1.4367\n", + "2023-07-02 20:53:25,260 - modelscope - INFO - epoch [1][1295/4982]\tlr: 
8.642e-05, memory: 14281, loss: 2.8367\n", + "2023-07-02 20:53:27,856 - modelscope - INFO - epoch [1][1300/4982]\tlr: 8.632e-05, memory: 14281, loss: 2.7094\n", + "2023-07-02 20:53:30,269 - modelscope - INFO - epoch [1][1305/4982]\tlr: 8.621e-05, memory: 14281, loss: 2.2687\n", + "2023-07-02 20:53:32,850 - modelscope - INFO - epoch [1][1310/4982]\tlr: 8.611e-05, memory: 14281, loss: 1.6922\n", + "2023-07-02 20:53:35,441 - modelscope - INFO - epoch [1][1315/4982]\tlr: 8.601e-05, memory: 14281, loss: 1.6664\n", + "2023-07-02 20:53:38,415 - modelscope - INFO - epoch [1][1320/4982]\tlr: 8.590e-05, memory: 14281, loss: 1.8898\n", + "2023-07-02 20:53:41,871 - modelscope - INFO - epoch [1][1325/4982]\tlr: 8.580e-05, memory: 14281, loss: 1.3605\n", + "2023-07-02 20:53:44,517 - modelscope - INFO - epoch [1][1330/4982]\tlr: 8.569e-05, memory: 14281, loss: 1.8219\n", + "2023-07-02 20:53:46,642 - modelscope - INFO - epoch [1][1335/4982]\tlr: 8.559e-05, memory: 14281, loss: 2.2359\n", + "2023-07-02 20:53:49,682 - modelscope - INFO - epoch [1][1340/4982]\tlr: 8.548e-05, memory: 14281, loss: 1.8867\n", + "2023-07-02 20:53:52,314 - modelscope - INFO - epoch [1][1345/4982]\tlr: 8.538e-05, memory: 14281, loss: 1.0359\n", + "2023-07-02 20:53:53,796 - modelscope - INFO - epoch [1][1350/4982]\tlr: 8.527e-05, memory: 14281, loss: 3.0266\n", + "2023-07-02 20:53:55,582 - modelscope - INFO - epoch [1][1355/4982]\tlr: 8.516e-05, memory: 14281, loss: 3.4328\n", + "2023-07-02 20:53:57,793 - modelscope - INFO - epoch [1][1360/4982]\tlr: 8.506e-05, memory: 14281, loss: 1.6180\n", + "2023-07-02 20:54:00,871 - modelscope - INFO - epoch [1][1365/4982]\tlr: 8.495e-05, memory: 14281, loss: 1.6867\n", + "2023-07-02 20:54:03,738 - modelscope - INFO - epoch [1][1370/4982]\tlr: 8.484e-05, memory: 14281, loss: 1.8242\n", + "2023-07-02 20:54:05,352 - modelscope - INFO - epoch [1][1375/4982]\tlr: 8.474e-05, memory: 14281, loss: 3.2016\n", + "2023-07-02 20:54:08,417 - modelscope - INFO - epoch 
[1][1380/4982]\tlr: 8.463e-05, memory: 14281, loss: 1.9574\n", + "2023-07-02 20:54:11,057 - modelscope - INFO - epoch [1][1385/4982]\tlr: 8.452e-05, memory: 14281, loss: 2.2539\n", + "2023-07-02 20:54:13,691 - modelscope - INFO - epoch [1][1390/4982]\tlr: 8.441e-05, memory: 14281, loss: 1.7277\n", + "2023-07-02 20:54:17,235 - modelscope - INFO - epoch [1][1395/4982]\tlr: 8.430e-05, memory: 14281, loss: 1.1039\n", + "2023-07-02 20:54:18,839 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.22it/s]\n", + "2023-07-02 20:55:25,409 - modelscope - INFO - Saving checkpoint at 1400 iter\n", + "2023-07-02 20:55:25,440 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter1200_acc0.7125999927520752\n", + "2023-07-02 20:55:25,442 - modelscope - INFO - Saving checkpoint at 1400 iter\n", + "2023-07-02 20:55:25,472 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_1200\n", + "2023-07-02 20:55:25,475 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14281, evaluation/acc: 0.7218, evaluation/loss: 1.9104, loss: 1.8773\n", + "2023-07-02 20:55:28,676 - modelscope - INFO - epoch [1][1405/4982]\tlr: 8.408e-05, memory: 14281, loss: 2.2473\n", + "2023-07-02 20:55:32,047 - modelscope - INFO - epoch [1][1410/4982]\tlr: 8.397e-05, memory: 14281, loss: 1.2844\n", + "2023-07-02 20:55:34,358 - modelscope - INFO - epoch [1][1415/4982]\tlr: 8.386e-05, memory: 14281, loss: 2.6406\n", + "2023-07-02 20:55:37,290 - modelscope - INFO - epoch [1][1420/4982]\tlr: 8.375e-05, memory: 14281, loss: 1.2020\n", + "2023-07-02 20:55:39,572 - modelscope - INFO - epoch [1][1425/4982]\tlr: 8.364e-05, memory: 14281, loss: 2.3109\n", + "2023-07-02 20:55:41,133 - modelscope - INFO - epoch [1][1430/4982]\tlr: 8.353e-05, memory: 14281, loss: 3.6844\n", + "2023-07-02 20:55:44,293 - modelscope 
- INFO - epoch [1][1435/4982]\tlr: 8.342e-05, memory: 14281, loss: 1.2117\n", + "2023-07-02 20:55:47,573 - modelscope - INFO - epoch [1][1440/4982]\tlr: 8.331e-05, memory: 14281, loss: 1.3582\n", + "2023-07-02 20:55:49,943 - modelscope - INFO - epoch [1][1445/4982]\tlr: 8.320e-05, memory: 14281, loss: 1.8289\n", + "2023-07-02 20:55:52,281 - modelscope - INFO - epoch [1][1450/4982]\tlr: 8.309e-05, memory: 14281, loss: 1.6055\n", + "2023-07-02 20:55:55,483 - modelscope - INFO - epoch [1][1455/4982]\tlr: 8.297e-05, memory: 14281, loss: 0.7688\n", + "2023-07-02 20:55:57,759 - modelscope - INFO - epoch [1][1460/4982]\tlr: 8.286e-05, memory: 14281, loss: 2.2945\n", + "2023-07-02 20:56:00,237 - modelscope - INFO - epoch [1][1465/4982]\tlr: 8.275e-05, memory: 14281, loss: 1.8000\n", + "2023-07-02 20:56:03,402 - modelscope - INFO - epoch [1][1470/4982]\tlr: 8.264e-05, memory: 14281, loss: 1.0266\n", + "2023-07-02 20:56:04,994 - modelscope - INFO - epoch [1][1475/4982]\tlr: 8.252e-05, memory: 14281, loss: 2.0094\n", + "2023-07-02 20:56:06,787 - modelscope - INFO - epoch [1][1480/4982]\tlr: 8.241e-05, memory: 14281, loss: 1.9977\n", + "2023-07-02 20:56:09,900 - modelscope - INFO - epoch [1][1485/4982]\tlr: 8.230e-05, memory: 14281, loss: 2.0945\n", + "2023-07-02 20:56:12,226 - modelscope - INFO - epoch [1][1490/4982]\tlr: 8.218e-05, memory: 14281, loss: 2.9172\n", + "2023-07-02 20:56:14,763 - modelscope - INFO - epoch [1][1495/4982]\tlr: 8.207e-05, memory: 14281, loss: 1.8367\n", + "2023-07-02 20:56:17,535 - modelscope - INFO - epoch [1][1500/4982]\tlr: 8.195e-05, memory: 14281, loss: 1.4617\n", + "2023-07-02 20:56:19,733 - modelscope - INFO - epoch [1][1505/4982]\tlr: 8.184e-05, memory: 14281, loss: 1.9328\n", + "2023-07-02 20:56:22,653 - modelscope - INFO - epoch [1][1510/4982]\tlr: 8.172e-05, memory: 14281, loss: 1.5078\n", + "2023-07-02 20:56:26,133 - modelscope - INFO - epoch [1][1515/4982]\tlr: 8.161e-05, memory: 14281, loss: 2.1977\n", + "2023-07-02 20:56:28,551 - 
modelscope - INFO - epoch [1][1520/4982]\tlr: 8.149e-05, memory: 14281, loss: 2.2246\n", + "2023-07-02 20:56:31,182 - modelscope - INFO - epoch [1][1525/4982]\tlr: 8.138e-05, memory: 14281, loss: 1.9840\n", + "2023-07-02 20:56:33,710 - modelscope - INFO - epoch [1][1530/4982]\tlr: 8.126e-05, memory: 14281, loss: 1.5406\n", + "2023-07-02 20:56:36,337 - modelscope - INFO - epoch [1][1535/4982]\tlr: 8.114e-05, memory: 14281, loss: 1.9930\n", + "2023-07-02 20:56:39,530 - modelscope - INFO - epoch [1][1540/4982]\tlr: 8.103e-05, memory: 14281, loss: 1.8547\n", + "2023-07-02 20:56:42,288 - modelscope - INFO - epoch [1][1545/4982]\tlr: 8.091e-05, memory: 14281, loss: 1.2977\n", + "2023-07-02 20:56:44,838 - modelscope - INFO - epoch [1][1550/4982]\tlr: 8.079e-05, memory: 14281, loss: 1.9984\n", + "2023-07-02 20:56:46,590 - modelscope - INFO - epoch [1][1555/4982]\tlr: 8.068e-05, memory: 14281, loss: 3.7969\n", + "2023-07-02 20:56:49,311 - modelscope - INFO - epoch [1][1560/4982]\tlr: 8.056e-05, memory: 14281, loss: 3.0336\n", + "2023-07-02 20:56:52,158 - modelscope - INFO - epoch [1][1565/4982]\tlr: 8.044e-05, memory: 14281, loss: 1.2789\n", + "2023-07-02 20:56:54,583 - modelscope - INFO - epoch [1][1570/4982]\tlr: 8.032e-05, memory: 14281, loss: 2.0461\n", + "2023-07-02 20:56:57,318 - modelscope - INFO - epoch [1][1575/4982]\tlr: 8.020e-05, memory: 14281, loss: 1.3301\n", + "2023-07-02 20:57:00,187 - modelscope - INFO - epoch [1][1580/4982]\tlr: 8.008e-05, memory: 14281, loss: 1.4945\n", + "2023-07-02 20:57:02,809 - modelscope - INFO - epoch [1][1585/4982]\tlr: 7.997e-05, memory: 14281, loss: 1.7984\n", + "2023-07-02 20:57:05,103 - modelscope - INFO - epoch [1][1590/4982]\tlr: 7.985e-05, memory: 14281, loss: 2.2133\n", + "2023-07-02 20:57:07,880 - modelscope - INFO - epoch [1][1595/4982]\tlr: 7.973e-05, memory: 14281, loss: 1.4664\n", + "2023-07-02 20:57:10,754 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test 
samples: 100%|██████████| 281/281 [01:06<00:00, 4.22it/s]\n", + "2023-07-02 20:58:17,336 - modelscope - INFO - Saving checkpoint at 1600 iter\n", + "2023-07-02 20:58:17,364 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter1400_acc0.7218371033668518\n", + "2023-07-02 20:58:17,366 - modelscope - INFO - Saving checkpoint at 1600 iter\n", + "2023-07-02 20:58:17,392 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_1400\n", + "2023-07-02 20:58:17,395 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14281, evaluation/acc: 0.7349, evaluation/loss: 1.8596, loss: 0.7406\n", + "2023-07-02 20:58:19,762 - modelscope - INFO - epoch [1][1605/4982]\tlr: 7.949e-05, memory: 14281, loss: 2.4625\n", + "2023-07-02 20:58:22,187 - modelscope - INFO - epoch [1][1610/4982]\tlr: 7.937e-05, memory: 14281, loss: 2.0211\n", + "2023-07-02 20:58:24,593 - modelscope - INFO - epoch [1][1615/4982]\tlr: 7.925e-05, memory: 14281, loss: 1.8141\n", + "2023-07-02 20:58:26,348 - modelscope - INFO - epoch [1][1620/4982]\tlr: 7.913e-05, memory: 14281, loss: 2.8254\n", + "2023-07-02 20:58:28,996 - modelscope - INFO - epoch [1][1625/4982]\tlr: 7.900e-05, memory: 14281, loss: 1.3973\n", + "2023-07-02 20:58:31,382 - modelscope - INFO - epoch [1][1630/4982]\tlr: 7.888e-05, memory: 14281, loss: 2.4805\n", + "2023-07-02 20:58:34,123 - modelscope - INFO - epoch [1][1635/4982]\tlr: 7.876e-05, memory: 14281, loss: 1.2414\n", + "2023-07-02 20:58:37,249 - modelscope - INFO - epoch [1][1640/4982]\tlr: 7.864e-05, memory: 14281, loss: 1.7254\n", + "2023-07-02 20:58:40,060 - modelscope - INFO - epoch [1][1645/4982]\tlr: 7.852e-05, memory: 14281, loss: 2.1672\n", + "2023-07-02 20:58:42,200 - modelscope - INFO - epoch [1][1650/4982]\tlr: 7.840e-05, memory: 14281, loss: 2.4047\n", + "2023-07-02 20:58:44,560 - modelscope - INFO - epoch [1][1655/4982]\tlr: 7.827e-05, memory: 14281, loss: 1.7063\n", + 
"2023-07-02 20:58:47,535 - modelscope - INFO - epoch [1][1660/4982]\tlr: 7.815e-05, memory: 14281, loss: 1.3406\n", + "2023-07-02 20:58:50,161 - modelscope - INFO - epoch [1][1665/4982]\tlr: 7.803e-05, memory: 14281, loss: 2.4453\n", + "2023-07-02 20:58:52,380 - modelscope - INFO - epoch [1][1670/4982]\tlr: 7.791e-05, memory: 14281, loss: 1.7500\n", + "2023-07-02 20:58:54,351 - modelscope - INFO - epoch [1][1675/4982]\tlr: 7.778e-05, memory: 14281, loss: 2.8453\n", + "2023-07-02 20:58:55,966 - modelscope - INFO - epoch [1][1680/4982]\tlr: 7.766e-05, memory: 14281, loss: 1.8719\n", + "2023-07-02 20:58:58,457 - modelscope - INFO - epoch [1][1685/4982]\tlr: 7.754e-05, memory: 14281, loss: 2.1156\n", + "2023-07-02 20:59:01,212 - modelscope - INFO - epoch [1][1690/4982]\tlr: 7.741e-05, memory: 14281, loss: 1.7188\n", + "2023-07-02 20:59:04,057 - modelscope - INFO - epoch [1][1695/4982]\tlr: 7.729e-05, memory: 14281, loss: 2.5672\n", + "2023-07-02 20:59:07,177 - modelscope - INFO - epoch [1][1700/4982]\tlr: 7.716e-05, memory: 14281, loss: 1.0508\n", + "2023-07-02 20:59:09,355 - modelscope - INFO - epoch [1][1705/4982]\tlr: 7.704e-05, memory: 14281, loss: 1.8687\n", + "2023-07-02 20:59:11,209 - modelscope - INFO - epoch [1][1710/4982]\tlr: 7.691e-05, memory: 14281, loss: 2.7281\n", + "2023-07-02 20:59:14,101 - modelscope - INFO - epoch [1][1715/4982]\tlr: 7.679e-05, memory: 14281, loss: 1.0727\n", + "2023-07-02 20:59:16,660 - modelscope - INFO - epoch [1][1720/4982]\tlr: 7.666e-05, memory: 14281, loss: 1.6773\n", + "2023-07-02 20:59:18,798 - modelscope - INFO - epoch [1][1725/4982]\tlr: 7.654e-05, memory: 14281, loss: 2.3687\n", + "2023-07-02 20:59:20,724 - modelscope - INFO - epoch [1][1730/4982]\tlr: 7.641e-05, memory: 14281, loss: 1.9219\n", + "2023-07-02 20:59:23,591 - modelscope - INFO - epoch [1][1735/4982]\tlr: 7.629e-05, memory: 14281, loss: 1.5344\n", + "2023-07-02 20:59:27,214 - modelscope - INFO - epoch [1][1740/4982]\tlr: 7.616e-05, memory: 14281, loss: 
0.5793\n", + "2023-07-02 20:59:29,708 - modelscope - INFO - epoch [1][1745/4982]\tlr: 7.603e-05, memory: 14281, loss: 1.4609\n", + "2023-07-02 20:59:32,082 - modelscope - INFO - epoch [1][1750/4982]\tlr: 7.591e-05, memory: 14281, loss: 1.0852\n", + "2023-07-02 20:59:34,683 - modelscope - INFO - epoch [1][1755/4982]\tlr: 7.578e-05, memory: 14281, loss: 1.5297\n", + "2023-07-02 20:59:36,962 - modelscope - INFO - epoch [1][1760/4982]\tlr: 7.565e-05, memory: 14281, loss: 2.9937\n", + "2023-07-02 20:59:39,715 - modelscope - INFO - epoch [1][1765/4982]\tlr: 7.553e-05, memory: 14281, loss: 2.1242\n", + "2023-07-02 20:59:42,455 - modelscope - INFO - epoch [1][1770/4982]\tlr: 7.540e-05, memory: 14281, loss: 2.3789\n", + "2023-07-02 20:59:45,020 - modelscope - INFO - epoch [1][1775/4982]\tlr: 7.527e-05, memory: 14281, loss: 1.8289\n", + "2023-07-02 20:59:46,865 - modelscope - INFO - epoch [1][1780/4982]\tlr: 7.515e-05, memory: 14281, loss: 2.0219\n", + "2023-07-02 20:59:50,367 - modelscope - INFO - epoch [1][1785/4982]\tlr: 7.502e-05, memory: 14281, loss: 2.6187\n", + "2023-07-02 20:59:52,626 - modelscope - INFO - epoch [1][1790/4982]\tlr: 7.489e-05, memory: 14281, loss: 2.3051\n", + "2023-07-02 20:59:54,711 - modelscope - INFO - epoch [1][1795/4982]\tlr: 7.476e-05, memory: 14281, loss: 2.3953\n", + "2023-07-02 20:59:56,419 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.22it/s]\n", + "2023-07-02 21:01:03,053 - modelscope - INFO - Saving checkpoint at 1800 iter\n", + "2023-07-02 21:01:03,080 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter1600_acc0.7349275350570679\n", + "2023-07-02 21:01:03,082 - modelscope - INFO - Saving checkpoint at 1800 iter\n", + "2023-07-02 21:01:03,106 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_1600\n", + 
"2023-07-02 21:01:03,109 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14281, evaluation/acc: 0.7401, evaluation/loss: 1.8176, loss: 2.8625\n", + "2023-07-02 21:01:05,753 - modelscope - INFO - epoch [1][1805/4982]\tlr: 7.450e-05, memory: 14281, loss: 1.8352\n", + "2023-07-02 21:01:08,030 - modelscope - INFO - epoch [1][1810/4982]\tlr: 7.438e-05, memory: 14281, loss: 2.1453\n", + "2023-07-02 21:01:10,702 - modelscope - INFO - epoch [1][1815/4982]\tlr: 7.425e-05, memory: 14281, loss: 1.6281\n", + "2023-07-02 21:01:13,348 - modelscope - INFO - epoch [1][1820/4982]\tlr: 7.412e-05, memory: 14281, loss: 2.3008\n", + "2023-07-02 21:01:16,272 - modelscope - INFO - epoch [1][1825/4982]\tlr: 7.399e-05, memory: 14281, loss: 2.2414\n", + "2023-07-02 21:01:19,067 - modelscope - INFO - epoch [1][1830/4982]\tlr: 7.386e-05, memory: 14281, loss: 2.8672\n", + "2023-07-02 21:01:21,555 - modelscope - INFO - epoch [1][1835/4982]\tlr: 7.373e-05, memory: 14281, loss: 2.3172\n", + "2023-07-02 21:01:24,755 - modelscope - INFO - epoch [1][1840/4982]\tlr: 7.360e-05, memory: 14281, loss: 0.9746\n", + "2023-07-02 21:01:27,186 - modelscope - INFO - epoch [1][1845/4982]\tlr: 7.347e-05, memory: 14281, loss: 1.4992\n", + "2023-07-02 21:01:30,804 - modelscope - INFO - epoch [1][1850/4982]\tlr: 7.334e-05, memory: 14281, loss: 2.0031\n", + "2023-07-02 21:01:34,075 - modelscope - INFO - epoch [1][1855/4982]\tlr: 7.321e-05, memory: 14281, loss: 1.3766\n", + "2023-07-02 21:01:36,465 - modelscope - INFO - epoch [1][1860/4982]\tlr: 7.308e-05, memory: 14281, loss: 2.3203\n", + "2023-07-02 21:01:39,721 - modelscope - INFO - epoch [1][1865/4982]\tlr: 7.295e-05, memory: 14281, loss: 2.5617\n", + "2023-07-02 21:01:43,444 - modelscope - INFO - epoch [1][1870/4982]\tlr: 7.281e-05, memory: 14281, loss: 0.8551\n", + "2023-07-02 21:01:46,641 - modelscope - INFO - epoch [1][1875/4982]\tlr: 7.268e-05, memory: 14281, loss: 2.1117\n", + "2023-07-02 21:01:49,075 - modelscope - INFO - epoch [1][1880/4982]\tlr: 
7.255e-05, memory: 14281, loss: 1.9414\n", + "2023-07-02 21:01:51,733 - modelscope - INFO - epoch [1][1885/4982]\tlr: 7.242e-05, memory: 14281, loss: 1.3805\n", + "2023-07-02 21:01:54,863 - modelscope - INFO - epoch [1][1890/4982]\tlr: 7.229e-05, memory: 14281, loss: 2.0562\n", + "2023-07-02 21:01:56,818 - modelscope - INFO - epoch [1][1895/4982]\tlr: 7.216e-05, memory: 14281, loss: 2.2391\n", + "2023-07-02 21:01:59,267 - modelscope - INFO - epoch [1][1900/4982]\tlr: 7.202e-05, memory: 14281, loss: 2.3027\n", + "2023-07-02 21:02:01,900 - modelscope - INFO - epoch [1][1905/4982]\tlr: 7.189e-05, memory: 14281, loss: 1.8711\n", + "2023-07-02 21:02:05,392 - modelscope - INFO - epoch [1][1910/4982]\tlr: 7.176e-05, memory: 14281, loss: 1.0352\n", + "2023-07-02 21:02:07,808 - modelscope - INFO - epoch [1][1915/4982]\tlr: 7.163e-05, memory: 14281, loss: 1.9133\n", + "2023-07-02 21:02:10,597 - modelscope - INFO - epoch [1][1920/4982]\tlr: 7.149e-05, memory: 14281, loss: 1.5922\n", + "2023-07-02 21:02:13,358 - modelscope - INFO - epoch [1][1925/4982]\tlr: 7.136e-05, memory: 14281, loss: 2.3203\n", + "2023-07-02 21:02:15,288 - modelscope - INFO - epoch [1][1930/4982]\tlr: 7.123e-05, memory: 14281, loss: 1.5707\n", + "2023-07-02 21:02:17,292 - modelscope - INFO - epoch [1][1935/4982]\tlr: 7.110e-05, memory: 14281, loss: 2.6484\n", + "2023-07-02 21:02:20,830 - modelscope - INFO - epoch [1][1940/4982]\tlr: 7.096e-05, memory: 14281, loss: 0.7172\n", + "2023-07-02 21:02:22,944 - modelscope - INFO - epoch [1][1945/4982]\tlr: 7.083e-05, memory: 14281, loss: 2.1992\n", + "2023-07-02 21:02:25,967 - modelscope - INFO - epoch [1][1950/4982]\tlr: 7.069e-05, memory: 14281, loss: 1.1105\n", + "2023-07-02 21:02:28,446 - modelscope - INFO - epoch [1][1955/4982]\tlr: 7.056e-05, memory: 14281, loss: 1.2781\n", + "2023-07-02 21:02:31,222 - modelscope - INFO - epoch [1][1960/4982]\tlr: 7.043e-05, memory: 14281, loss: 2.7156\n", + "2023-07-02 21:02:33,689 - modelscope - INFO - epoch 
[1][1965/4982]\tlr: 7.029e-05, memory: 14281, loss: 2.1977\n", + "2023-07-02 21:02:36,277 - modelscope - INFO - epoch [1][1970/4982]\tlr: 7.016e-05, memory: 14281, loss: 1.8652\n", + "2023-07-02 21:02:39,628 - modelscope - INFO - epoch [1][1975/4982]\tlr: 7.002e-05, memory: 14281, loss: 0.9414\n", + "2023-07-02 21:02:41,404 - modelscope - INFO - epoch [1][1980/4982]\tlr: 6.989e-05, memory: 14281, loss: 2.2672\n", + "2023-07-02 21:02:44,260 - modelscope - INFO - epoch [1][1985/4982]\tlr: 6.975e-05, memory: 14281, loss: 2.0039\n", + "2023-07-02 21:02:46,214 - modelscope - INFO - epoch [1][1990/4982]\tlr: 6.962e-05, memory: 14281, loss: 2.1391\n", + "2023-07-02 21:02:48,596 - modelscope - INFO - epoch [1][1995/4982]\tlr: 6.948e-05, memory: 14281, loss: 2.2766\n", + "2023-07-02 21:02:51,578 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.24it/s]\n", + "2023-07-02 21:03:57,832 - modelscope - INFO - Saving checkpoint at 2000 iter\n", + "2023-07-02 21:03:57,857 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter1800_acc0.7400715351104736\n", + "2023-07-02 21:03:57,860 - modelscope - INFO - Saving checkpoint at 2000 iter\n", + "2023-07-02 21:03:57,883 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_1800\n", + "2023-07-02 21:03:57,885 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14281, evaluation/acc: 0.7442, evaluation/loss: 1.7936, loss: 1.5309\n", + "2023-07-02 21:04:00,725 - modelscope - INFO - epoch [1][2005/4982]\tlr: 6.921e-05, memory: 14281, loss: 1.2211\n", + "2023-07-02 21:04:02,917 - modelscope - INFO - epoch [1][2010/4982]\tlr: 6.908e-05, memory: 14281, loss: 2.4078\n", + "2023-07-02 21:04:05,194 - modelscope - INFO - epoch [1][2015/4982]\tlr: 6.894e-05, memory: 14281, loss: 2.0891\n", + "2023-07-02 21:04:06,825 - modelscope 
- INFO - epoch [1][2020/4982]\tlr: 6.881e-05, memory: 14281, loss: 2.4773\n", + "2023-07-02 21:04:09,109 - modelscope - INFO - epoch [1][2025/4982]\tlr: 6.867e-05, memory: 14281, loss: 1.7293\n", + "2023-07-02 21:04:12,824 - modelscope - INFO - epoch [1][2030/4982]\tlr: 6.854e-05, memory: 14281, loss: 0.9602\n", + "2023-07-02 21:04:15,460 - modelscope - INFO - epoch [1][2035/4982]\tlr: 6.840e-05, memory: 14281, loss: 1.4973\n", + "2023-07-02 21:04:18,540 - modelscope - INFO - epoch [1][2040/4982]\tlr: 6.826e-05, memory: 14281, loss: 2.0359\n", + "2023-07-02 21:04:21,265 - modelscope - INFO - epoch [1][2045/4982]\tlr: 6.813e-05, memory: 14281, loss: 1.5586\n", + "2023-07-02 21:04:24,566 - modelscope - INFO - epoch [1][2050/4982]\tlr: 6.799e-05, memory: 14281, loss: 1.3984\n", + "2023-07-02 21:04:27,716 - modelscope - INFO - epoch [1][2055/4982]\tlr: 6.785e-05, memory: 14281, loss: 1.6156\n", + "2023-07-02 21:04:29,775 - modelscope - INFO - epoch [1][2060/4982]\tlr: 6.772e-05, memory: 14281, loss: 2.4398\n", + "2023-07-02 21:04:33,407 - modelscope - INFO - epoch [1][2065/4982]\tlr: 6.758e-05, memory: 14281, loss: 1.2191\n", + "2023-07-02 21:04:35,873 - modelscope - INFO - epoch [1][2070/4982]\tlr: 6.744e-05, memory: 14281, loss: 1.5117\n", + "2023-07-02 21:04:38,406 - modelscope - INFO - epoch [1][2075/4982]\tlr: 6.731e-05, memory: 14281, loss: 1.5688\n", + "2023-07-02 21:04:40,452 - modelscope - INFO - epoch [1][2080/4982]\tlr: 6.717e-05, memory: 14281, loss: 1.3535\n", + "2023-07-02 21:04:42,464 - modelscope - INFO - epoch [1][2085/4982]\tlr: 6.703e-05, memory: 14281, loss: 3.2313\n", + "2023-07-02 21:04:44,395 - modelscope - INFO - epoch [1][2090/4982]\tlr: 6.689e-05, memory: 14281, loss: 1.8109\n", + "2023-07-02 21:04:47,097 - modelscope - INFO - epoch [1][2095/4982]\tlr: 6.676e-05, memory: 14281, loss: 2.6109\n", + "2023-07-02 21:04:50,488 - modelscope - INFO - epoch [1][2100/4982]\tlr: 6.662e-05, memory: 14281, loss: 2.3133\n", + "2023-07-02 21:04:53,478 - 
modelscope - INFO - epoch [1][2105/4982]\tlr: 6.648e-05, memory: 14281, loss: 1.5336\n", + "2023-07-02 21:04:56,669 - modelscope - INFO - epoch [1][2110/4982]\tlr: 6.634e-05, memory: 14281, loss: 1.8234\n", + "2023-07-02 21:05:00,502 - modelscope - INFO - epoch [1][2115/4982]\tlr: 6.620e-05, memory: 14329, loss: 3.0766\n", + "2023-07-02 21:05:02,541 - modelscope - INFO - epoch [1][2120/4982]\tlr: 6.607e-05, memory: 14329, loss: 1.3789\n", + "2023-07-02 21:05:05,161 - modelscope - INFO - epoch [1][2125/4982]\tlr: 6.593e-05, memory: 14329, loss: 1.5391\n", + "2023-07-02 21:05:07,009 - modelscope - INFO - epoch [1][2130/4982]\tlr: 6.579e-05, memory: 14329, loss: 2.6172\n", + "2023-07-02 21:05:10,521 - modelscope - INFO - epoch [1][2135/4982]\tlr: 6.565e-05, memory: 14329, loss: 1.7750\n", + "2023-07-02 21:05:13,068 - modelscope - INFO - epoch [1][2140/4982]\tlr: 6.551e-05, memory: 14329, loss: 2.1238\n", + "2023-07-02 21:05:15,637 - modelscope - INFO - epoch [1][2145/4982]\tlr: 6.537e-05, memory: 14329, loss: 2.5039\n", + "2023-07-02 21:05:18,628 - modelscope - INFO - epoch [1][2150/4982]\tlr: 6.523e-05, memory: 14329, loss: 1.6203\n", + "2023-07-02 21:05:21,523 - modelscope - INFO - epoch [1][2155/4982]\tlr: 6.510e-05, memory: 14329, loss: 0.9555\n", + "2023-07-02 21:05:24,213 - modelscope - INFO - epoch [1][2160/4982]\tlr: 6.496e-05, memory: 14329, loss: 2.1133\n", + "2023-07-02 21:05:27,402 - modelscope - INFO - epoch [1][2165/4982]\tlr: 6.482e-05, memory: 14329, loss: 1.1963\n", + "2023-07-02 21:05:29,840 - modelscope - INFO - epoch [1][2170/4982]\tlr: 6.468e-05, memory: 14329, loss: 1.3637\n", + "2023-07-02 21:05:32,853 - modelscope - INFO - epoch [1][2175/4982]\tlr: 6.454e-05, memory: 14329, loss: 1.7201\n", + "2023-07-02 21:05:35,628 - modelscope - INFO - epoch [1][2180/4982]\tlr: 6.440e-05, memory: 14329, loss: 2.0109\n", + "2023-07-02 21:05:38,589 - modelscope - INFO - epoch [1][2185/4982]\tlr: 6.426e-05, memory: 14329, loss: 1.2418\n", + "2023-07-02 
21:05:40,918 - modelscope - INFO - epoch [1][2190/4982]\tlr: 6.412e-05, memory: 14329, loss: 2.0758\n", + "2023-07-02 21:05:43,421 - modelscope - INFO - epoch [1][2195/4982]\tlr: 6.398e-05, memory: 14329, loss: 1.7094\n", + "2023-07-02 21:05:46,523 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.21it/s]\n", + "2023-07-02 21:06:53,212 - modelscope - INFO - Saving checkpoint at 2200 iter\n", + "2023-07-02 21:06:53,240 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter2000_acc0.7442383766174316\n", + "2023-07-02 21:06:53,243 - modelscope - INFO - Saving checkpoint at 2200 iter\n", + "2023-07-02 21:06:53,269 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_2000\n", + "2023-07-02 21:06:53,272 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14329, evaluation/acc: 0.7494, evaluation/loss: 1.7767, loss: 2.1570\n", + "2023-07-02 21:06:55,998 - modelscope - INFO - epoch [1][2205/4982]\tlr: 6.370e-05, memory: 14329, loss: 1.3469\n", + "2023-07-02 21:06:59,535 - modelscope - INFO - epoch [1][2210/4982]\tlr: 6.356e-05, memory: 14329, loss: 1.3730\n", + "2023-07-02 21:07:01,992 - modelscope - INFO - epoch [1][2215/4982]\tlr: 6.342e-05, memory: 14329, loss: 2.2066\n", + "2023-07-02 21:07:04,789 - modelscope - INFO - epoch [1][2220/4982]\tlr: 6.328e-05, memory: 14329, loss: 1.7098\n", + "2023-07-02 21:07:07,714 - modelscope - INFO - epoch [1][2225/4982]\tlr: 6.314e-05, memory: 14329, loss: 2.0953\n", + "2023-07-02 21:07:09,812 - modelscope - INFO - epoch [1][2230/4982]\tlr: 6.300e-05, memory: 14329, loss: 2.3914\n", + "2023-07-02 21:07:12,315 - modelscope - INFO - epoch [1][2235/4982]\tlr: 6.286e-05, memory: 14329, loss: 2.6797\n", + "2023-07-02 21:07:15,918 - modelscope - INFO - epoch [1][2240/4982]\tlr: 6.272e-05, memory: 14329, loss: 1.3217\n", 
+ "2023-07-02 21:07:19,044 - modelscope - INFO - epoch [1][2245/4982]\tlr: 6.258e-05, memory: 14329, loss: 1.4527\n", + "2023-07-02 21:07:21,636 - modelscope - INFO - epoch [1][2250/4982]\tlr: 6.244e-05, memory: 14329, loss: 2.1770\n", + "2023-07-02 21:07:23,761 - modelscope - INFO - epoch [1][2255/4982]\tlr: 6.230e-05, memory: 14329, loss: 1.8191\n", + "2023-07-02 21:07:25,994 - modelscope - INFO - epoch [1][2260/4982]\tlr: 6.216e-05, memory: 14329, loss: 1.3582\n", + "2023-07-02 21:07:28,770 - modelscope - INFO - epoch [1][2265/4982]\tlr: 6.202e-05, memory: 14329, loss: 1.0121\n", + "2023-07-02 21:07:32,193 - modelscope - INFO - epoch [1][2270/4982]\tlr: 6.188e-05, memory: 14329, loss: 1.0039\n", + "2023-07-02 21:07:34,881 - modelscope - INFO - epoch [1][2275/4982]\tlr: 6.174e-05, memory: 14329, loss: 1.2828\n", + "2023-07-02 21:07:37,688 - modelscope - INFO - epoch [1][2280/4982]\tlr: 6.159e-05, memory: 14329, loss: 1.4516\n", + "2023-07-02 21:07:40,006 - modelscope - INFO - epoch [1][2285/4982]\tlr: 6.145e-05, memory: 14329, loss: 1.5963\n", + "2023-07-02 21:07:42,993 - modelscope - INFO - epoch [1][2290/4982]\tlr: 6.131e-05, memory: 14329, loss: 2.7687\n", + "2023-07-02 21:07:46,133 - modelscope - INFO - epoch [1][2295/4982]\tlr: 6.117e-05, memory: 14329, loss: 1.5977\n", + "2023-07-02 21:07:47,508 - modelscope - INFO - epoch [1][2300/4982]\tlr: 6.103e-05, memory: 14329, loss: 2.5945\n", + "2023-07-02 21:07:50,902 - modelscope - INFO - epoch [1][2305/4982]\tlr: 6.089e-05, memory: 14329, loss: 1.2125\n", + "2023-07-02 21:07:53,059 - modelscope - INFO - epoch [1][2310/4982]\tlr: 6.075e-05, memory: 14329, loss: 2.2883\n", + "2023-07-02 21:07:56,237 - modelscope - INFO - epoch [1][2315/4982]\tlr: 6.061e-05, memory: 14329, loss: 0.8787\n", + "2023-07-02 21:07:59,345 - modelscope - INFO - epoch [1][2320/4982]\tlr: 6.046e-05, memory: 14329, loss: 2.6320\n", + "2023-07-02 21:08:02,587 - modelscope - INFO - epoch [1][2325/4982]\tlr: 6.032e-05, memory: 14329, loss: 
1.4213\n", + "2023-07-02 21:08:04,652 - modelscope - INFO - epoch [1][2330/4982]\tlr: 6.018e-05, memory: 14329, loss: 2.7547\n", + "2023-07-02 21:08:07,208 - modelscope - INFO - epoch [1][2335/4982]\tlr: 6.004e-05, memory: 14329, loss: 2.1891\n", + "2023-07-02 21:08:09,836 - modelscope - INFO - epoch [1][2340/4982]\tlr: 5.990e-05, memory: 14329, loss: 1.9711\n", + "2023-07-02 21:08:12,642 - modelscope - INFO - epoch [1][2345/4982]\tlr: 5.976e-05, memory: 14329, loss: 1.2281\n", + "2023-07-02 21:08:15,772 - modelscope - INFO - epoch [1][2350/4982]\tlr: 5.961e-05, memory: 14329, loss: 1.1650\n", + "2023-07-02 21:08:18,568 - modelscope - INFO - epoch [1][2355/4982]\tlr: 5.947e-05, memory: 14329, loss: 1.0545\n", + "2023-07-02 21:08:21,580 - modelscope - INFO - epoch [1][2360/4982]\tlr: 5.933e-05, memory: 14329, loss: 2.3699\n", + "2023-07-02 21:08:24,345 - modelscope - INFO - epoch [1][2365/4982]\tlr: 5.919e-05, memory: 14329, loss: 1.7188\n", + "2023-07-02 21:08:27,132 - modelscope - INFO - epoch [1][2370/4982]\tlr: 5.905e-05, memory: 14329, loss: 0.8174\n", + "2023-07-02 21:08:28,995 - modelscope - INFO - epoch [1][2375/4982]\tlr: 5.891e-05, memory: 14329, loss: 2.0500\n", + "2023-07-02 21:08:32,221 - modelscope - INFO - epoch [1][2380/4982]\tlr: 5.876e-05, memory: 14329, loss: 0.8354\n", + "2023-07-02 21:08:34,747 - modelscope - INFO - epoch [1][2385/4982]\tlr: 5.862e-05, memory: 14329, loss: 1.3457\n", + "2023-07-02 21:08:38,256 - modelscope - INFO - epoch [1][2390/4982]\tlr: 5.848e-05, memory: 14329, loss: 1.9180\n", + "2023-07-02 21:08:40,701 - modelscope - INFO - epoch [1][2395/4982]\tlr: 5.834e-05, memory: 14329, loss: 1.1666\n", + "2023-07-02 21:08:43,933 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.23it/s]\n", + "2023-07-02 21:09:50,373 - modelscope - INFO - Saving checkpoint at 2400 iter\n", + "2023-07-02 21:09:50,402 - modelscope - INFO - 
deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter2200_acc0.749400794506073\n", + "2023-07-02 21:09:50,404 - modelscope - INFO - Saving checkpoint at 2400 iter\n", + "2023-07-02 21:09:50,432 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_2200\n", + "2023-07-02 21:09:50,435 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14329, evaluation/acc: 0.7535, evaluation/loss: 1.7703, loss: 1.5938\n", + "2023-07-02 21:09:53,136 - modelscope - INFO - epoch [1][2405/4982]\tlr: 5.805e-05, memory: 14329, loss: 3.0355\n", + "2023-07-02 21:09:55,673 - modelscope - INFO - epoch [1][2410/4982]\tlr: 5.791e-05, memory: 14329, loss: 1.9070\n", + "2023-07-02 21:09:58,239 - modelscope - INFO - epoch [1][2415/4982]\tlr: 5.777e-05, memory: 14329, loss: 1.1090\n", + "2023-07-02 21:10:00,413 - modelscope - INFO - epoch [1][2420/4982]\tlr: 5.763e-05, memory: 14329, loss: 1.3535\n", + "2023-07-02 21:10:02,887 - modelscope - INFO - epoch [1][2425/4982]\tlr: 5.748e-05, memory: 14329, loss: 1.4563\n", + "2023-07-02 21:10:05,462 - modelscope - INFO - epoch [1][2430/4982]\tlr: 5.734e-05, memory: 14329, loss: 2.2436\n", + "2023-07-02 21:10:08,549 - modelscope - INFO - epoch [1][2435/4982]\tlr: 5.720e-05, memory: 14329, loss: 1.8266\n", + "2023-07-02 21:10:11,226 - modelscope - INFO - epoch [1][2440/4982]\tlr: 5.706e-05, memory: 14329, loss: 1.8402\n", + "2023-07-02 21:10:13,579 - modelscope - INFO - epoch [1][2445/4982]\tlr: 5.691e-05, memory: 14329, loss: 2.0742\n", + "2023-07-02 21:10:15,828 - modelscope - INFO - epoch [1][2450/4982]\tlr: 5.677e-05, memory: 14329, loss: 1.5211\n", + "2023-07-02 21:10:18,658 - modelscope - INFO - epoch [1][2455/4982]\tlr: 5.663e-05, memory: 14329, loss: 0.9520\n", + "2023-07-02 21:10:21,705 - modelscope - INFO - epoch [1][2460/4982]\tlr: 5.649e-05, memory: 14329, loss: 1.4098\n", + "2023-07-02 21:10:24,494 - modelscope - INFO - epoch [1][2465/4982]\tlr: 
5.635e-05, memory: 14329, loss: 1.5748\n", + "2023-07-02 21:10:27,349 - modelscope - INFO - epoch [1][2470/4982]\tlr: 5.620e-05, memory: 14329, loss: 2.5328\n", + "2023-07-02 21:10:29,516 - modelscope - INFO - epoch [1][2475/4982]\tlr: 5.606e-05, memory: 14329, loss: 1.2904\n", + "2023-07-02 21:10:32,690 - modelscope - INFO - epoch [1][2480/4982]\tlr: 5.592e-05, memory: 14329, loss: 0.5270\n", + "2023-07-02 21:10:35,469 - modelscope - INFO - epoch [1][2485/4982]\tlr: 5.578e-05, memory: 14329, loss: 0.9842\n", + "2023-07-02 21:10:37,617 - modelscope - INFO - epoch [1][2490/4982]\tlr: 5.563e-05, memory: 14329, loss: 2.4695\n", + "2023-07-02 21:10:40,562 - modelscope - INFO - epoch [1][2495/4982]\tlr: 5.549e-05, memory: 14329, loss: 1.2441\n", + "2023-07-02 21:10:42,074 - modelscope - INFO - epoch [1][2500/4982]\tlr: 5.535e-05, memory: 14329, loss: 2.1055\n", + "2023-07-02 21:10:44,402 - modelscope - INFO - epoch [1][2505/4982]\tlr: 5.521e-05, memory: 14329, loss: 1.5461\n", + "2023-07-02 21:10:47,254 - modelscope - INFO - epoch [1][2510/4982]\tlr: 5.506e-05, memory: 14329, loss: 2.3160\n", + "2023-07-02 21:10:50,538 - modelscope - INFO - epoch [1][2515/4982]\tlr: 5.492e-05, memory: 14329, loss: 1.4293\n", + "2023-07-02 21:10:53,161 - modelscope - INFO - epoch [1][2520/4982]\tlr: 5.478e-05, memory: 14329, loss: 2.6732\n", + "2023-07-02 21:10:55,975 - modelscope - INFO - epoch [1][2525/4982]\tlr: 5.464e-05, memory: 14329, loss: 1.1059\n", + "2023-07-02 21:10:59,325 - modelscope - INFO - epoch [1][2530/4982]\tlr: 5.449e-05, memory: 14329, loss: 0.7672\n", + "2023-07-02 21:11:02,511 - modelscope - INFO - epoch [1][2535/4982]\tlr: 5.435e-05, memory: 14329, loss: 1.0480\n", + "2023-07-02 21:11:04,652 - modelscope - INFO - epoch [1][2540/4982]\tlr: 5.421e-05, memory: 14329, loss: 1.4984\n", + "2023-07-02 21:11:08,281 - modelscope - INFO - epoch [1][2545/4982]\tlr: 5.407e-05, memory: 14329, loss: 1.1805\n", + "2023-07-02 21:11:10,297 - modelscope - INFO - epoch 
[1][2550/4982]\tlr: 5.392e-05, memory: 14329, loss: 2.0984\n", + "2023-07-02 21:11:13,563 - modelscope - INFO - epoch [1][2555/4982]\tlr: 5.378e-05, memory: 14329, loss: 0.5590\n", + "2023-07-02 21:11:15,666 - modelscope - INFO - epoch [1][2560/4982]\tlr: 5.364e-05, memory: 14329, loss: 1.8969\n", + "2023-07-02 21:11:17,895 - modelscope - INFO - epoch [1][2565/4982]\tlr: 5.350e-05, memory: 14329, loss: 2.2344\n", + "2023-07-02 21:11:20,533 - modelscope - INFO - epoch [1][2570/4982]\tlr: 5.335e-05, memory: 14329, loss: 1.2381\n", + "2023-07-02 21:11:23,834 - modelscope - INFO - epoch [1][2575/4982]\tlr: 5.321e-05, memory: 14329, loss: 1.7533\n", + "2023-07-02 21:11:26,883 - modelscope - INFO - epoch [1][2580/4982]\tlr: 5.307e-05, memory: 14329, loss: 0.9559\n", + "2023-07-02 21:11:29,602 - modelscope - INFO - epoch [1][2585/4982]\tlr: 5.293e-05, memory: 14329, loss: 1.1484\n", + "2023-07-02 21:11:31,820 - modelscope - INFO - epoch [1][2590/4982]\tlr: 5.279e-05, memory: 14329, loss: 1.4527\n", + "2023-07-02 21:11:33,946 - modelscope - INFO - epoch [1][2595/4982]\tlr: 5.264e-05, memory: 14329, loss: 2.1156\n", + "2023-07-02 21:11:36,808 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.23it/s]\n", + "2023-07-02 21:12:43,304 - modelscope - INFO - Saving checkpoint at 2600 iter\n", + "2023-07-02 21:12:43,335 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter2400_acc0.7534938454627991\n", + "2023-07-02 21:12:43,337 - modelscope - INFO - Saving checkpoint at 2600 iter\n", + "2023-07-02 21:12:43,366 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_2400\n", + "2023-07-02 21:12:43,369 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14329, evaluation/acc: 0.7577, evaluation/loss: 1.7432, loss: 1.3414\n", + "2023-07-02 21:12:45,632 - modelscope 
- INFO - epoch [1][2605/4982]\tlr: 5.236e-05, memory: 14329, loss: 1.1031\n", + "2023-07-02 21:12:47,931 - modelscope - INFO - epoch [1][2610/4982]\tlr: 5.222e-05, memory: 14329, loss: 2.4422\n", + "2023-07-02 21:12:50,545 - modelscope - INFO - epoch [1][2615/4982]\tlr: 5.207e-05, memory: 14329, loss: 1.2281\n", + "2023-07-02 21:12:53,002 - modelscope - INFO - epoch [1][2620/4982]\tlr: 5.193e-05, memory: 14329, loss: 1.9912\n", + "2023-07-02 21:12:55,893 - modelscope - INFO - epoch [1][2625/4982]\tlr: 5.179e-05, memory: 14329, loss: 1.7354\n", + "2023-07-02 21:12:58,266 - modelscope - INFO - epoch [1][2630/4982]\tlr: 5.165e-05, memory: 14329, loss: 3.0562\n", + "2023-07-02 21:13:00,767 - modelscope - INFO - epoch [1][2635/4982]\tlr: 5.151e-05, memory: 14329, loss: 1.7664\n", + "2023-07-02 21:13:04,043 - modelscope - INFO - epoch [1][2640/4982]\tlr: 5.136e-05, memory: 14329, loss: 1.7547\n", + "2023-07-02 21:13:06,487 - modelscope - INFO - epoch [1][2645/4982]\tlr: 5.122e-05, memory: 14329, loss: 2.0453\n", + "2023-07-02 21:13:09,480 - modelscope - INFO - epoch [1][2650/4982]\tlr: 5.108e-05, memory: 14329, loss: 1.5508\n", + "2023-07-02 21:13:11,484 - modelscope - INFO - epoch [1][2655/4982]\tlr: 5.094e-05, memory: 14329, loss: 2.8527\n", + "2023-07-02 21:13:14,637 - modelscope - INFO - epoch [1][2660/4982]\tlr: 5.080e-05, memory: 14329, loss: 0.4787\n", + "2023-07-02 21:13:17,215 - modelscope - INFO - epoch [1][2665/4982]\tlr: 5.066e-05, memory: 14329, loss: 1.1926\n", + "2023-07-02 21:13:19,892 - modelscope - INFO - epoch [1][2670/4982]\tlr: 5.051e-05, memory: 14329, loss: 2.3055\n", + "2023-07-02 21:13:21,987 - modelscope - INFO - epoch [1][2675/4982]\tlr: 5.037e-05, memory: 14329, loss: 1.6938\n", + "2023-07-02 21:13:24,761 - modelscope - INFO - epoch [1][2680/4982]\tlr: 5.023e-05, memory: 14329, loss: 2.2922\n", + "2023-07-02 21:13:26,815 - modelscope - INFO - epoch [1][2685/4982]\tlr: 5.009e-05, memory: 14329, loss: 1.6898\n", + "2023-07-02 21:13:29,236 - 
modelscope - INFO - epoch [1][2690/4982]\tlr: 4.995e-05, memory: 14329, loss: 2.2826\n", + "2023-07-02 21:13:31,582 - modelscope - INFO - epoch [1][2695/4982]\tlr: 4.981e-05, memory: 14329, loss: 1.7828\n", + "2023-07-02 21:13:33,912 - modelscope - INFO - epoch [1][2700/4982]\tlr: 4.966e-05, memory: 14329, loss: 1.8785\n", + "2023-07-02 21:13:36,729 - modelscope - INFO - epoch [1][2705/4982]\tlr: 4.952e-05, memory: 14329, loss: 1.4273\n", + "2023-07-02 21:13:38,262 - modelscope - INFO - epoch [1][2710/4982]\tlr: 4.938e-05, memory: 14329, loss: 1.5227\n", + "2023-07-02 21:13:40,572 - modelscope - INFO - epoch [1][2715/4982]\tlr: 4.924e-05, memory: 14329, loss: 2.0828\n", + "2023-07-02 21:13:43,610 - modelscope - INFO - epoch [1][2720/4982]\tlr: 4.910e-05, memory: 14329, loss: 1.7301\n", + "2023-07-02 21:13:46,147 - modelscope - INFO - epoch [1][2725/4982]\tlr: 4.896e-05, memory: 14329, loss: 1.8305\n", + "2023-07-02 21:13:49,457 - modelscope - INFO - epoch [1][2730/4982]\tlr: 4.882e-05, memory: 14329, loss: 1.6883\n", + "2023-07-02 21:13:51,690 - modelscope - INFO - epoch [1][2735/4982]\tlr: 4.868e-05, memory: 14329, loss: 1.3963\n", + "2023-07-02 21:13:54,487 - modelscope - INFO - epoch [1][2740/4982]\tlr: 4.854e-05, memory: 14329, loss: 1.2293\n", + "2023-07-02 21:13:56,303 - modelscope - INFO - epoch [1][2745/4982]\tlr: 4.839e-05, memory: 14329, loss: 1.7289\n", + "2023-07-02 21:13:59,073 - modelscope - INFO - epoch [1][2750/4982]\tlr: 4.825e-05, memory: 14329, loss: 1.1637\n", + "2023-07-02 21:14:02,327 - modelscope - INFO - epoch [1][2755/4982]\tlr: 4.811e-05, memory: 14329, loss: 1.3336\n", + "2023-07-02 21:14:05,192 - modelscope - INFO - epoch [1][2760/4982]\tlr: 4.797e-05, memory: 14329, loss: 0.9352\n", + "2023-07-02 21:14:07,032 - modelscope - INFO - epoch [1][2765/4982]\tlr: 4.783e-05, memory: 14329, loss: 1.9258\n", + "2023-07-02 21:14:10,206 - modelscope - INFO - epoch [1][2770/4982]\tlr: 4.769e-05, memory: 14329, loss: 2.0555\n", + "2023-07-02 
21:14:12,659 - modelscope - INFO - epoch [1][2775/4982]\tlr: 4.755e-05, memory: 14329, loss: 1.5836\n", + "2023-07-02 21:14:15,156 - modelscope - INFO - epoch [1][2780/4982]\tlr: 4.741e-05, memory: 14329, loss: 1.6203\n", + "2023-07-02 21:14:18,171 - modelscope - INFO - epoch [1][2785/4982]\tlr: 4.727e-05, memory: 14329, loss: 2.1402\n", + "2023-07-02 21:14:20,575 - modelscope - INFO - epoch [1][2790/4982]\tlr: 4.713e-05, memory: 14329, loss: 1.6504\n", + "2023-07-02 21:14:23,247 - modelscope - INFO - epoch [1][2795/4982]\tlr: 4.699e-05, memory: 14329, loss: 1.7109\n", + "2023-07-02 21:14:26,026 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.23it/s]\n", + "2023-07-02 21:15:32,451 - modelscope - INFO - Saving checkpoint at 2800 iter\n", + "2023-07-02 21:15:32,483 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter2600_acc0.7577160000801086\n", + "2023-07-02 21:15:32,485 - modelscope - INFO - Saving checkpoint at 2800 iter\n", + "2023-07-02 21:15:32,515 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_2600\n", + "2023-07-02 21:15:32,518 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14329, evaluation/acc: 0.7621, evaluation/loss: 1.7451, loss: 2.2227\n", + "2023-07-02 21:15:34,950 - modelscope - INFO - epoch [1][2805/4982]\tlr: 4.671e-05, memory: 14329, loss: 2.0086\n", + "2023-07-02 21:15:38,272 - modelscope - INFO - epoch [1][2810/4982]\tlr: 4.657e-05, memory: 14329, loss: 0.8770\n", + "2023-07-02 21:15:41,346 - modelscope - INFO - epoch [1][2815/4982]\tlr: 4.643e-05, memory: 14329, loss: 0.7887\n", + "2023-07-02 21:15:43,033 - modelscope - INFO - epoch [1][2820/4982]\tlr: 4.629e-05, memory: 14329, loss: 2.8648\n", + "2023-07-02 21:15:45,965 - modelscope - INFO - epoch [1][2825/4982]\tlr: 4.615e-05, memory: 14329, loss: 1.9832\n", 
+ "2023-07-02 21:15:48,381 - modelscope - INFO - epoch [1][2830/4982]\tlr: 4.601e-05, memory: 14329, loss: 1.4816\n", + "2023-07-02 21:15:51,262 - modelscope - INFO - epoch [1][2835/4982]\tlr: 4.587e-05, memory: 14329, loss: 1.3080\n", + "2023-07-02 21:15:53,969 - modelscope - INFO - epoch [1][2840/4982]\tlr: 4.573e-05, memory: 14329, loss: 1.2664\n", + "2023-07-02 21:15:56,145 - modelscope - INFO - epoch [1][2845/4982]\tlr: 4.559e-05, memory: 14329, loss: 2.4719\n", + "2023-07-02 21:15:58,623 - modelscope - INFO - epoch [1][2850/4982]\tlr: 4.545e-05, memory: 14329, loss: 1.0096\n", + "2023-07-02 21:16:01,537 - modelscope - INFO - epoch [1][2855/4982]\tlr: 4.532e-05, memory: 14329, loss: 1.7023\n", + "2023-07-02 21:16:05,216 - modelscope - INFO - epoch [1][2860/4982]\tlr: 4.518e-05, memory: 14329, loss: 1.8641\n", + "2023-07-02 21:16:08,050 - modelscope - INFO - epoch [1][2865/4982]\tlr: 4.504e-05, memory: 14329, loss: 2.1398\n", + "2023-07-02 21:16:10,270 - modelscope - INFO - epoch [1][2870/4982]\tlr: 4.490e-05, memory: 14329, loss: 1.9180\n", + "2023-07-02 21:16:12,856 - modelscope - INFO - epoch [1][2875/4982]\tlr: 4.476e-05, memory: 14329, loss: 1.6426\n", + "2023-07-02 21:16:15,831 - modelscope - INFO - epoch [1][2880/4982]\tlr: 4.462e-05, memory: 14329, loss: 1.9609\n", + "2023-07-02 21:16:18,475 - modelscope - INFO - epoch [1][2885/4982]\tlr: 4.448e-05, memory: 14329, loss: 1.3818\n", + "2023-07-02 21:16:21,513 - modelscope - INFO - epoch [1][2890/4982]\tlr: 4.434e-05, memory: 14329, loss: 1.8543\n", + "2023-07-02 21:16:23,561 - modelscope - INFO - epoch [1][2895/4982]\tlr: 4.421e-05, memory: 14329, loss: 1.6133\n", + "2023-07-02 21:16:25,999 - modelscope - INFO - epoch [1][2900/4982]\tlr: 4.407e-05, memory: 14329, loss: 2.2039\n", + "2023-07-02 21:16:28,248 - modelscope - INFO - epoch [1][2905/4982]\tlr: 4.393e-05, memory: 14329, loss: 1.5797\n", + "2023-07-02 21:16:31,059 - modelscope - INFO - epoch [1][2910/4982]\tlr: 4.379e-05, memory: 14329, loss: 
1.0002\n", + "2023-07-02 21:16:33,522 - modelscope - INFO - epoch [1][2915/4982]\tlr: 4.365e-05, memory: 14329, loss: 1.5379\n", + "2023-07-02 21:16:35,881 - modelscope - INFO - epoch [1][2920/4982]\tlr: 4.352e-05, memory: 14329, loss: 2.8797\n", + "2023-07-02 21:16:38,582 - modelscope - INFO - epoch [1][2925/4982]\tlr: 4.338e-05, memory: 14329, loss: 2.2234\n", + "2023-07-02 21:16:41,105 - modelscope - INFO - epoch [1][2930/4982]\tlr: 4.324e-05, memory: 14329, loss: 0.9779\n", + "2023-07-02 21:16:43,610 - modelscope - INFO - epoch [1][2935/4982]\tlr: 4.310e-05, memory: 14329, loss: 1.1336\n", + "2023-07-02 21:16:46,978 - modelscope - INFO - epoch [1][2940/4982]\tlr: 4.297e-05, memory: 14329, loss: 1.7703\n", + "2023-07-02 21:16:49,719 - modelscope - INFO - epoch [1][2945/4982]\tlr: 4.283e-05, memory: 14329, loss: 2.1102\n", + "2023-07-02 21:16:52,425 - modelscope - INFO - epoch [1][2950/4982]\tlr: 4.269e-05, memory: 14329, loss: 1.6873\n", + "2023-07-02 21:16:54,893 - modelscope - INFO - epoch [1][2955/4982]\tlr: 4.256e-05, memory: 14329, loss: 1.8313\n", + "2023-07-02 21:16:58,211 - modelscope - INFO - epoch [1][2960/4982]\tlr: 4.242e-05, memory: 14329, loss: 1.2132\n", + "2023-07-02 21:17:01,430 - modelscope - INFO - epoch [1][2965/4982]\tlr: 4.228e-05, memory: 14329, loss: 1.5578\n", + "2023-07-02 21:17:04,190 - modelscope - INFO - epoch [1][2970/4982]\tlr: 4.215e-05, memory: 14329, loss: 1.1242\n", + "2023-07-02 21:17:07,777 - modelscope - INFO - epoch [1][2975/4982]\tlr: 4.201e-05, memory: 14329, loss: 1.3516\n", + "2023-07-02 21:17:11,666 - modelscope - INFO - epoch [1][2980/4982]\tlr: 4.187e-05, memory: 14329, loss: 1.2953\n", + "2023-07-02 21:17:14,548 - modelscope - INFO - epoch [1][2985/4982]\tlr: 4.174e-05, memory: 14329, loss: 2.3777\n", + "2023-07-02 21:17:17,244 - modelscope - INFO - epoch [1][2990/4982]\tlr: 4.160e-05, memory: 14329, loss: 1.8803\n", + "2023-07-02 21:17:20,544 - modelscope - INFO - epoch [1][2995/4982]\tlr: 4.147e-05, memory: 14329, 
loss: 1.1699\n", + "2023-07-02 21:17:22,682 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.22it/s]\n", + "2023-07-02 21:18:29,245 - modelscope - INFO - Saving checkpoint at 3000 iter\n", + "2023-07-02 21:18:29,273 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter2800_acc0.7621409296989441\n", + "2023-07-02 21:18:29,275 - modelscope - INFO - Saving checkpoint at 3000 iter\n", + "2023-07-02 21:18:29,301 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_2800\n", + "2023-07-02 21:18:29,303 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14329, evaluation/acc: 0.7655, evaluation/loss: 1.7432, loss: 1.2258\n", + "2023-07-02 21:18:31,804 - modelscope - INFO - epoch [1][3005/4982]\tlr: 4.120e-05, memory: 14329, loss: 2.2777\n", + "2023-07-02 21:18:35,465 - modelscope - INFO - epoch [1][3010/4982]\tlr: 4.106e-05, memory: 14329, loss: 1.4781\n", + "2023-07-02 21:18:38,255 - modelscope - INFO - epoch [1][3015/4982]\tlr: 4.092e-05, memory: 14329, loss: 1.4242\n", + "2023-07-02 21:18:41,641 - modelscope - INFO - epoch [1][3020/4982]\tlr: 4.079e-05, memory: 14449, loss: 2.5148\n", + "2023-07-02 21:18:44,184 - modelscope - INFO - epoch [1][3025/4982]\tlr: 4.065e-05, memory: 14449, loss: 1.9086\n", + "2023-07-02 21:18:47,235 - modelscope - INFO - epoch [1][3030/4982]\tlr: 4.052e-05, memory: 14449, loss: 2.3363\n", + "2023-07-02 21:18:50,005 - modelscope - INFO - epoch [1][3035/4982]\tlr: 4.039e-05, memory: 14449, loss: 1.4543\n", + "2023-07-02 21:18:52,482 - modelscope - INFO - epoch [1][3040/4982]\tlr: 4.025e-05, memory: 14449, loss: 2.1744\n", + "2023-07-02 21:18:55,300 - modelscope - INFO - epoch [1][3045/4982]\tlr: 4.012e-05, memory: 14449, loss: 1.8871\n", + "2023-07-02 21:18:58,643 - modelscope - INFO - epoch [1][3050/4982]\tlr: 3.998e-05, 
memory: 14449, loss: 1.6809\n", + "2023-07-02 21:19:01,867 - modelscope - INFO - epoch [1][3055/4982]\tlr: 3.985e-05, memory: 14449, loss: 2.7977\n", + "2023-07-02 21:19:05,785 - modelscope - INFO - epoch [1][3060/4982]\tlr: 3.971e-05, memory: 14449, loss: 1.6258\n", + "2023-07-02 21:19:09,029 - modelscope - INFO - epoch [1][3065/4982]\tlr: 3.958e-05, memory: 14449, loss: 0.9796\n", + "2023-07-02 21:19:11,551 - modelscope - INFO - epoch [1][3070/4982]\tlr: 3.945e-05, memory: 14449, loss: 2.2262\n", + "2023-07-02 21:19:14,238 - modelscope - INFO - epoch [1][3075/4982]\tlr: 3.931e-05, memory: 14449, loss: 1.3527\n", + "2023-07-02 21:19:16,361 - modelscope - INFO - epoch [1][3080/4982]\tlr: 3.918e-05, memory: 14449, loss: 1.6689\n", + "2023-07-02 21:19:18,345 - modelscope - INFO - epoch [1][3085/4982]\tlr: 3.905e-05, memory: 14449, loss: 2.9641\n", + "2023-07-02 21:19:20,849 - modelscope - INFO - epoch [1][3090/4982]\tlr: 3.891e-05, memory: 14449, loss: 1.6723\n", + "2023-07-02 21:19:23,101 - modelscope - INFO - epoch [1][3095/4982]\tlr: 3.878e-05, memory: 14449, loss: 2.7703\n", + "2023-07-02 21:19:25,726 - modelscope - INFO - epoch [1][3100/4982]\tlr: 3.865e-05, memory: 14449, loss: 0.8043\n", + "2023-07-02 21:19:28,252 - modelscope - INFO - epoch [1][3105/4982]\tlr: 3.852e-05, memory: 14449, loss: 2.0820\n", + "2023-07-02 21:19:30,440 - modelscope - INFO - epoch [1][3110/4982]\tlr: 3.838e-05, memory: 14449, loss: 2.3492\n", + "2023-07-02 21:19:33,686 - modelscope - INFO - epoch [1][3115/4982]\tlr: 3.825e-05, memory: 14449, loss: 0.8090\n", + "2023-07-02 21:19:36,596 - modelscope - INFO - epoch [1][3120/4982]\tlr: 3.812e-05, memory: 14449, loss: 0.6620\n", + "2023-07-02 21:19:38,596 - modelscope - INFO - epoch [1][3125/4982]\tlr: 3.799e-05, memory: 14449, loss: 2.6781\n", + "2023-07-02 21:19:41,115 - modelscope - INFO - epoch [1][3130/4982]\tlr: 3.786e-05, memory: 14449, loss: 1.4328\n", + "2023-07-02 21:19:44,046 - modelscope - INFO - epoch [1][3135/4982]\tlr: 
3.772e-05, memory: 14449, loss: 1.3764\n", + "2023-07-02 21:19:47,148 - modelscope - INFO - epoch [1][3140/4982]\tlr: 3.759e-05, memory: 14449, loss: 1.0316\n", + "2023-07-02 21:19:50,062 - modelscope - INFO - epoch [1][3145/4982]\tlr: 3.746e-05, memory: 14449, loss: 1.6078\n", + "2023-07-02 21:19:52,899 - modelscope - INFO - epoch [1][3150/4982]\tlr: 3.733e-05, memory: 14449, loss: 1.9883\n", + "2023-07-02 21:19:55,621 - modelscope - INFO - epoch [1][3155/4982]\tlr: 3.720e-05, memory: 14449, loss: 1.6697\n", + "2023-07-02 21:19:57,950 - modelscope - INFO - epoch [1][3160/4982]\tlr: 3.707e-05, memory: 14449, loss: 2.7109\n", + "2023-07-02 21:20:00,606 - modelscope - INFO - epoch [1][3165/4982]\tlr: 3.694e-05, memory: 14449, loss: 1.5930\n", + "2023-07-02 21:20:04,380 - modelscope - INFO - epoch [1][3170/4982]\tlr: 3.681e-05, memory: 14449, loss: 1.5211\n", + "2023-07-02 21:20:07,165 - modelscope - INFO - epoch [1][3175/4982]\tlr: 3.668e-05, memory: 14449, loss: 1.1980\n", + "2023-07-02 21:20:09,788 - modelscope - INFO - epoch [1][3180/4982]\tlr: 3.655e-05, memory: 14449, loss: 1.7625\n", + "2023-07-02 21:20:12,711 - modelscope - INFO - epoch [1][3185/4982]\tlr: 3.642e-05, memory: 14449, loss: 1.6734\n", + "2023-07-02 21:20:15,469 - modelscope - INFO - epoch [1][3190/4982]\tlr: 3.629e-05, memory: 14449, loss: 1.9477\n", + "2023-07-02 21:20:18,068 - modelscope - INFO - epoch [1][3195/4982]\tlr: 3.616e-05, memory: 14449, loss: 1.4062\n", + "2023-07-02 21:20:20,228 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.23it/s]\n", + "2023-07-02 21:21:26,662 - modelscope - INFO - Saving checkpoint at 3200 iter\n", + "2023-07-02 21:21:26,689 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter3000_acc0.7654780745506287\n", + "2023-07-02 21:21:26,692 - modelscope - INFO - Saving checkpoint at 3200 iter\n", + 
"2023-07-02 21:21:26,718 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_3000\n", + "2023-07-02 21:21:26,721 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14449, evaluation/acc: 0.7670, evaluation/loss: 1.7173, loss: 2.3687\n", + "2023-07-02 21:21:29,912 - modelscope - INFO - epoch [1][3205/4982]\tlr: 3.590e-05, memory: 14449, loss: 1.7494\n", + "2023-07-02 21:21:32,447 - modelscope - INFO - epoch [1][3210/4982]\tlr: 3.577e-05, memory: 14449, loss: 2.1035\n", + "2023-07-02 21:21:35,773 - modelscope - INFO - epoch [1][3215/4982]\tlr: 3.565e-05, memory: 14449, loss: 0.8089\n", + "2023-07-02 21:21:38,867 - modelscope - INFO - epoch [1][3220/4982]\tlr: 3.552e-05, memory: 14449, loss: 1.5078\n", + "2023-07-02 21:21:42,117 - modelscope - INFO - epoch [1][3225/4982]\tlr: 3.539e-05, memory: 14449, loss: 0.6988\n", + "2023-07-02 21:21:44,231 - modelscope - INFO - epoch [1][3230/4982]\tlr: 3.526e-05, memory: 14449, loss: 2.9305\n", + "2023-07-02 21:21:46,826 - modelscope - INFO - epoch [1][3235/4982]\tlr: 3.513e-05, memory: 14449, loss: 1.9297\n", + "2023-07-02 21:21:49,591 - modelscope - INFO - epoch [1][3240/4982]\tlr: 3.501e-05, memory: 14449, loss: 0.5963\n", + "2023-07-02 21:21:51,805 - modelscope - INFO - epoch [1][3245/4982]\tlr: 3.488e-05, memory: 14449, loss: 3.5063\n", + "2023-07-02 21:21:54,641 - modelscope - INFO - epoch [1][3250/4982]\tlr: 3.475e-05, memory: 14449, loss: 2.2263\n", + "2023-07-02 21:21:56,972 - modelscope - INFO - epoch [1][3255/4982]\tlr: 3.462e-05, memory: 14449, loss: 2.3281\n", + "2023-07-02 21:21:59,236 - modelscope - INFO - epoch [1][3260/4982]\tlr: 3.450e-05, memory: 14449, loss: 1.6074\n", + "2023-07-02 21:22:02,735 - modelscope - INFO - epoch [1][3265/4982]\tlr: 3.437e-05, memory: 14449, loss: 0.7896\n", + "2023-07-02 21:22:05,850 - modelscope - INFO - epoch [1][3270/4982]\tlr: 3.424e-05, memory: 14449, loss: 2.6018\n", + "2023-07-02 21:22:07,890 - modelscope - INFO - 
epoch [1][3275/4982]\tlr: 3.412e-05, memory: 14449, loss: 1.3377\n", + "2023-07-02 21:22:10,846 - modelscope - INFO - epoch [1][3280/4982]\tlr: 3.399e-05, memory: 14449, loss: 1.4023\n", + "2023-07-02 21:22:13,203 - modelscope - INFO - epoch [1][3285/4982]\tlr: 3.387e-05, memory: 14449, loss: 2.1109\n", + "2023-07-02 21:22:15,914 - modelscope - INFO - epoch [1][3290/4982]\tlr: 3.374e-05, memory: 14449, loss: 1.3941\n", + "2023-07-02 21:22:18,753 - modelscope - INFO - epoch [1][3295/4982]\tlr: 3.362e-05, memory: 14449, loss: 2.0223\n", + "2023-07-02 21:22:21,131 - modelscope - INFO - epoch [1][3300/4982]\tlr: 3.349e-05, memory: 14449, loss: 1.3546\n", + "2023-07-02 21:22:22,563 - modelscope - INFO - epoch [1][3305/4982]\tlr: 3.337e-05, memory: 14449, loss: 2.2541\n", + "2023-07-02 21:22:26,351 - modelscope - INFO - epoch [1][3310/4982]\tlr: 3.324e-05, memory: 14449, loss: 2.1484\n", + "2023-07-02 21:22:29,794 - modelscope - INFO - epoch [1][3315/4982]\tlr: 3.312e-05, memory: 14449, loss: 0.9180\n", + "2023-07-02 21:22:31,954 - modelscope - INFO - epoch [1][3320/4982]\tlr: 3.299e-05, memory: 14449, loss: 2.4869\n", + "2023-07-02 21:22:34,848 - modelscope - INFO - epoch [1][3325/4982]\tlr: 3.287e-05, memory: 14449, loss: 1.0967\n", + "2023-07-02 21:22:37,229 - modelscope - INFO - epoch [1][3330/4982]\tlr: 3.275e-05, memory: 14449, loss: 2.1406\n", + "2023-07-02 21:22:39,882 - modelscope - INFO - epoch [1][3335/4982]\tlr: 3.262e-05, memory: 14449, loss: 1.9133\n", + "2023-07-02 21:22:42,375 - modelscope - INFO - epoch [1][3340/4982]\tlr: 3.250e-05, memory: 14449, loss: 2.0443\n", + "2023-07-02 21:22:45,140 - modelscope - INFO - epoch [1][3345/4982]\tlr: 3.238e-05, memory: 14449, loss: 2.7484\n", + "2023-07-02 21:22:48,235 - modelscope - INFO - epoch [1][3350/4982]\tlr: 3.225e-05, memory: 14449, loss: 1.3258\n", + "2023-07-02 21:22:50,145 - modelscope - INFO - epoch [1][3355/4982]\tlr: 3.213e-05, memory: 14449, loss: 2.4828\n", + "2023-07-02 21:22:53,373 - modelscope - 
INFO - epoch [1][3360/4982]\tlr: 3.201e-05, memory: 14449, loss: 1.3379\n", + "2023-07-02 21:22:55,667 - modelscope - INFO - epoch [1][3365/4982]\tlr: 3.189e-05, memory: 14449, loss: 2.0289\n", + "2023-07-02 21:22:57,577 - modelscope - INFO - epoch [1][3370/4982]\tlr: 3.176e-05, memory: 14449, loss: 2.0500\n", + "2023-07-02 21:23:00,744 - modelscope - INFO - epoch [1][3375/4982]\tlr: 3.164e-05, memory: 14449, loss: 1.0834\n", + "2023-07-02 21:23:04,128 - modelscope - INFO - epoch [1][3380/4982]\tlr: 3.152e-05, memory: 14449, loss: 0.8875\n", + "2023-07-02 21:23:07,233 - modelscope - INFO - epoch [1][3385/4982]\tlr: 3.140e-05, memory: 14449, loss: 1.1375\n", + "2023-07-02 21:23:09,464 - modelscope - INFO - epoch [1][3390/4982]\tlr: 3.128e-05, memory: 14449, loss: 2.3506\n", + "2023-07-02 21:23:12,230 - modelscope - INFO - epoch [1][3395/4982]\tlr: 3.116e-05, memory: 14449, loss: 1.0258\n", + "2023-07-02 21:23:15,891 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.23it/s]\n", + "2023-07-02 21:24:22,313 - modelscope - INFO - Saving checkpoint at 3400 iter\n", + "2023-07-02 21:24:22,343 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter3200_acc0.7669530510902405\n", + "2023-07-02 21:24:22,345 - modelscope - INFO - Saving checkpoint at 3400 iter\n", + "2023-07-02 21:24:22,373 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_3200\n", + "2023-07-02 21:24:22,376 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14449, evaluation/acc: 0.7689, evaluation/loss: 1.6972, loss: 1.1217\n", + "2023-07-02 21:24:25,324 - modelscope - INFO - epoch [1][3405/4982]\tlr: 3.092e-05, memory: 14449, loss: 1.3055\n", + "2023-07-02 21:24:28,008 - modelscope - INFO - epoch [1][3410/4982]\tlr: 3.080e-05, memory: 14449, loss: 1.8813\n", + "2023-07-02 21:24:30,896 
- modelscope - INFO - epoch [1][3415/4982]\tlr: 3.068e-05, memory: 14449, loss: 1.8965\n", + "2023-07-02 21:24:33,316 - modelscope - INFO - epoch [1][3420/4982]\tlr: 3.056e-05, memory: 14449, loss: 2.1344\n", + "2023-07-02 21:24:35,511 - modelscope - INFO - epoch [1][3425/4982]\tlr: 3.044e-05, memory: 14449, loss: 2.6798\n", + "2023-07-02 21:24:38,328 - modelscope - INFO - epoch [1][3430/4982]\tlr: 3.032e-05, memory: 14449, loss: 0.9617\n", + "2023-07-02 21:24:41,517 - modelscope - INFO - epoch [1][3435/4982]\tlr: 3.020e-05, memory: 14449, loss: 1.7773\n", + "2023-07-02 21:24:44,031 - modelscope - INFO - epoch [1][3440/4982]\tlr: 3.008e-05, memory: 14449, loss: 0.9613\n", + "2023-07-02 21:24:46,636 - modelscope - INFO - epoch [1][3445/4982]\tlr: 2.996e-05, memory: 14449, loss: 2.5844\n", + "2023-07-02 21:24:49,249 - modelscope - INFO - epoch [1][3450/4982]\tlr: 2.984e-05, memory: 14449, loss: 1.5498\n", + "2023-07-02 21:24:51,312 - modelscope - INFO - epoch [1][3455/4982]\tlr: 2.973e-05, memory: 14449, loss: 3.1250\n", + "2023-07-02 21:24:53,950 - modelscope - INFO - epoch [1][3460/4982]\tlr: 2.961e-05, memory: 14449, loss: 1.4406\n", + "2023-07-02 21:24:58,115 - modelscope - INFO - epoch [1][3465/4982]\tlr: 2.949e-05, memory: 14449, loss: 1.8449\n", + "2023-07-02 21:25:01,189 - modelscope - INFO - epoch [1][3470/4982]\tlr: 2.938e-05, memory: 14449, loss: 1.5242\n", + "2023-07-02 21:25:04,395 - modelscope - INFO - epoch [1][3475/4982]\tlr: 2.926e-05, memory: 14449, loss: 1.7469\n", + "2023-07-02 21:25:06,700 - modelscope - INFO - epoch [1][3480/4982]\tlr: 2.914e-05, memory: 14449, loss: 2.0787\n", + "2023-07-02 21:25:09,262 - modelscope - INFO - epoch [1][3485/4982]\tlr: 2.903e-05, memory: 14449, loss: 2.8416\n", + "2023-07-02 21:25:11,210 - modelscope - INFO - epoch [1][3490/4982]\tlr: 2.891e-05, memory: 14449, loss: 1.3633\n", + "2023-07-02 21:25:13,408 - modelscope - INFO - epoch [1][3495/4982]\tlr: 2.879e-05, memory: 14449, loss: 2.1203\n", + "2023-07-02 
21:25:16,422 - modelscope - INFO - epoch [1][3500/4982]\tlr: 2.868e-05, memory: 14449, loss: 1.2863\n", + "2023-07-02 21:25:19,311 - modelscope - INFO - epoch [1][3505/4982]\tlr: 2.856e-05, memory: 14449, loss: 2.5109\n", + "2023-07-02 21:25:22,759 - modelscope - INFO - epoch [1][3510/4982]\tlr: 2.845e-05, memory: 14449, loss: 1.1850\n", + "2023-07-02 21:25:25,501 - modelscope - INFO - epoch [1][3515/4982]\tlr: 2.833e-05, memory: 14449, loss: 1.2992\n", + "2023-07-02 21:25:27,731 - modelscope - INFO - epoch [1][3520/4982]\tlr: 2.822e-05, memory: 14449, loss: 1.6945\n", + "2023-07-02 21:25:30,093 - modelscope - INFO - epoch [1][3525/4982]\tlr: 2.810e-05, memory: 14449, loss: 1.4635\n", + "2023-07-02 21:25:32,786 - modelscope - INFO - epoch [1][3530/4982]\tlr: 2.799e-05, memory: 14449, loss: 1.3238\n", + "2023-07-02 21:25:35,630 - modelscope - INFO - epoch [1][3535/4982]\tlr: 2.788e-05, memory: 14449, loss: 1.7512\n", + "2023-07-02 21:25:38,803 - modelscope - INFO - epoch [1][3540/4982]\tlr: 2.776e-05, memory: 14449, loss: 0.5063\n", + "2023-07-02 21:25:41,431 - modelscope - INFO - epoch [1][3545/4982]\tlr: 2.765e-05, memory: 14449, loss: 2.9984\n", + "2023-07-02 21:25:44,590 - modelscope - INFO - epoch [1][3550/4982]\tlr: 2.754e-05, memory: 14449, loss: 1.9760\n", + "2023-07-02 21:25:47,035 - modelscope - INFO - epoch [1][3555/4982]\tlr: 2.743e-05, memory: 14449, loss: 1.2375\n", + "2023-07-02 21:25:49,304 - modelscope - INFO - epoch [1][3560/4982]\tlr: 2.731e-05, memory: 14449, loss: 2.3781\n", + "2023-07-02 21:25:51,809 - modelscope - INFO - epoch [1][3565/4982]\tlr: 2.720e-05, memory: 14449, loss: 1.3707\n", + "2023-07-02 21:25:55,272 - modelscope - INFO - epoch [1][3570/4982]\tlr: 2.709e-05, memory: 14449, loss: 2.1244\n", + "2023-07-02 21:25:57,747 - modelscope - INFO - epoch [1][3575/4982]\tlr: 2.698e-05, memory: 14449, loss: 0.8705\n", + "2023-07-02 21:26:00,593 - modelscope - INFO - epoch [1][3580/4982]\tlr: 2.687e-05, memory: 14449, loss: 2.1484\n", + 
"2023-07-02 21:26:02,783 - modelscope - INFO - epoch [1][3585/4982]\tlr: 2.676e-05, memory: 14449, loss: 1.3639\n", + "2023-07-02 21:26:04,331 - modelscope - INFO - epoch [1][3590/4982]\tlr: 2.665e-05, memory: 14449, loss: 1.5500\n", + "2023-07-02 21:26:07,565 - modelscope - INFO - epoch [1][3595/4982]\tlr: 2.654e-05, memory: 14449, loss: 1.4891\n", + "2023-07-02 21:26:09,515 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.22it/s]\n", + "2023-07-02 21:27:16,035 - modelscope - INFO - Saving checkpoint at 3600 iter\n", + "2023-07-02 21:27:16,062 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter3400_acc0.768944263458252\n", + "2023-07-02 21:27:16,065 - modelscope - INFO - Saving checkpoint at 3600 iter\n", + "2023-07-02 21:27:16,090 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_3400\n", + "2023-07-02 21:27:16,092 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14449, evaluation/acc: 0.7704, evaluation/loss: 1.6898, loss: 2.3109\n", + "2023-07-02 21:27:17,958 - modelscope - INFO - epoch [1][3605/4982]\tlr: 2.632e-05, memory: 14449, loss: 1.5484\n", + "2023-07-02 21:27:20,844 - modelscope - INFO - epoch [1][3610/4982]\tlr: 2.621e-05, memory: 14449, loss: 1.7049\n", + "2023-07-02 21:27:24,038 - modelscope - INFO - epoch [1][3615/4982]\tlr: 2.610e-05, memory: 14449, loss: 1.1580\n", + "2023-07-02 21:27:26,611 - modelscope - INFO - epoch [1][3620/4982]\tlr: 2.599e-05, memory: 14449, loss: 1.1926\n", + "2023-07-02 21:27:29,270 - modelscope - INFO - epoch [1][3625/4982]\tlr: 2.588e-05, memory: 14449, loss: 1.9445\n", + "2023-07-02 21:27:32,570 - modelscope - INFO - epoch [1][3630/4982]\tlr: 2.577e-05, memory: 14449, loss: 0.8320\n", + "2023-07-02 21:27:34,890 - modelscope - INFO - epoch [1][3635/4982]\tlr: 2.566e-05, memory: 14449, loss: 
1.8961\n", + "2023-07-02 21:27:37,762 - modelscope - INFO - epoch [1][3640/4982]\tlr: 2.556e-05, memory: 14449, loss: 1.3434\n", + "2023-07-02 21:27:40,862 - modelscope - INFO - epoch [1][3645/4982]\tlr: 2.545e-05, memory: 14449, loss: 1.6516\n", + "2023-07-02 21:27:43,323 - modelscope - INFO - epoch [1][3650/4982]\tlr: 2.534e-05, memory: 14449, loss: 3.4539\n", + "2023-07-02 21:27:46,306 - modelscope - INFO - epoch [1][3655/4982]\tlr: 2.523e-05, memory: 14449, loss: 1.5139\n", + "2023-07-02 21:27:48,976 - modelscope - INFO - epoch [1][3660/4982]\tlr: 2.513e-05, memory: 14449, loss: 1.6055\n", + "2023-07-02 21:27:52,023 - modelscope - INFO - epoch [1][3665/4982]\tlr: 2.502e-05, memory: 14449, loss: 0.5375\n", + "2023-07-02 21:27:55,459 - modelscope - INFO - epoch [1][3670/4982]\tlr: 2.492e-05, memory: 14449, loss: 1.8552\n", + "2023-07-02 21:27:58,311 - modelscope - INFO - epoch [1][3675/4982]\tlr: 2.481e-05, memory: 14449, loss: 1.0477\n", + "2023-07-02 21:28:00,477 - modelscope - INFO - epoch [1][3680/4982]\tlr: 2.470e-05, memory: 14449, loss: 1.8646\n", + "2023-07-02 21:28:02,402 - modelscope - INFO - epoch [1][3685/4982]\tlr: 2.460e-05, memory: 14449, loss: 2.7117\n", + "2023-07-02 21:28:05,217 - modelscope - INFO - epoch [1][3690/4982]\tlr: 2.449e-05, memory: 14449, loss: 2.6594\n", + "2023-07-02 21:28:07,697 - modelscope - INFO - epoch [1][3695/4982]\tlr: 2.439e-05, memory: 14449, loss: 1.9680\n", + "2023-07-02 21:28:11,289 - modelscope - INFO - epoch [1][3700/4982]\tlr: 2.429e-05, memory: 14449, loss: 1.4680\n", + "2023-07-02 21:28:14,322 - modelscope - INFO - epoch [1][3705/4982]\tlr: 2.418e-05, memory: 14449, loss: 2.1742\n", + "2023-07-02 21:28:16,434 - modelscope - INFO - epoch [1][3710/4982]\tlr: 2.408e-05, memory: 14449, loss: 2.0691\n", + "2023-07-02 21:28:19,150 - modelscope - INFO - epoch [1][3715/4982]\tlr: 2.398e-05, memory: 14449, loss: 1.6078\n", + "2023-07-02 21:28:22,166 - modelscope - INFO - epoch [1][3720/4982]\tlr: 2.387e-05, memory: 14449, 
loss: 0.9880\n", + "2023-07-02 21:28:24,924 - modelscope - INFO - epoch [1][3725/4982]\tlr: 2.377e-05, memory: 14449, loss: 1.1384\n", + "2023-07-02 21:28:28,212 - modelscope - INFO - epoch [1][3730/4982]\tlr: 2.367e-05, memory: 14449, loss: 1.3064\n", + "2023-07-02 21:28:30,391 - modelscope - INFO - epoch [1][3735/4982]\tlr: 2.357e-05, memory: 14449, loss: 2.5031\n", + "2023-07-02 21:28:32,316 - modelscope - INFO - epoch [1][3740/4982]\tlr: 2.346e-05, memory: 14449, loss: 1.1914\n", + "2023-07-02 21:28:35,087 - modelscope - INFO - epoch [1][3745/4982]\tlr: 2.336e-05, memory: 14449, loss: 1.5630\n", + "2023-07-02 21:28:38,274 - modelscope - INFO - epoch [1][3750/4982]\tlr: 2.326e-05, memory: 14449, loss: 1.5844\n", + "2023-07-02 21:28:40,649 - modelscope - INFO - epoch [1][3755/4982]\tlr: 2.316e-05, memory: 14449, loss: 2.6648\n", + "2023-07-02 21:28:43,226 - modelscope - INFO - epoch [1][3760/4982]\tlr: 2.306e-05, memory: 14449, loss: 1.3648\n", + "2023-07-02 21:28:45,433 - modelscope - INFO - epoch [1][3765/4982]\tlr: 2.296e-05, memory: 14449, loss: 2.8930\n", + "2023-07-02 21:28:48,571 - modelscope - INFO - epoch [1][3770/4982]\tlr: 2.286e-05, memory: 14449, loss: 1.8161\n", + "2023-07-02 21:28:51,247 - modelscope - INFO - epoch [1][3775/4982]\tlr: 2.276e-05, memory: 14449, loss: 2.2783\n", + "2023-07-02 21:28:53,364 - modelscope - INFO - epoch [1][3780/4982]\tlr: 2.266e-05, memory: 14449, loss: 2.4652\n", + "2023-07-02 21:28:56,459 - modelscope - INFO - epoch [1][3785/4982]\tlr: 2.256e-05, memory: 14449, loss: 0.5556\n", + "2023-07-02 21:28:58,529 - modelscope - INFO - epoch [1][3790/4982]\tlr: 2.247e-05, memory: 14449, loss: 1.4350\n", + "2023-07-02 21:29:01,457 - modelscope - INFO - epoch [1][3795/4982]\tlr: 2.237e-05, memory: 14449, loss: 2.3062\n", + "2023-07-02 21:29:03,885 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.22it/s]\n", + "2023-07-02 
21:30:10,496 - modelscope - INFO - Saving checkpoint at 3800 iter\n", + "2023-07-02 21:30:10,522 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter3600_acc0.7704192399978638\n", + "2023-07-02 21:30:10,525 - modelscope - INFO - Saving checkpoint at 3800 iter\n", + "2023-07-02 21:30:10,549 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_3600\n", + "2023-07-02 21:30:10,552 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14449, evaluation/acc: 0.7714, evaluation/loss: 1.6864, loss: 1.6359\n", + "2023-07-02 21:30:12,897 - modelscope - INFO - epoch [1][3805/4982]\tlr: 2.217e-05, memory: 14449, loss: 2.1727\n", + "2023-07-02 21:30:15,703 - modelscope - INFO - epoch [1][3810/4982]\tlr: 2.208e-05, memory: 14449, loss: 1.7061\n", + "2023-07-02 21:30:18,582 - modelscope - INFO - epoch [1][3815/4982]\tlr: 2.198e-05, memory: 14449, loss: 0.9371\n", + "2023-07-02 21:30:21,148 - modelscope - INFO - epoch [1][3820/4982]\tlr: 2.188e-05, memory: 14449, loss: 1.7875\n", + "2023-07-02 21:30:23,806 - modelscope - INFO - epoch [1][3825/4982]\tlr: 2.179e-05, memory: 14449, loss: 2.2953\n", + "2023-07-02 21:30:26,426 - modelscope - INFO - epoch [1][3830/4982]\tlr: 2.169e-05, memory: 14449, loss: 2.3281\n", + "2023-07-02 21:30:28,893 - modelscope - INFO - epoch [1][3835/4982]\tlr: 2.160e-05, memory: 14449, loss: 1.5443\n", + "2023-07-02 21:30:31,735 - modelscope - INFO - epoch [1][3840/4982]\tlr: 2.150e-05, memory: 14449, loss: 2.0406\n", + "2023-07-02 21:30:33,879 - modelscope - INFO - epoch [1][3845/4982]\tlr: 2.141e-05, memory: 14449, loss: 2.1980\n", + "2023-07-02 21:30:36,598 - modelscope - INFO - epoch [1][3850/4982]\tlr: 2.131e-05, memory: 14449, loss: 1.5972\n", + "2023-07-02 21:30:39,142 - modelscope - INFO - epoch [1][3855/4982]\tlr: 2.122e-05, memory: 14449, loss: 2.2004\n", + "2023-07-02 21:30:41,541 - modelscope - INFO - epoch [1][3860/4982]\tlr: 
2.112e-05, memory: 14449, loss: 1.5225\n", + "2023-07-02 21:30:44,206 - modelscope - INFO - epoch [1][3865/4982]\tlr: 2.103e-05, memory: 14449, loss: 2.0740\n", + "2023-07-02 21:30:47,318 - modelscope - INFO - epoch [1][3870/4982]\tlr: 2.094e-05, memory: 14449, loss: 2.7250\n", + "2023-07-02 21:30:50,059 - modelscope - INFO - epoch [1][3875/4982]\tlr: 2.084e-05, memory: 14449, loss: 2.2059\n", + "2023-07-02 21:30:52,045 - modelscope - INFO - epoch [1][3880/4982]\tlr: 2.075e-05, memory: 14449, loss: 1.7930\n", + "2023-07-02 21:30:54,716 - modelscope - INFO - epoch [1][3885/4982]\tlr: 2.066e-05, memory: 14449, loss: 1.6184\n", + "2023-07-02 21:30:56,979 - modelscope - INFO - epoch [1][3890/4982]\tlr: 2.057e-05, memory: 14449, loss: 2.1453\n", + "2023-07-02 21:31:01,437 - modelscope - INFO - epoch [1][3895/4982]\tlr: 2.048e-05, memory: 14449, loss: 1.2229\n", + "2023-07-02 21:31:05,207 - modelscope - INFO - epoch [1][3900/4982]\tlr: 2.039e-05, memory: 14449, loss: 1.7156\n", + "2023-07-02 21:31:07,873 - modelscope - INFO - epoch [1][3905/4982]\tlr: 2.029e-05, memory: 14449, loss: 1.8084\n", + "2023-07-02 21:31:10,896 - modelscope - INFO - epoch [1][3910/4982]\tlr: 2.020e-05, memory: 14449, loss: 0.4583\n", + "2023-07-02 21:31:13,623 - modelscope - INFO - epoch [1][3915/4982]\tlr: 2.011e-05, memory: 14449, loss: 3.1516\n", + "2023-07-02 21:31:16,647 - modelscope - INFO - epoch [1][3920/4982]\tlr: 2.002e-05, memory: 14449, loss: 1.0519\n", + "2023-07-02 21:31:19,431 - modelscope - INFO - epoch [1][3925/4982]\tlr: 1.994e-05, memory: 14449, loss: 2.3402\n", + "2023-07-02 21:31:21,995 - modelscope - INFO - epoch [1][3930/4982]\tlr: 1.985e-05, memory: 14449, loss: 2.3391\n", + "2023-07-02 21:31:24,439 - modelscope - INFO - epoch [1][3935/4982]\tlr: 1.976e-05, memory: 14449, loss: 2.4483\n", + "2023-07-02 21:31:26,586 - modelscope - INFO - epoch [1][3940/4982]\tlr: 1.967e-05, memory: 14449, loss: 2.2727\n", + "2023-07-02 21:31:28,897 - modelscope - INFO - epoch 
[1][3945/4982]\tlr: 1.958e-05, memory: 14449, loss: 3.0383\n", + "2023-07-02 21:31:31,754 - modelscope - INFO - epoch [1][3950/4982]\tlr: 1.949e-05, memory: 14449, loss: 1.5698\n", + "2023-07-02 21:31:35,256 - modelscope - INFO - epoch [1][3955/4982]\tlr: 1.941e-05, memory: 14449, loss: 1.2930\n", + "2023-07-02 21:31:37,474 - modelscope - INFO - epoch [1][3960/4982]\tlr: 1.932e-05, memory: 14449, loss: 1.4481\n", + "2023-07-02 21:31:40,154 - modelscope - INFO - epoch [1][3965/4982]\tlr: 1.923e-05, memory: 14449, loss: 1.6508\n", + "2023-07-02 21:31:42,215 - modelscope - INFO - epoch [1][3970/4982]\tlr: 1.915e-05, memory: 14449, loss: 1.6758\n", + "2023-07-02 21:31:44,996 - modelscope - INFO - epoch [1][3975/4982]\tlr: 1.906e-05, memory: 14449, loss: 3.0355\n", + "2023-07-02 21:31:47,982 - modelscope - INFO - epoch [1][3980/4982]\tlr: 1.898e-05, memory: 14449, loss: 2.0975\n", + "2023-07-02 21:31:50,425 - modelscope - INFO - epoch [1][3985/4982]\tlr: 1.889e-05, memory: 14449, loss: 2.7559\n", + "2023-07-02 21:31:53,599 - modelscope - INFO - epoch [1][3990/4982]\tlr: 1.881e-05, memory: 14449, loss: 0.6062\n", + "2023-07-02 21:31:56,806 - modelscope - INFO - epoch [1][3995/4982]\tlr: 1.872e-05, memory: 14449, loss: 1.8811\n", + "2023-07-02 21:31:59,002 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.24it/s]\n", + "2023-07-02 21:33:05,226 - modelscope - INFO - Saving checkpoint at 4000 iter\n", + "2023-07-02 21:33:05,253 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter3800_acc0.7713964581489563\n", + "2023-07-02 21:33:05,255 - modelscope - INFO - Saving checkpoint at 4000 iter\n", + "2023-07-02 21:33:05,280 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_3800\n", + "2023-07-02 21:33:05,283 - modelscope - INFO - epoch(eval) 
[1][281]\tmemory: 14449, evaluation/acc: 0.7721, evaluation/loss: 1.6809, loss: 2.3164\n", + "2023-07-02 21:33:07,641 - modelscope - INFO - epoch [1][4005/4982]\tlr: 1.855e-05, memory: 14449, loss: 1.3918\n", + "2023-07-02 21:33:10,090 - modelscope - INFO - epoch [1][4010/4982]\tlr: 1.847e-05, memory: 14449, loss: 1.7758\n", + "2023-07-02 21:33:13,438 - modelscope - INFO - epoch [1][4015/4982]\tlr: 1.839e-05, memory: 14449, loss: 0.8627\n", + "2023-07-02 21:33:16,653 - modelscope - INFO - epoch [1][4020/4982]\tlr: 1.831e-05, memory: 14449, loss: 1.2715\n", + "2023-07-02 21:33:20,248 - modelscope - INFO - epoch [1][4025/4982]\tlr: 1.822e-05, memory: 14449, loss: 2.1164\n", + "2023-07-02 21:33:23,029 - modelscope - INFO - epoch [1][4030/4982]\tlr: 1.814e-05, memory: 14449, loss: 1.0982\n", + "2023-07-02 21:33:25,384 - modelscope - INFO - epoch [1][4035/4982]\tlr: 1.806e-05, memory: 14449, loss: 1.3770\n", + "2023-07-02 21:33:27,542 - modelscope - INFO - epoch [1][4040/4982]\tlr: 1.798e-05, memory: 14449, loss: 1.4436\n", + "2023-07-02 21:33:29,897 - modelscope - INFO - epoch [1][4045/4982]\tlr: 1.790e-05, memory: 14449, loss: 1.6316\n", + "2023-07-02 21:33:32,478 - modelscope - INFO - epoch [1][4050/4982]\tlr: 1.782e-05, memory: 14449, loss: 0.8738\n", + "2023-07-02 21:33:35,228 - modelscope - INFO - epoch [1][4055/4982]\tlr: 1.774e-05, memory: 14449, loss: 1.9016\n", + "2023-07-02 21:33:37,569 - modelscope - INFO - epoch [1][4060/4982]\tlr: 1.766e-05, memory: 14449, loss: 1.6512\n", + "2023-07-02 21:33:40,234 - modelscope - INFO - epoch [1][4065/4982]\tlr: 1.758e-05, memory: 14449, loss: 1.3039\n", + "2023-07-02 21:33:42,749 - modelscope - INFO - epoch [1][4070/4982]\tlr: 1.750e-05, memory: 14449, loss: 1.2514\n", + "2023-07-02 21:33:45,340 - modelscope - INFO - epoch [1][4075/4982]\tlr: 1.742e-05, memory: 14449, loss: 2.8492\n", + "2023-07-02 21:33:47,472 - modelscope - INFO - epoch [1][4080/4982]\tlr: 1.734e-05, memory: 14449, loss: 2.0809\n", + "2023-07-02 
21:33:50,149 - modelscope - INFO - epoch [1][4085/4982]\tlr: 1.727e-05, memory: 14449, loss: 1.1375\n", + "2023-07-02 21:33:53,306 - modelscope - INFO - epoch [1][4090/4982]\tlr: 1.719e-05, memory: 14449, loss: 0.4272\n", + "2023-07-02 21:33:55,772 - modelscope - INFO - epoch [1][4095/4982]\tlr: 1.711e-05, memory: 14449, loss: 3.0484\n", + "2023-07-02 21:33:58,344 - modelscope - INFO - epoch [1][4100/4982]\tlr: 1.704e-05, memory: 14449, loss: 1.9910\n", + "2023-07-02 21:34:00,903 - modelscope - INFO - epoch [1][4105/4982]\tlr: 1.696e-05, memory: 14449, loss: 1.7889\n", + "2023-07-02 21:34:03,059 - modelscope - INFO - epoch [1][4110/4982]\tlr: 1.688e-05, memory: 14449, loss: 1.2016\n", + "2023-07-02 21:34:05,621 - modelscope - INFO - epoch [1][4115/4982]\tlr: 1.681e-05, memory: 14449, loss: 1.8453\n", + "2023-07-02 21:34:09,027 - modelscope - INFO - epoch [1][4120/4982]\tlr: 1.673e-05, memory: 14449, loss: 1.5453\n", + "2023-07-02 21:34:11,741 - modelscope - INFO - epoch [1][4125/4982]\tlr: 1.666e-05, memory: 14449, loss: 1.9316\n", + "2023-07-02 21:34:13,865 - modelscope - INFO - epoch [1][4130/4982]\tlr: 1.659e-05, memory: 14449, loss: 2.3094\n", + "2023-07-02 21:34:16,258 - modelscope - INFO - epoch [1][4135/4982]\tlr: 1.651e-05, memory: 14449, loss: 2.5703\n", + "2023-07-02 21:34:20,487 - modelscope - INFO - epoch [1][4140/4982]\tlr: 1.644e-05, memory: 14449, loss: 1.3984\n", + "2023-07-02 21:34:23,365 - modelscope - INFO - epoch [1][4145/4982]\tlr: 1.636e-05, memory: 14449, loss: 1.5207\n", + "2023-07-02 21:34:26,448 - modelscope - INFO - epoch [1][4150/4982]\tlr: 1.629e-05, memory: 14449, loss: 1.3838\n", + "2023-07-02 21:34:28,356 - modelscope - INFO - epoch [1][4155/4982]\tlr: 1.622e-05, memory: 14449, loss: 1.5562\n", + "2023-07-02 21:34:30,276 - modelscope - INFO - epoch [1][4160/4982]\tlr: 1.615e-05, memory: 14449, loss: 2.0258\n", + "2023-07-02 21:34:33,019 - modelscope - INFO - epoch [1][4165/4982]\tlr: 1.608e-05, memory: 14449, loss: 1.0586\n", + 
"2023-07-02 21:34:35,587 - modelscope - INFO - epoch [1][4170/4982]\tlr: 1.601e-05, memory: 14449, loss: 2.0258\n", + "2023-07-02 21:34:38,118 - modelscope - INFO - epoch [1][4175/4982]\tlr: 1.593e-05, memory: 14449, loss: 1.7780\n", + "2023-07-02 21:34:40,812 - modelscope - INFO - epoch [1][4180/4982]\tlr: 1.586e-05, memory: 14449, loss: 1.4871\n", + "2023-07-02 21:34:43,689 - modelscope - INFO - epoch [1][4185/4982]\tlr: 1.579e-05, memory: 14449, loss: 2.4375\n", + "2023-07-02 21:34:45,571 - modelscope - INFO - epoch [1][4190/4982]\tlr: 1.572e-05, memory: 14449, loss: 2.8734\n", + "2023-07-02 21:34:47,974 - modelscope - INFO - epoch [1][4195/4982]\tlr: 1.566e-05, memory: 14449, loss: 1.9576\n", + "2023-07-02 21:34:50,431 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.24it/s]\n", + "2023-07-02 21:35:56,740 - modelscope - INFO - Saving checkpoint at 4200 iter\n", + "2023-07-02 21:35:56,767 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_4000\n", + "2023-07-02 21:35:56,770 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14449, evaluation/acc: 0.7719, evaluation/loss: 1.6805, loss: 3.5922\n", + "2023-07-02 21:35:58,922 - modelscope - INFO - epoch [1][4205/4982]\tlr: 1.552e-05, memory: 14449, loss: 2.2658\n", + "2023-07-02 21:36:01,295 - modelscope - INFO - epoch [1][4210/4982]\tlr: 1.545e-05, memory: 14449, loss: 1.6580\n", + "2023-07-02 21:36:04,097 - modelscope - INFO - epoch [1][4215/4982]\tlr: 1.538e-05, memory: 14449, loss: 1.6982\n", + "2023-07-02 21:36:06,731 - modelscope - INFO - epoch [1][4220/4982]\tlr: 1.532e-05, memory: 14449, loss: 1.9359\n", + "2023-07-02 21:36:08,551 - modelscope - INFO - epoch [1][4225/4982]\tlr: 1.525e-05, memory: 14449, loss: 2.5812\n", + "2023-07-02 21:36:11,911 - modelscope - INFO - epoch [1][4230/4982]\tlr: 1.518e-05, memory: 14449, loss: 1.9195\n", + 
"2023-07-02 21:36:14,506 - modelscope - INFO - epoch [1][4235/4982]\tlr: 1.512e-05, memory: 14449, loss: 1.2545\n", + "2023-07-02 21:36:17,733 - modelscope - INFO - epoch [1][4240/4982]\tlr: 1.505e-05, memory: 14449, loss: 1.9451\n", + "2023-07-02 21:36:20,470 - modelscope - INFO - epoch [1][4245/4982]\tlr: 1.499e-05, memory: 14449, loss: 1.4648\n", + "2023-07-02 21:36:22,770 - modelscope - INFO - epoch [1][4250/4982]\tlr: 1.492e-05, memory: 14449, loss: 1.6961\n", + "2023-07-02 21:36:25,378 - modelscope - INFO - epoch [1][4255/4982]\tlr: 1.486e-05, memory: 14449, loss: 2.4164\n", + "2023-07-02 21:36:27,752 - modelscope - INFO - epoch [1][4260/4982]\tlr: 1.479e-05, memory: 14449, loss: 1.9963\n", + "2023-07-02 21:36:30,118 - modelscope - INFO - epoch [1][4265/4982]\tlr: 1.473e-05, memory: 14449, loss: 2.1148\n", + "2023-07-02 21:36:33,660 - modelscope - INFO - epoch [1][4270/4982]\tlr: 1.466e-05, memory: 14449, loss: 1.0082\n", + "2023-07-02 21:36:37,177 - modelscope - INFO - epoch [1][4275/4982]\tlr: 1.460e-05, memory: 14449, loss: 1.0070\n", + "2023-07-02 21:36:39,794 - modelscope - INFO - epoch [1][4280/4982]\tlr: 1.454e-05, memory: 14449, loss: 2.2496\n", + "2023-07-02 21:36:42,033 - modelscope - INFO - epoch [1][4285/4982]\tlr: 1.448e-05, memory: 14449, loss: 2.6797\n", + "2023-07-02 21:36:45,045 - modelscope - INFO - epoch [1][4290/4982]\tlr: 1.442e-05, memory: 14449, loss: 1.7584\n", + "2023-07-02 21:36:47,854 - modelscope - INFO - epoch [1][4295/4982]\tlr: 1.435e-05, memory: 14449, loss: 0.8922\n", + "2023-07-02 21:36:50,056 - modelscope - INFO - epoch [1][4300/4982]\tlr: 1.429e-05, memory: 14449, loss: 0.9248\n", + "2023-07-02 21:36:52,432 - modelscope - INFO - epoch [1][4305/4982]\tlr: 1.423e-05, memory: 14449, loss: 2.2406\n", + "2023-07-02 21:36:55,320 - modelscope - INFO - epoch [1][4310/4982]\tlr: 1.417e-05, memory: 14449, loss: 2.6234\n", + "2023-07-02 21:36:57,625 - modelscope - INFO - epoch [1][4315/4982]\tlr: 1.411e-05, memory: 14449, loss: 
2.5016\n", + "2023-07-02 21:36:59,666 - modelscope - INFO - epoch [1][4320/4982]\tlr: 1.405e-05, memory: 14449, loss: 2.4305\n", + "2023-07-02 21:37:01,862 - modelscope - INFO - epoch [1][4325/4982]\tlr: 1.400e-05, memory: 14449, loss: 2.3391\n", + "2023-07-02 21:37:03,730 - modelscope - INFO - epoch [1][4330/4982]\tlr: 1.394e-05, memory: 14449, loss: 2.1297\n", + "2023-07-02 21:37:06,491 - modelscope - INFO - epoch [1][4335/4982]\tlr: 1.388e-05, memory: 14449, loss: 1.5926\n", + "2023-07-02 21:37:08,327 - modelscope - INFO - epoch [1][4340/4982]\tlr: 1.382e-05, memory: 14449, loss: 2.0867\n", + "2023-07-02 21:37:10,978 - modelscope - INFO - epoch [1][4345/4982]\tlr: 1.376e-05, memory: 14449, loss: 1.5793\n", + "2023-07-02 21:37:13,418 - modelscope - INFO - epoch [1][4350/4982]\tlr: 1.371e-05, memory: 14449, loss: 1.3965\n", + "2023-07-02 21:37:16,097 - modelscope - INFO - epoch [1][4355/4982]\tlr: 1.365e-05, memory: 14449, loss: 1.6531\n", + "2023-07-02 21:37:18,922 - modelscope - INFO - epoch [1][4360/4982]\tlr: 1.360e-05, memory: 14449, loss: 1.2753\n", + "2023-07-02 21:37:21,708 - modelscope - INFO - epoch [1][4365/4982]\tlr: 1.354e-05, memory: 14449, loss: 1.6145\n", + "2023-07-02 21:37:23,716 - modelscope - INFO - epoch [1][4370/4982]\tlr: 1.349e-05, memory: 14449, loss: 2.6463\n", + "2023-07-02 21:37:27,213 - modelscope - INFO - epoch [1][4375/4982]\tlr: 1.343e-05, memory: 14449, loss: 0.6934\n", + "2023-07-02 21:37:30,031 - modelscope - INFO - epoch [1][4380/4982]\tlr: 1.338e-05, memory: 14449, loss: 2.2023\n", + "2023-07-02 21:37:33,441 - modelscope - INFO - epoch [1][4385/4982]\tlr: 1.332e-05, memory: 14449, loss: 1.6848\n", + "2023-07-02 21:37:35,797 - modelscope - INFO - epoch [1][4390/4982]\tlr: 1.327e-05, memory: 14449, loss: 1.6936\n", + "2023-07-02 21:37:39,329 - modelscope - INFO - epoch [1][4395/4982]\tlr: 1.322e-05, memory: 14449, loss: 0.5190\n", + "2023-07-02 21:37:41,815 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in 
ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.23it/s]\n", + "2023-07-02 21:38:48,264 - modelscope - INFO - Saving checkpoint at 4400 iter\n", + "2023-07-02 21:38:48,291 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter4000_acc0.7720601558685303\n", + "2023-07-02 21:38:48,293 - modelscope - INFO - Saving checkpoint at 4400 iter\n", + "2023-07-02 21:38:48,319 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_4200\n", + "2023-07-02 21:38:48,321 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14449, evaluation/acc: 0.7722, evaluation/loss: 1.6760, loss: 2.0141\n", + "2023-07-02 21:38:52,426 - modelscope - INFO - epoch [1][4405/4982]\tlr: 1.311e-05, memory: 14449, loss: 1.0922\n", + "2023-07-02 21:38:54,940 - modelscope - INFO - epoch [1][4410/4982]\tlr: 1.306e-05, memory: 14449, loss: 1.1858\n", + "2023-07-02 21:38:57,631 - modelscope - INFO - epoch [1][4415/4982]\tlr: 1.301e-05, memory: 14449, loss: 2.2687\n", + "2023-07-02 21:39:01,287 - modelscope - INFO - epoch [1][4420/4982]\tlr: 1.296e-05, memory: 14449, loss: 1.2707\n", + "2023-07-02 21:39:04,825 - modelscope - INFO - epoch [1][4425/4982]\tlr: 1.291e-05, memory: 14449, loss: 2.9891\n", + "2023-07-02 21:39:07,641 - modelscope - INFO - epoch [1][4430/4982]\tlr: 1.286e-05, memory: 14449, loss: 1.6935\n", + "2023-07-02 21:39:10,432 - modelscope - INFO - epoch [1][4435/4982]\tlr: 1.281e-05, memory: 14449, loss: 1.4844\n", + "2023-07-02 21:39:13,413 - modelscope - INFO - epoch [1][4440/4982]\tlr: 1.276e-05, memory: 14449, loss: 1.8453\n", + "2023-07-02 21:39:17,035 - modelscope - INFO - epoch [1][4445/4982]\tlr: 1.271e-05, memory: 14449, loss: 1.4854\n", + "2023-07-02 21:39:20,194 - modelscope - INFO - epoch [1][4450/4982]\tlr: 1.266e-05, memory: 14449, loss: 1.2645\n", + "2023-07-02 21:39:23,060 - modelscope - INFO - epoch [1][4455/4982]\tlr: 1.261e-05, 
memory: 14449, loss: 1.7969\n", + "2023-07-02 21:39:25,473 - modelscope - INFO - epoch [1][4460/4982]\tlr: 1.257e-05, memory: 14449, loss: 2.3201\n", + "2023-07-02 21:39:28,124 - modelscope - INFO - epoch [1][4465/4982]\tlr: 1.252e-05, memory: 14449, loss: 1.7680\n", + "2023-07-02 21:39:30,849 - modelscope - INFO - epoch [1][4470/4982]\tlr: 1.247e-05, memory: 14449, loss: 1.6301\n", + "2023-07-02 21:39:33,762 - modelscope - INFO - epoch [1][4475/4982]\tlr: 1.243e-05, memory: 14449, loss: 2.1186\n", + "2023-07-02 21:39:36,085 - modelscope - INFO - epoch [1][4480/4982]\tlr: 1.238e-05, memory: 14449, loss: 1.4234\n", + "2023-07-02 21:39:38,762 - modelscope - INFO - epoch [1][4485/4982]\tlr: 1.233e-05, memory: 14449, loss: 1.7797\n", + "2023-07-02 21:39:41,748 - modelscope - INFO - epoch [1][4490/4982]\tlr: 1.229e-05, memory: 14449, loss: 1.6820\n", + "2023-07-02 21:39:44,541 - modelscope - INFO - epoch [1][4495/4982]\tlr: 1.224e-05, memory: 14449, loss: 1.0109\n", + "2023-07-02 21:39:47,053 - modelscope - INFO - epoch [1][4500/4982]\tlr: 1.220e-05, memory: 14449, loss: 2.4484\n", + "2023-07-02 21:39:49,590 - modelscope - INFO - epoch [1][4505/4982]\tlr: 1.216e-05, memory: 14449, loss: 1.8258\n", + "2023-07-02 21:39:52,526 - modelscope - INFO - epoch [1][4510/4982]\tlr: 1.211e-05, memory: 14449, loss: 2.8773\n", + "2023-07-02 21:39:55,867 - modelscope - INFO - epoch [1][4515/4982]\tlr: 1.207e-05, memory: 14449, loss: 1.6246\n", + "2023-07-02 21:39:58,627 - modelscope - INFO - epoch [1][4520/4982]\tlr: 1.203e-05, memory: 14449, loss: 2.5562\n", + "2023-07-02 21:40:01,603 - modelscope - INFO - epoch [1][4525/4982]\tlr: 1.199e-05, memory: 14449, loss: 1.4436\n", + "2023-07-02 21:40:04,193 - modelscope - INFO - epoch [1][4530/4982]\tlr: 1.194e-05, memory: 14449, loss: 1.3711\n", + "2023-07-02 21:40:07,773 - modelscope - INFO - epoch [1][4535/4982]\tlr: 1.190e-05, memory: 14449, loss: 1.8023\n", + "2023-07-02 21:40:10,054 - modelscope - INFO - epoch [1][4540/4982]\tlr: 
1.186e-05, memory: 14449, loss: 2.0508\n", + "2023-07-02 21:40:12,973 - modelscope - INFO - epoch [1][4545/4982]\tlr: 1.182e-05, memory: 14449, loss: 2.5195\n", + "2023-07-02 21:40:16,038 - modelscope - INFO - epoch [1][4550/4982]\tlr: 1.178e-05, memory: 14449, loss: 1.7164\n", + "2023-07-02 21:40:18,581 - modelscope - INFO - epoch [1][4555/4982]\tlr: 1.174e-05, memory: 14449, loss: 1.5645\n", + "2023-07-02 21:40:20,963 - modelscope - INFO - epoch [1][4560/4982]\tlr: 1.170e-05, memory: 14449, loss: 2.0105\n", + "2023-07-02 21:40:23,706 - modelscope - INFO - epoch [1][4565/4982]\tlr: 1.167e-05, memory: 14449, loss: 1.3252\n", + "2023-07-02 21:40:25,962 - modelscope - INFO - epoch [1][4570/4982]\tlr: 1.163e-05, memory: 14449, loss: 1.8855\n", + "2023-07-02 21:40:29,182 - modelscope - INFO - epoch [1][4575/4982]\tlr: 1.159e-05, memory: 14449, loss: 1.2594\n", + "2023-07-02 21:40:31,408 - modelscope - INFO - epoch [1][4580/4982]\tlr: 1.155e-05, memory: 14449, loss: 2.0570\n", + "2023-07-02 21:40:34,024 - modelscope - INFO - epoch [1][4585/4982]\tlr: 1.152e-05, memory: 14449, loss: 2.6170\n", + "2023-07-02 21:40:36,599 - modelscope - INFO - epoch [1][4590/4982]\tlr: 1.148e-05, memory: 14449, loss: 1.6721\n", + "2023-07-02 21:40:39,014 - modelscope - INFO - epoch [1][4595/4982]\tlr: 1.144e-05, memory: 14449, loss: 1.1687\n", + "2023-07-02 21:40:41,965 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.22it/s]\n", + "2023-07-02 21:41:48,497 - modelscope - INFO - Saving checkpoint at 4600 iter\n", + "2023-07-02 21:41:48,524 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter4400_acc0.7721523642539978\n", + "2023-07-02 21:41:48,526 - modelscope - INFO - Saving checkpoint at 4600 iter\n", + "2023-07-02 21:41:48,552 - modelscope - INFO - deleting checkpoint: 
/home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_4400\n", + "2023-07-02 21:41:48,555 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14449, evaluation/acc: 0.7725, evaluation/loss: 1.6727, loss: 1.6291\n", + "2023-07-02 21:41:51,846 - modelscope - INFO - epoch [1][4605/4982]\tlr: 1.137e-05, memory: 14449, loss: 0.3742\n", + "2023-07-02 21:41:54,432 - modelscope - INFO - epoch [1][4610/4982]\tlr: 1.134e-05, memory: 14449, loss: 1.9832\n", + "2023-07-02 21:41:56,756 - modelscope - INFO - epoch [1][4615/4982]\tlr: 1.130e-05, memory: 14449, loss: 1.6234\n", + "2023-07-02 21:41:59,635 - modelscope - INFO - epoch [1][4620/4982]\tlr: 1.127e-05, memory: 14449, loss: 1.2416\n", + "2023-07-02 21:42:02,440 - modelscope - INFO - epoch [1][4625/4982]\tlr: 1.124e-05, memory: 14449, loss: 1.9668\n", + "2023-07-02 21:42:04,595 - modelscope - INFO - epoch [1][4630/4982]\tlr: 1.120e-05, memory: 14449, loss: 1.1527\n", + "2023-07-02 21:42:07,367 - modelscope - INFO - epoch [1][4635/4982]\tlr: 1.117e-05, memory: 14449, loss: 2.0367\n", + "2023-07-02 21:42:09,781 - modelscope - INFO - epoch [1][4640/4982]\tlr: 1.114e-05, memory: 14449, loss: 1.6268\n", + "2023-07-02 21:42:12,158 - modelscope - INFO - epoch [1][4645/4982]\tlr: 1.111e-05, memory: 14449, loss: 2.4633\n", + "2023-07-02 21:42:14,206 - modelscope - INFO - epoch [1][4650/4982]\tlr: 1.108e-05, memory: 14449, loss: 2.8531\n", + "2023-07-02 21:42:16,879 - modelscope - INFO - epoch [1][4655/4982]\tlr: 1.105e-05, memory: 14449, loss: 2.2703\n", + "2023-07-02 21:42:20,006 - modelscope - INFO - epoch [1][4660/4982]\tlr: 1.102e-05, memory: 14449, loss: 0.8350\n", + "2023-07-02 21:42:22,598 - modelscope - INFO - epoch [1][4665/4982]\tlr: 1.099e-05, memory: 14449, loss: 1.9375\n", + "2023-07-02 21:42:26,607 - modelscope - INFO - epoch [1][4670/4982]\tlr: 1.096e-05, memory: 14449, loss: 0.9594\n", + "2023-07-02 21:42:30,336 - modelscope - INFO - epoch [1][4675/4982]\tlr: 1.093e-05, memory: 14449, loss: 1.2943\n", + 
"2023-07-02 21:42:32,894 - modelscope - INFO - epoch [1][4680/4982]\tlr: 1.090e-05, memory: 14449, loss: 1.4293\n", + "2023-07-02 21:42:37,079 - modelscope - INFO - epoch [1][4685/4982]\tlr: 1.087e-05, memory: 14449, loss: 1.4109\n", + "2023-07-02 21:42:40,878 - modelscope - INFO - epoch [1][4690/4982]\tlr: 1.084e-05, memory: 14449, loss: 0.6270\n", + "2023-07-02 21:42:43,202 - modelscope - INFO - epoch [1][4695/4982]\tlr: 1.082e-05, memory: 14449, loss: 1.4430\n", + "2023-07-02 21:42:45,786 - modelscope - INFO - epoch [1][4700/4982]\tlr: 1.079e-05, memory: 14449, loss: 1.2656\n", + "2023-07-02 21:42:47,371 - modelscope - INFO - epoch [1][4705/4982]\tlr: 1.076e-05, memory: 14449, loss: 1.9141\n", + "2023-07-02 21:42:50,147 - modelscope - INFO - epoch [1][4710/4982]\tlr: 1.074e-05, memory: 14449, loss: 1.1176\n", + "2023-07-02 21:42:52,690 - modelscope - INFO - epoch [1][4715/4982]\tlr: 1.071e-05, memory: 14449, loss: 2.7781\n", + "2023-07-02 21:42:55,645 - modelscope - INFO - epoch [1][4720/4982]\tlr: 1.069e-05, memory: 14449, loss: 0.4620\n", + "2023-07-02 21:42:58,615 - modelscope - INFO - epoch [1][4725/4982]\tlr: 1.066e-05, memory: 14449, loss: 1.2354\n", + "2023-07-02 21:43:00,944 - modelscope - INFO - epoch [1][4730/4982]\tlr: 1.064e-05, memory: 14449, loss: 1.4683\n", + "2023-07-02 21:43:04,011 - modelscope - INFO - epoch [1][4735/4982]\tlr: 1.062e-05, memory: 14449, loss: 1.3249\n", + "2023-07-02 21:43:06,962 - modelscope - INFO - epoch [1][4740/4982]\tlr: 1.059e-05, memory: 14449, loss: 1.0039\n", + "2023-07-02 21:43:10,074 - modelscope - INFO - epoch [1][4745/4982]\tlr: 1.057e-05, memory: 14449, loss: 1.9678\n", + "2023-07-02 21:43:12,406 - modelscope - INFO - epoch [1][4750/4982]\tlr: 1.055e-05, memory: 14449, loss: 0.6996\n", + "2023-07-02 21:43:15,125 - modelscope - INFO - epoch [1][4755/4982]\tlr: 1.053e-05, memory: 14449, loss: 0.9693\n", + "2023-07-02 21:43:17,919 - modelscope - INFO - epoch [1][4760/4982]\tlr: 1.050e-05, memory: 14449, loss: 
2.0680\n", + "2023-07-02 21:43:20,500 - modelscope - INFO - epoch [1][4765/4982]\tlr: 1.048e-05, memory: 14449, loss: 1.6277\n", + "2023-07-02 21:43:22,713 - modelscope - INFO - epoch [1][4770/4982]\tlr: 1.046e-05, memory: 14449, loss: 1.9484\n", + "2023-07-02 21:43:24,366 - modelscope - INFO - epoch [1][4775/4982]\tlr: 1.044e-05, memory: 14449, loss: 2.6502\n", + "2023-07-02 21:43:27,079 - modelscope - INFO - epoch [1][4780/4982]\tlr: 1.042e-05, memory: 14449, loss: 1.2715\n", + "2023-07-02 21:43:29,023 - modelscope - INFO - epoch [1][4785/4982]\tlr: 1.040e-05, memory: 14449, loss: 1.8383\n", + "2023-07-02 21:43:31,660 - modelscope - INFO - epoch [1][4790/4982]\tlr: 1.038e-05, memory: 14449, loss: 1.6623\n", + "2023-07-02 21:43:34,660 - modelscope - INFO - epoch [1][4795/4982]\tlr: 1.037e-05, memory: 14449, loss: 1.2914\n", + "2023-07-02 21:43:37,720 - modelscope - WARNING - ('METRICS', 'default', 'my_metric') not found in ast index file\n", + "Total test samples: 100%|██████████| 281/281 [01:06<00:00, 4.23it/s]\n", + "2023-07-02 21:44:44,218 - modelscope - INFO - Saving checkpoint at 4800 iter\n", + "2023-07-02 21:44:44,248 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/best_iter4600_acc0.7724842429161072\n", + "2023-07-02 21:44:44,250 - modelscope - INFO - Saving checkpoint at 4800 iter\n", + "2023-07-02 21:44:44,279 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_4600\n", + "2023-07-02 21:44:44,282 - modelscope - INFO - epoch(eval) [1][281]\tmemory: 14449, evaluation/acc: 0.7729, evaluation/loss: 1.6707, loss: 1.1414\n", + "2023-07-02 21:44:46,870 - modelscope - INFO - epoch [1][4805/4982]\tlr: 1.033e-05, memory: 14449, loss: 0.6551\n", + "2023-07-02 21:44:49,076 - modelscope - INFO - epoch [1][4810/4982]\tlr: 1.031e-05, memory: 14449, loss: 1.6857\n", + "2023-07-02 21:44:51,074 - modelscope - INFO - epoch [1][4815/4982]\tlr: 1.030e-05, 
memory: 14449, loss: 1.9123\n", + "2023-07-02 21:44:53,385 - modelscope - INFO - epoch [1][4820/4982]\tlr: 1.028e-05, memory: 14449, loss: 1.4424\n", + "2023-07-02 21:44:55,581 - modelscope - INFO - epoch [1][4825/4982]\tlr: 1.027e-05, memory: 14449, loss: 2.2789\n", + "2023-07-02 21:44:58,108 - modelscope - INFO - epoch [1][4830/4982]\tlr: 1.025e-05, memory: 14449, loss: 1.9641\n", + "2023-07-02 21:45:00,888 - modelscope - INFO - epoch [1][4835/4982]\tlr: 1.024e-05, memory: 14449, loss: 1.6689\n", + "2023-07-02 21:45:02,999 - modelscope - INFO - epoch [1][4840/4982]\tlr: 1.022e-05, memory: 14449, loss: 1.9693\n", + "2023-07-02 21:45:06,302 - modelscope - INFO - epoch [1][4845/4982]\tlr: 1.021e-05, memory: 14449, loss: 1.3166\n", + "2023-07-02 21:45:09,602 - modelscope - INFO - epoch [1][4850/4982]\tlr: 1.019e-05, memory: 14449, loss: 1.5213\n", + "2023-07-02 21:45:12,571 - modelscope - INFO - epoch [1][4855/4982]\tlr: 1.018e-05, memory: 14449, loss: 1.8047\n", + "2023-07-02 21:45:14,672 - modelscope - INFO - epoch [1][4860/4982]\tlr: 1.017e-05, memory: 14449, loss: 1.5372\n", + "2023-07-02 21:45:17,717 - modelscope - INFO - epoch [1][4865/4982]\tlr: 1.016e-05, memory: 14449, loss: 1.3180\n", + "2023-07-02 21:45:20,504 - modelscope - INFO - epoch [1][4870/4982]\tlr: 1.014e-05, memory: 14449, loss: 1.3500\n", + "2023-07-02 21:45:23,506 - modelscope - INFO - epoch [1][4875/4982]\tlr: 1.013e-05, memory: 14449, loss: 2.2521\n", + "2023-07-02 21:45:25,399 - modelscope - INFO - epoch [1][4880/4982]\tlr: 1.012e-05, memory: 14449, loss: 1.9281\n", + "2023-07-02 21:45:28,444 - modelscope - INFO - epoch [1][4885/4982]\tlr: 1.011e-05, memory: 14449, loss: 1.4693\n", + "2023-07-02 21:45:31,381 - modelscope - INFO - epoch [1][4890/4982]\tlr: 1.010e-05, memory: 14449, loss: 2.0117\n", + "2023-07-02 21:45:35,557 - modelscope - INFO - epoch [1][4895/4982]\tlr: 1.009e-05, memory: 14449, loss: 0.5264\n", + "2023-07-02 21:45:39,804 - modelscope - INFO - epoch [1][4900/4982]\tlr: 
1.008e-05, memory: 14449, loss: 1.2449\n", + "2023-07-02 21:45:42,752 - modelscope - INFO - epoch [1][4905/4982]\tlr: 1.008e-05, memory: 14449, loss: 1.3134\n", + "2023-07-02 21:45:45,007 - modelscope - INFO - epoch [1][4910/4982]\tlr: 1.007e-05, memory: 14449, loss: 0.9836\n", + "2023-07-02 21:45:47,247 - modelscope - INFO - epoch [1][4915/4982]\tlr: 1.006e-05, memory: 14449, loss: 1.8653\n", + "2023-07-02 21:45:49,545 - modelscope - INFO - epoch [1][4920/4982]\tlr: 1.005e-05, memory: 14449, loss: 1.9227\n", + "2023-07-02 21:45:52,533 - modelscope - INFO - epoch [1][4925/4982]\tlr: 1.005e-05, memory: 14449, loss: 1.1875\n", + "2023-07-02 21:45:55,303 - modelscope - INFO - epoch [1][4930/4982]\tlr: 1.004e-05, memory: 14449, loss: 1.9453\n", + "2023-07-02 21:45:58,165 - modelscope - INFO - epoch [1][4935/4982]\tlr: 1.003e-05, memory: 14449, loss: 0.6951\n", + "2023-07-02 21:46:01,430 - modelscope - INFO - epoch [1][4940/4982]\tlr: 1.003e-05, memory: 14449, loss: 0.7973\n", + "2023-07-02 21:46:04,313 - modelscope - INFO - epoch [1][4945/4982]\tlr: 1.002e-05, memory: 14449, loss: 1.8844\n", + "2023-07-02 21:46:06,392 - modelscope - INFO - epoch [1][4950/4982]\tlr: 1.002e-05, memory: 14449, loss: 1.5102\n", + "2023-07-02 21:46:08,801 - modelscope - INFO - epoch [1][4955/4982]\tlr: 1.002e-05, memory: 14449, loss: 2.2773\n", + "2023-07-02 21:46:11,500 - modelscope - INFO - epoch [1][4960/4982]\tlr: 1.001e-05, memory: 14449, loss: 1.6867\n", + "2023-07-02 21:46:13,716 - modelscope - INFO - epoch [1][4965/4982]\tlr: 1.001e-05, memory: 14449, loss: 2.5187\n", + "2023-07-02 21:46:16,514 - modelscope - INFO - epoch [1][4970/4982]\tlr: 1.001e-05, memory: 14449, loss: 1.1453\n", + "2023-07-02 21:46:19,686 - modelscope - INFO - epoch [1][4975/4982]\tlr: 1.000e-05, memory: 14449, loss: 1.6125\n", + "2023-07-02 21:46:23,065 - modelscope - INFO - epoch [1][4980/4982]\tlr: 1.000e-05, memory: 14449, loss: 2.1379\n", + "2023-07-02 21:46:24,007 - modelscope - INFO - Saving checkpoint 
at 4982 iter\n", + "2023-07-02 21:46:24,163 - modelscope - INFO - deleting checkpoint: /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505/iter_4800\n", + "2023-07-02 21:46:24,209 - modelscope - INFO - Train finished. Uploading models, waiting...\n", + "2023-07-02 21:46:24,299 - modelscope - INFO - {'done': True}\n" + ] + } + ], + "source": [ + "def cfg_modify_fn(cfg: Config) -> Config:\n", + " cfg.update(CONFIG)\n", + " return cfg\n", + "\n", + "\n", + "trainer = EpochBasedTrainer(\n", + " model=model,\n", + " cfg_file=cfg_file,\n", + " data_collator=data_collate_fn,\n", + " train_dataset=train_dataset,\n", + " eval_dataset=val_dataset,\n", + " remove_unused_data=True,\n", + " seed=42,\n", + " cfg_modify_fn=cfg_modify_fn,\n", + ")\n", + "\n", + "trainer.train()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 可视化\n", + "tensorboard 命令: (e.g.) \n", + "`tensorboard --logdir /home/hackathon/my_git/agent/runs/chatglm2/v1-20230702-203505 --port 6006`\n", + "\n", + "\n", + "The following code is copied from baichuan_sft.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['lr', 'loss', 'evaluation/acc', 'evaluation/loss'])\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "tb_dir = os.path.join(WORK_DIR, 'tensorboard_output')\n", + "fname = os.listdir(tb_dir)[0]\n", + "tb_path = os.path.join(tb_dir, fname)\n", + "#\n", + "data = read_tensorboard_file(tb_path)\n", + "print(data.keys())\n", + "_ = plot_image(data, 'loss', 0.9)\n", + "_ = plot_image(data, 'lr', 0)\n", + "_ = plot_image(data, 'evaluation/acc', 0)\n", + "_ = plot_image(data, 'evaluation/loss', 0)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 推理\n", + "推理部分见chatglm2_infer.ipynb" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hackathon", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/pytorch/stable_diffusion/custom/finetune_stable_diffusion_custom.py b/examples/pytorch/stable_diffusion/custom/finetune_stable_diffusion_custom.py new file mode 100644 index 00000000..007ea82b --- /dev/null +++ b/examples/pytorch/stable_diffusion/custom/finetune_stable_diffusion_custom.py @@ -0,0 +1,168 @@ +import os +from dataclasses import dataclass, field + +import cv2 + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.trainers import EpochBasedTrainer, build_trainer +from modelscope.trainers.training_args import TrainingArgs +from modelscope.utils.constant import DownloadMode, Tasks + + +# Load configuration file and dataset +@dataclass(init=False) +class StableDiffusionCustomArguments(TrainingArgs): + class_prompt: str = field( + default=None, + metadata={ + 'help': + 'The prompt to specify 
images in the same class as provided instance images.', + }) + + instance_prompt: str = field( + default=None, + metadata={ + 'help': 'The prompt with identifier specifying the instance.', + }) + + modifier_token: str = field( + default=None, + metadata={ + 'help': 'A token to use as a modifier for the concept.', + }) + + num_class_images: int = field( + default=200, + metadata={ + 'help': 'Minimal class images for prior preservation loss.', + }) + + train_batch_size: int = field( + default=4, + metadata={ + 'help': 'Batch size (per device) for the training dataloader.', + }) + + sample_batch_size: int = field( + default=4, + metadata={ + 'help': 'Batch size (per device) for sampling images.', + }) + + initializer_token: str = field( + default='ktn+pll+ucd', + metadata={ + 'help': 'A token to use as initializer word.', + }) + + class_data_dir: str = field( + default='/tmp/class_data', + metadata={ + 'help': 'A folder containing the training data of class images.', + }) + + resolution: int = field( + default=512, + metadata={ + 'help': + 'The resolution for input images, all the images in the train/validation dataset will be resized to this', + }) + + prior_loss_weight: float = field( + default=1.0, + metadata={ + 'help': 'The weight of prior preservation loss.', + }) + + freeze_model: str = field( + default='crossattn_kv', + metadata={ + 'help': + 'crossattn to enable fine-tuning of all params in the cross attention.', + }) + + instance_data_name: str = field( + default='buptwq/lora-stable-diffusion-finetune-dog', + metadata={ + 'help': 'The instance data local dir or online ID.', + }) + + concepts_list: str = field( + default=None, + metadata={ + 'help': 'Path to json containing multiple concepts.', + }) + + +training_args = StableDiffusionCustomArguments( + task='text-to-image-synthesis').parse_cli() +config, args = training_args.to_config() + +if os.path.exists(args.train_dataset_name): + # Load local dataset + train_dataset = 
MsDataset.load(args.train_dataset_name) + validation_dataset = MsDataset.load(args.train_dataset_name) +else: + # Load online dataset + train_dataset = MsDataset.load( + args.train_dataset_name, + split='train', + download_mode=DownloadMode.FORCE_REDOWNLOAD) + validation_dataset = MsDataset.load( + args.train_dataset_name, + split='validation', + download_mode=DownloadMode.FORCE_REDOWNLOAD) + + +def cfg_modify_fn(cfg): + if args.use_model_config: + cfg.merge_from_dict(config) + else: + cfg = config + cfg.train.lr_scheduler = { + 'type': 'LambdaLR', + 'lr_lambda': lambda _: 1, + 'last_epoch': -1 + } + return cfg + + +kwargs = dict( + model=training_args.model, + model_revision=args.model_revision, + class_prompt=args.class_prompt, + instance_prompt=args.instance_prompt, + modifier_token=args.modifier_token, + num_class_images=args.num_class_images, + train_batch_size=args.train_batch_size, + sample_batch_size=args.sample_batch_size, + initializer_token=args.initializer_token, + class_data_dir=args.class_data_dir, + concepts_list=args.concepts_list, + resolution=args.resolution, + prior_loss_weight=args.prior_loss_weight, + freeze_model=args.freeze_model, + instance_data_name=args.instance_data_name, + work_dir=training_args.work_dir, + train_dataset=train_dataset, + eval_dataset=validation_dataset, + cfg_modify_fn=cfg_modify_fn) + +# build trainer and training +trainer = build_trainer(name=Trainers.custom_diffusion, default_args=kwargs) +trainer.train() + +# pipeline after training and save result +pipe = pipeline( + task=Tasks.text_to_image_synthesis, + model=training_args.model, + custom_dir=training_args.work_dir + '/output', + modifier_token='+', + model_revision=args.model_revision) + +output = pipe({'text': args.instance_prompt}) +# visualize the result on ipynb and save it +output +cv2.imwrite('./custom_result.png', output['output_imgs'][0]) diff --git a/examples/pytorch/stable_diffusion/custom/run_train_custom.sh 
b/examples/pytorch/stable_diffusion/custom/run_train_custom.sh new file mode 100644 index 00000000..fab8e059 --- /dev/null +++ b/examples/pytorch/stable_diffusion/custom/run_train_custom.sh @@ -0,0 +1,17 @@ +PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/custom/finetune_stable_diffusion_custom.py \ + --model 'AI-ModelScope/stable-diffusion-v2-1' \ + --model_revision 'v1.0.9' \ + --class_prompt "dog" \ + --instance_prompt="photo of a dog" \ + --work_dir './tmp/custom_diffusion' \ + --class_data_dir './tmp/class_data' \ + --train_dataset_name 'buptwq/lora-stable-diffusion-finetune-dog' \ + --max_epochs 250 \ + --modifier_token "+" \ + --num_class_images=200 \ + --save_ckpt_strategy 'by_epoch' \ + --logging_interval 1 \ + --train.dataloader.workers_per_gpu 0 \ + --evaluation.dataloader.workers_per_gpu 0 \ + --train.optimizer.lr 1e-5 \ + --use_model_config true diff --git a/examples/pytorch/stable_diffusion/dreambooth/finetune_stable_diffusion_dreambooth.py b/examples/pytorch/stable_diffusion/dreambooth/finetune_stable_diffusion_dreambooth.py index 1f38cff7..760396d0 100644 --- a/examples/pytorch/stable_diffusion/dreambooth/finetune_stable_diffusion_dreambooth.py +++ b/examples/pytorch/stable_diffusion/dreambooth/finetune_stable_diffusion_dreambooth.py @@ -1,3 +1,4 @@ +import os from dataclasses import dataclass, field import cv2 @@ -63,14 +64,20 @@ training_args = StableDiffusionDreamboothArguments( task='text-to-image-synthesis').parse_cli() config, args = training_args.to_config() -train_dataset = MsDataset.load( - args.train_dataset_name, - split='train', - download_mode=DownloadMode.FORCE_REDOWNLOAD) -validation_dataset = MsDataset.load( - args.train_dataset_name, - split='validation', - download_mode=DownloadMode.FORCE_REDOWNLOAD) +if os.path.exists(args.train_dataset_name): + # Load local dataset + train_dataset = MsDataset.load(args.train_dataset_name) + validation_dataset = MsDataset.load(args.train_dataset_name) +else: + # Load online dataset + 
train_dataset = MsDataset.load( + args.train_dataset_name, + split='train', + download_mode=DownloadMode.FORCE_REDOWNLOAD) + validation_dataset = MsDataset.load( + args.train_dataset_name, + split='validation', + download_mode=DownloadMode.FORCE_REDOWNLOAD) def cfg_modify_fn(cfg): @@ -113,4 +120,6 @@ pipe = pipeline( model_revision=args.model_revision) output = pipe({'text': args.prompt}) +# visualize the result on ipynb and save it +output cv2.imwrite('./dreambooth_result.png', output['output_imgs'][0]) diff --git a/examples/pytorch/stable_diffusion/lora/finetune_stable_diffusion_lora.py b/examples/pytorch/stable_diffusion/lora/finetune_stable_diffusion_lora.py index 183e817d..6001af48 100644 --- a/examples/pytorch/stable_diffusion/lora/finetune_stable_diffusion_lora.py +++ b/examples/pytorch/stable_diffusion/lora/finetune_stable_diffusion_lora.py @@ -1,3 +1,4 @@ +import os from dataclasses import dataclass, field import cv2 @@ -18,19 +19,31 @@ class StableDiffusionLoraArguments(TrainingArgs): 'help': 'The pipeline prompt.', }) + lora_rank: int = field( + default=4, + metadata={ + 'help': 'The rank size of lora intermediate linear.', + }) + training_args = StableDiffusionLoraArguments( task='text-to-image-synthesis').parse_cli() config, args = training_args.to_config() -train_dataset = MsDataset.load( - args.train_dataset_name, - split='train', - download_mode=DownloadMode.FORCE_REDOWNLOAD) -validation_dataset = MsDataset.load( - args.train_dataset_name, - split='validation', - download_mode=DownloadMode.FORCE_REDOWNLOAD) +if os.path.exists(args.train_dataset_name): + # Load local dataset + train_dataset = MsDataset.load(args.train_dataset_name) + validation_dataset = MsDataset.load(args.train_dataset_name) +else: + # Load online dataset + train_dataset = MsDataset.load( + args.train_dataset_name, + split='train', + download_mode=DownloadMode.FORCE_REDOWNLOAD) + validation_dataset = MsDataset.load( + args.train_dataset_name, + split='validation', + 
download_mode=DownloadMode.FORCE_REDOWNLOAD) def cfg_modify_fn(cfg): @@ -52,6 +65,7 @@ kwargs = dict( work_dir=training_args.work_dir, train_dataset=train_dataset, eval_dataset=validation_dataset, + lora_rank=args.lora_rank, cfg_modify_fn=cfg_modify_fn) # build trainer and training @@ -66,4 +80,6 @@ pipe = pipeline( model_revision=args.model_revision) output = pipe({'text': args.prompt}) +# visualize the result on ipynb and save it +output cv2.imwrite('./lora_result.png', output['output_imgs'][0]) diff --git a/examples/pytorch/stable_diffusion/lora/run_train_lora.sh b/examples/pytorch/stable_diffusion/lora/run_train_lora.sh index 876a2475..bf62f833 100644 --- a/examples/pytorch/stable_diffusion/lora/run_train_lora.sh +++ b/examples/pytorch/stable_diffusion/lora/run_train_lora.sh @@ -5,6 +5,7 @@ PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/lora/finetune_stable_dif --work_dir './tmp/lora_diffusion' \ --train_dataset_name 'buptwq/lora-stable-diffusion-finetune' \ --max_epochs 100 \ + --lora_rank 4 \ --save_ckpt_strategy 'by_epoch' \ --logging_interval 1 \ --train.dataloader.workers_per_gpu 0 \ diff --git a/examples/pytorch/stable_diffusion/tutorial.ipynb b/examples/pytorch/stable_diffusion/tutorial.ipynb new file mode 100644 index 00000000..941b4e76 --- /dev/null +++ b/examples/pytorch/stable_diffusion/tutorial.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Modelscope微调Stable Diffusion教程\n", + "## 原理讲解\n", + "\n", + "从头训练stable diffusion需要数十万美元和一个月以上的时间,巨额的算力和时间成本让普通人难以承受。因此最理想的手段是利用开源的stable diffusion模型,通过微调手段定制化属于自己的模型。近年涌现出很多有效的微调stable diffusion手段,如[Textual Inversion](https://arxiv.org/abs/2208.01618)、[Dreambooth](https://arxiv.org/pdf/2208.12242.pdf)、[Lora](https://arxiv.org/abs/2106.09685)、[Custom Diffusion](https://arxiv.org/pdf/2302.05543.pdf)等,Modelscope目前已经支持了Dreambooth和Lora两种方法。\n", + "\n", + "### Dreambooth\n", + "如果我们直接使用几张图片微调Stable 
Diffusion模型,很容易陷入“过拟合”的状态,通常的表现为模型生成的结果同质化且损失了泛化能力。除此之外,还容易遇到语言漂移的问题,严重影响了模型性能。Dreambooth提出了重建损失和特定类别先验保留损失相结合的方法来解决这一问题。\n", + "\n", + "### Lora\n", + "Lora的全称是Low-Rank Adaptation,是一种低秩自适应技术。这项技术起源于微调大型语言模型,在stable diffusion上也能取得非常好的效果。因为大模型一般是过参数化的,它们有更小的内在维度,Lora模型主要依赖于这个低的内在维度去做任务适配。通过低秩分解(先降维再升维)来模拟参数的改变量,从而以极小的参数量来实现大模型的间接训练。\n", + "\n", + "如下图所示,Lora在原先的模型层中并行插入了可训练的秩分解矩阵层,这个矩阵层是由一个降维矩阵A和一个升维矩阵B组成的。降维矩阵A采用高斯分布初始化,升维矩阵B初始化为全0,保证训练开始时旁路为0矩阵。在训练的时候原模型固定,只训练降维矩阵A和升维矩阵B;在推理的时候,将矩阵层加到原参数上。大量实验表明,对于stable diffusion我们用Lora微调Unet网络注意力层可以取得良好的效果。\n", + "\n", + "## 动手实践\n", + "\n", + "首先我们需要下载代码和安装环境。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "git clone https://github.com/modelscope/modelscope.git\n", + "cd modelscope" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "然后我们执行脚本,开始dreambooth和lora的训练和推理。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "bash examples/pytorch/stable_diffusion/dreambooth/run_train_dreambooth.sh" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "bash examples/pytorch/stable_diffusion/lora/run_train_lora.sh" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/modelscope/__init__.py b/modelscope/__init__.py index f7553958..bf95cb81 100644 --- a/modelscope/__init__.py +++ b/modelscope/__init__.py @@ -26,6 +26,9 @@ if TYPE_CHECKING: from .pipelines import Pipeline, pipeline from .utils.hub import read_config, create_model_if_not_exist from .utils.logger import get_logger + from .utils.hf_util import AutoConfig, GenerationConfig + from .utils.hf_util 
import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM + from .utils.hf_util import AutoTokenizer from .msdatasets import MsDataset else: @@ -65,6 +68,10 @@ else: 'pipelines': ['Pipeline', 'pipeline'], 'utils.hub': ['read_config', 'create_model_if_not_exist'], 'utils.logger': ['get_logger'], + 'utils.hf_util': [ + 'AutoConfig', 'GenerationConfig', 'AutoModel', + 'AutoModelForCausalLM', 'AutoModelForSeq2SeqLM', 'AutoTokenizer' + ], 'msdatasets': ['MsDataset'] } diff --git a/modelscope/exporters/base.py b/modelscope/exporters/base.py index d105afd2..1bfed176 100644 --- a/modelscope/exporters/base.py +++ b/modelscope/exporters/base.py @@ -9,7 +9,7 @@ from modelscope.utils.constant import ModelFile from modelscope.utils.logger import get_logger from .builder import build_exporter -logger = get_logger(__name__) +logger = get_logger() class Exporter(ABC): diff --git a/modelscope/exporters/cv/cartoon_translation_exporter.py b/modelscope/exporters/cv/cartoon_translation_exporter.py index 79b859cb..0cfd746f 100644 --- a/modelscope/exporters/cv/cartoon_translation_exporter.py +++ b/modelscope/exporters/cv/cartoon_translation_exporter.py @@ -9,7 +9,7 @@ from modelscope.exporters.tf_model_exporter import TfModelExporter from modelscope.models.cv.cartoon import CartoonModel from modelscope.utils.logger import get_logger -logger = get_logger(__name__) +logger = get_logger() if version.parse(tf.__version__) < version.parse('2'): pass diff --git a/modelscope/exporters/nlp/csanmt_for_translation_exporter.py b/modelscope/exporters/nlp/csanmt_for_translation_exporter.py index 6b69595d..65b55b43 100644 --- a/modelscope/exporters/nlp/csanmt_for_translation_exporter.py +++ b/modelscope/exporters/nlp/csanmt_for_translation_exporter.py @@ -13,7 +13,7 @@ from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import compare_arguments_nested -logger = get_logger(__name__) +logger = get_logger() if tf.__version__ >= 
'2.0': tf = tf.compat.v1 diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index 53745d8e..40445584 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -112,6 +112,7 @@ class Models(object): image_quality_assessment_degradation = 'image-quality-assessment-degradation' m2fp = 'm2fp' nerf_recon_acc = 'nerf-recon-acc' + nerf_recon_vq_compression = 'nerf-recon-vq-compression' bts_depth_estimation = 'bts-depth-estimation' vision_efficient_tuning = 'vision-efficient-tuning' bad_image_detecting = 'bad-image-detecting' @@ -168,6 +169,7 @@ class Models(object): doc2bot = 'doc2bot' peer = 'peer' llama = 'llama' + llama2 = 'llama2' chatglm_6b = 'chatglm6b' chatglm2_6b = 'chatglm2-6b' @@ -409,6 +411,7 @@ class Pipelines(object): image_human_parsing = 'm2fp-image-human-parsing' object_detection_3d_depe = 'object-detection-3d-depe' nerf_recon_acc = 'nerf-recon-acc' + nerf_recon_vq_compression = 'nerf-recon-vq-compression' bad_image_detecting = 'bad-image-detecting' controllable_image_generation = 'controllable-image-generation' fast_instance_segmentation = 'fast-instance-segmentation' @@ -534,6 +537,7 @@ class Pipelines(object): soonet_video_temporal_grounding = 'soonet-video-temporal-grounding' efficient_diffusion_tuning = 'efficient-diffusion-tuning' multimodal_dialogue = 'multimodal-dialogue' + llama2_text_generation_pipeline = 'llama2-text-generation-pipeline' # science tasks protein_structure = 'unifold-protein-structure' @@ -854,6 +858,9 @@ DEFAULT_MODEL_FOR_PIPELINE = { 'damo/cv_mobilenet-v2_bad-image-detecting'), Tasks.nerf_recon_acc: (Pipelines.nerf_recon_acc, 'damo/cv_nerf-3d-reconstruction-accelerate_damo'), + Tasks.nerf_recon_vq_compression: ( + Pipelines.nerf_recon_vq_compression, + 'damo/cv_nerf-3d-reconstruction-vq-compression_damo'), Tasks.siamese_uie: (Pipelines.siamese_uie, 'damo/nlp_structbert_siamese-uie_chinese-base'), Tasks.pedestrian_attribute_recognition: ( @@ -918,6 +925,7 @@ class MultiModalTrainers(object): stable_diffusion = 
'stable-diffusion' lora_diffusion = 'lora-diffusion' dreambooth_diffusion = 'dreambooth-diffusion' + custom_diffusion = 'custom-diffusion' class AudioTrainers(object): @@ -998,6 +1006,7 @@ class Preprocessors(object): ocr_detection = 'ocr-detection' bad_image_detecting_preprocessor = 'bad-image-detecting-preprocessor' nerf_recon_acc_preprocessor = 'nerf-recon-acc-preprocessor' + nerf_recon_vq_compression_preprocessor = 'nerf-recon-vq-compression-preprocessor' controllable_image_generation_preprocessor = 'controllable-image-generation-preprocessor' image_classification_preprocessor = 'image-classification-preprocessor' diff --git a/modelscope/models/base/base_model.py b/modelscope/models/base/base_model.py index 0edb740e..02f50483 100644 --- a/modelscope/models/base/base_model.py +++ b/modelscope/models/base/base_model.py @@ -88,6 +88,8 @@ class Model(ABC): equal to the model saved. For example, load a `backbone` into a `text-classification` model. Other kwargs will be directly fed into the `model` key, to replace the default configs. + use_hf(bool): If set True, will use AutoModel in hf to initialize the model to keep compatibility + with huggingface transformers. Returns: A model instance. @@ -116,6 +118,11 @@ class Model(ABC): local_model_dir = snapshot_download( model_name_or_path, revision, user_agent=invoked_by) logger.info(f'initialize model from {local_model_dir}') + + if kwargs.pop('use_hf', False): + from modelscope import AutoModel + return AutoModel.from_pretrained(local_model_dir) + if cfg_dict is not None: cfg = cfg_dict else: diff --git a/modelscope/models/cv/nerf_recon_vq_compression/__init__.py b/modelscope/models/cv/nerf_recon_vq_compression/__init__.py new file mode 100644 index 00000000..40bdbffb --- /dev/null +++ b/modelscope/models/cv/nerf_recon_vq_compression/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .nerf_recon_vq_compression import NeRFReconVQCompression + # from .nerf_preprocess import NeRFReconPreprocessor + +else: + _import_structure = { + 'nerf_recon_vq_compression': ['NeRFReconVQCompression'] + } + # _import_structure = {'nerf_preprocess': ['NeRFReconPreprocessor']} + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/cv/nerf_recon_vq_compression/dataloader/__init__.py b/modelscope/models/cv/nerf_recon_vq_compression/dataloader/__init__.py new file mode 100644 index 00000000..4e3a7cd6 --- /dev/null +++ b/modelscope/models/cv/nerf_recon_vq_compression/dataloader/__init__.py @@ -0,0 +1,12 @@ +from .blender import BlenderDataset +from .llff import LLFFDataset +from .nsvf import NSVF +from .ray_utils import get_rays, ndc_rays_blender +from .tankstemple import TanksTempleDataset + +dataset_dict = { + 'blender': BlenderDataset, + 'llff': LLFFDataset, + 'tankstemple': TanksTempleDataset, + 'nsvf': NSVF +} diff --git a/modelscope/models/cv/nerf_recon_vq_compression/dataloader/blender.py b/modelscope/models/cv/nerf_recon_vq_compression/dataloader/blender.py new file mode 100644 index 00000000..7174514c --- /dev/null +++ b/modelscope/models/cv/nerf_recon_vq_compression/dataloader/blender.py @@ -0,0 +1,173 @@ +import os + +import cv2 +import json +import numpy as np +import torch +from PIL import Image +from torch.utils.data import Dataset +from torchvision import transforms as T +from tqdm import tqdm + +from .ray_utils import * + + +def trans_t(t): + return torch.Tensor([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, t], + [0, 0, 0, 1]]).float() + + +def rot_phi(phi): + return torch.Tensor([[1, 0, 0, 0], [0, np.cos(phi), -np.sin(phi), 0], + [0, np.sin(phi), np.cos(phi), 0], [0, 0, 0, + 1]]).float() + + +def 
rot_theta(th): + return torch.Tensor([[np.cos(th), 0, -np.sin(th), 0], [0, 1, 0, 0], + [np.sin(th), 0, np.cos(th), 0], [0, 0, 0, + 1]]).float() + + +def pose_spherical(theta, phi, radius): + c2w = trans_t(radius) + c2w = rot_phi(phi / 180. * np.pi) @ c2w + c2w = rot_theta(theta / 180. * np.pi) @ c2w + c2w = torch.Tensor( + np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1] + ])) @ c2w + return c2w + + +class BlenderDataset(Dataset): + + def __init__(self, + datadir, + split='train', + downsample=1.0, + is_stack=False, + N_vis=-1): + + self.N_vis = N_vis + self.root_dir = datadir + self.split = split + self.is_stack = is_stack + self.img_wh = (int(800 / downsample), int(800 / downsample)) + self.define_transforms() + + self.scene_bbox = torch.tensor([[-1.5, -1.5, -1.5], [1.5, 1.5, 1.5]]) + self.blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], + [0, 0, -1, 0], [0, 0, 0, 1]]) + self.read_meta() + self.define_proj_mat() + + self.white_bg = True + self.near_far = [2.0, 6.0] + + self.center = torch.mean(self.scene_bbox, axis=0).float().view(1, 1, 3) + self.radius = (self.scene_bbox[1] - self.center).float().view(1, 1, 3) + self.downsample = downsample + + def read_depth(self, filename): + depth = np.array(read_pfm(filename)[0], dtype=np.float32) # (800, 800) + return depth + + def read_meta(self): + + with open( + os.path.join(self.root_dir, f'transforms_{self.split}.json'), + 'r') as f: + self.meta = json.load(f) + + w, h = self.img_wh + self.focal = 0.5 * 800 / np.tan(0.5 * self.meta['camera_angle_x']) + self.focal *= self.img_wh[0] / 800 + + # ray directions for all pixels, same for all images (same H, W, focal) + self.directions = get_ray_directions( + h, w, [self.focal, self.focal]) # (h, w, 3) + self.directions = self.directions / torch.norm( + self.directions, dim=-1, keepdim=True) + self.intrinsics = torch.tensor([[self.focal, 0, w / 2], + [0, self.focal, h / 2], [0, 0, + 1]]).float() + + self.image_paths = [] + self.poses = [] + 
self.all_rays = [] + self.all_rgbs = [] + self.all_masks = [] + self.all_depth = [] + self.downsample = 1.0 + + img_eval_interval = 1 if self.N_vis < 0 else len( + self.meta['frames']) // self.N_vis + idxs = list(range(0, len(self.meta['frames']), img_eval_interval)) + for i in tqdm(idxs, desc=f'Loading data {self.split} ({len(idxs)})'): + + frame = self.meta['frames'][i] + pose = np.array(frame['transform_matrix']) @ self.blender2opencv + c2w = torch.FloatTensor(pose) + self.poses += [c2w] + + image_path = os.path.join(self.root_dir, + f"{frame['file_path']}.png") + self.image_paths += [image_path] + img = Image.open(image_path) + + if self.downsample != 1.0: + img = img.resize(self.img_wh, Image.LANCZOS) + img = self.transform(img) # (4, h, w) + img = img.view(4, -1).permute(1, 0) # (h*w, 4) RGBA + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + self.all_rgbs += [img] + + rays_o, rays_d = get_rays(self.directions, c2w) # both (h*w, 3) + self.all_rays += [torch.cat([rays_o, rays_d], 1)] # (h*w, 6) + + self.poses = torch.stack(self.poses) + if not self.is_stack: + self.all_rays = torch.cat(self.all_rays, 0) + self.all_rgbs = torch.cat(self.all_rgbs, 0) + + else: + self.all_rays = torch.stack(self.all_rays, 0) + self.all_rgbs = torch.stack(self.all_rgbs, + 0).reshape(-1, *self.img_wh[::-1], 3) + + def define_transforms(self): + self.transform = T.ToTensor() + + def define_proj_mat(self): + self.proj_mat = self.intrinsics.unsqueeze(0) @ torch.inverse( + self.poses)[:, :3] + + def world2ndc(self, points, lindisp=None): + device = points.device + return (points - self.center.to(device)) / self.radius.to(device) + + def __len__(self): + return len(self.all_rgbs) + + def __getitem__(self, idx): + + if self.split == 'train': # use data in the buffers + sample = {'rays': self.all_rays[idx], 'rgbs': self.all_rgbs[idx]} + + else: # create data for each image separately + + img = self.all_rgbs[idx] + rays = self.all_rays[idx] + mask = self.all_masks[idx] # for quantity 
evaluation + + sample = {'rays': rays, 'rgbs': img, 'mask': mask} + return sample + + def get_render_pose(self, N_cameras=120): + render_poses = torch.stack([ + pose_spherical(angle, -30.0, 4.0) + for angle in np.linspace(-180, 180, N_cameras + 1)[:-1] + ], 0) + blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], + [0, 0, 0, 1]]) + return render_poses @ torch.Tensor(blender2opencv).float() diff --git a/modelscope/models/cv/nerf_recon_vq_compression/dataloader/llff.py b/modelscope/models/cv/nerf_recon_vq_compression/dataloader/llff.py new file mode 100644 index 00000000..5d25e358 --- /dev/null +++ b/modelscope/models/cv/nerf_recon_vq_compression/dataloader/llff.py @@ -0,0 +1,278 @@ +import glob +import os + +import numpy as np +import torch +from PIL import Image +from torch.utils.data import Dataset +from torchvision import transforms as T + +from .ray_utils import * + + +def normalize(v): + """Normalize a vector.""" + return v / np.linalg.norm(v) + + +def average_poses(poses): + """ + Calculate the average pose, which is then used to center all poses + using @center_poses. Its computation is as follows: + 1. Compute the center: the average of pose centers. + 2. Compute the z axis: the normalized average z axis. + 3. Compute axis y': the average y axis. + 4. Compute x' = y' cross product z, then normalize it as the x axis. + 5. Compute the y axis: z cross product x. + + Note that at step 3, we cannot directly use y' as y axis since it's + not necessarily orthogonal to z axis. We need to pass from x to y. + Inputs: + poses: (N_images, 3, 4) + Outputs: + pose_avg: (3, 4) the average pose + """ + # 1. Compute the center + center = poses[..., 3].mean(0) # (3) + + # 2. Compute the z axis + z = normalize(poses[..., 2].mean(0)) # (3) + + # 3. Compute axis y' (no need to normalize as it's not the final output) + y_ = poses[..., 1].mean(0) # (3) + + # 4. Compute the x axis + x = normalize(np.cross(z, y_)) # (3) + + # 5. 
Compute the y axis (as z and x are normalized, y is already of norm 1) + y = np.cross(x, z) # (3) + + pose_avg = np.stack([x, y, z, center], 1) # (3, 4) + + return pose_avg + + +def center_poses(poses, blender2opencv): + """ + Center the poses so that we can use NDC. + See https://github.com/bmild/nerf/issues/34 + Inputs: + poses: (N_images, 3, 4) + Outputs: + poses_centered: (N_images, 3, 4) the centered poses + pose_avg: (3, 4) the average pose + """ + poses = poses @ blender2opencv + pose_avg = average_poses(poses) # (3, 4) + pose_avg_homo = np.eye(4) + pose_avg_homo[: + 3] = pose_avg # convert to homogeneous coordinate for faster computation + pose_avg_homo = pose_avg_homo + # by simply adding 0, 0, 0, 1 as the last row + last_row = np.tile(np.array([0, 0, 0, 1]), + (len(poses), 1, 1)) # (N_images, 1, 4) + poses_homo = \ + np.concatenate([poses, last_row], 1) # (N_images, 4, 4) homogeneous coordinate + + poses_centered = np.linalg.inv( + pose_avg_homo) @ poses_homo # (N_images, 4, 4) + # poses_centered = poses_centered @ blender2opencv + poses_centered = poses_centered[:, :3] # (N_images, 3, 4) + + return poses_centered, pose_avg_homo + + +def viewmatrix(z, up, pos): + vec2 = normalize(z) + vec1_avg = up + vec0 = normalize(np.cross(vec1_avg, vec2)) + vec1 = normalize(np.cross(vec2, vec0)) + m = np.eye(4) + m[:3] = np.stack([-vec0, vec1, vec2, pos], 1) + return m + + +def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, N_rots=2, N=120): + render_poses = [] + rads = np.array(list(rads) + [1.]) + + for theta in np.linspace(0., 2. 
* np.pi * N_rots, N + 1)[:-1]: + c = np.dot( + c2w[:3, :4], + np.array( + [np.cos(theta), -np.sin(theta), -np.sin(theta * zrate), 1.]) + * rads) + z = normalize(c - np.dot(c2w[:3, :4], np.array([0, 0, -focal, 1.]))) + render_poses.append(viewmatrix(z, up, c)) + return render_poses + + +def get_spiral(c2ws_all, near_fars, rads_scale=1.0, N_views=120): + # center pose + c2w = average_poses(c2ws_all) + + # Get average pose + up = normalize(c2ws_all[:, :3, 1].sum(0)) + + # Find a reasonable "focus depth" for this dataset + dt = 0.75 + close_depth, inf_depth = near_fars.min() * 0.9, near_fars.max() * 5.0 + focal = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + + # Get radii for spiral path + zdelta = near_fars.min() * .2 + tt = c2ws_all[:, :3, 3] + rads = np.percentile(np.abs(tt), 90, 0) * rads_scale + render_poses = render_path_spiral( + c2w, up, rads, focal, zdelta, zrate=.5, N=N_views) + return np.stack(render_poses) + + +class LLFFDataset(Dataset): + + def __init__(self, + datadir, + split='train', + downsample=4, + is_stack=False, + hold_every=8): + """ + spheric_poses: whether the images are taken in a spheric inward-facing manner + default: False (forward-facing) + val_num: number of val images (used for multigpu training, validate same image for all gpus) + """ + + self.root_dir = datadir + self.split = split + self.hold_every = hold_every + self.is_stack = is_stack + self.downsample = downsample + self.define_transforms() + + self.blender2opencv = np.eye(4) + # np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]) + self.read_meta() + self.white_bg = False + + # self.near_far = [np.min(self.near_fars[:,0]),np.max(self.near_fars[:,1])] + self.near_far = [0.0, 1.0] + self.scene_bbox = torch.tensor([[-1.5, -1.67, -1.0], [1.5, 1.67, 1.0]]) + # self.scene_bbox = torch.tensor([[-1.67, -1.5, -1.0], [1.67, 1.5, 1.0]]) + self.center = torch.mean(self.scene_bbox, dim=0).float().view(1, 1, 3) + self.invradius = 1.0 / (self.scene_bbox[1] - 
self.center).float().view( + 1, 1, 3) + + def read_meta(self): + + poses_bounds = np.load( + os.path.join(self.root_dir, 'poses_bounds.npy')) # (N_images, 17) + self.image_paths = sorted( + glob.glob(os.path.join(self.root_dir, 'images_4/*'))) + # load full resolution image then resize + if self.split in ['train', 'test']: + assert len(poses_bounds) == len(self.image_paths), \ + 'Mismatch between number of images and number of poses! Please rerun COLMAP!' + + poses = poses_bounds[:, :15].reshape(-1, 3, 5) # (N_images, 3, 5) + self.near_fars = poses_bounds[:, -2:] # (N_images, 2) + # hwf = poses[:, :, -1] + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = poses[ + 0, :, -1] # original intrinsics, same for all images + self.img_wh = np.array( + [int(W / self.downsample), + int(H / self.downsample)]) + self.focal = [ + self.focal * self.img_wh[0] / W, self.focal * self.img_wh[1] / H + ] + + # Step 2: correct poses + # Original poses has rotation in form "down right back", change to "right up back" + # See https://github.com/bmild/nerf/issues/34 + poses = np.concatenate( + [poses[..., 1:2], -poses[..., :1], poses[..., 2:4]], -1) + # (N_images, 3, 4) exclude H, W, focal + self.poses, self.pose_avg = center_poses(poses, self.blender2opencv) + + # Step 3: correct scale so that the nearest depth is at a little more than 1.0 + # See https://github.com/bmild/nerf/issues/34 + near_original = self.near_fars.min() + scale_factor = near_original * 0.75 # 0.75 is the default parameter + # the nearest depth is at 1/0.75=1.33 + self.near_fars /= scale_factor + self.poses[..., 3] /= scale_factor + + # build rendering path + N_views = 120 + # N_rots = 2 + # tt = self.poses[:, :3, 3] # ptstocam(poses[:3,3,:].T, c2w).T + # up = normalize(self.poses[:, :3, 1].sum(0)) + # rads = np.percentile(np.abs(tt), 90, 0) + + self.render_path = get_spiral( + self.poses, self.near_fars, N_views=N_views) + + # distances_from_center = 
np.linalg.norm(self.poses[..., 3], axis=1) + # val_idx = np.argmin(distances_from_center) # choose val image as the closest to + # center image + + # ray directions for all pixels, same for all images (same H, W, focal) + W, H = self.img_wh + self.directions = get_ray_directions_blender(H, W, + self.focal) # (H, W, 3) + + # average_pose = average_poses(self.poses) + # dists = np.sum( + # np.square(average_pose[:3, 3] - self.poses[:, :3, 3]), -1) + i_test = np.arange(0, self.poses.shape[0], + self.hold_every) # [np.argmin(dists)] + img_list = i_test if self.split != 'train' else list( + set(np.arange(len(self.poses))) - set(i_test)) + + # use first N_images-1 to train, the LAST is val + self.all_rays = [] + self.all_rgbs = [] + for i in img_list: + image_path = self.image_paths[i] + c2w = torch.FloatTensor(self.poses[i]) + + img = Image.open(image_path).convert('RGB') + if self.downsample != 1.0: + img = img.resize(self.img_wh, Image.LANCZOS) + img = self.transform(img) # (3, h, w) + + img = img.view(3, -1).permute(1, 0) # (h*w, 3) RGB + self.all_rgbs += [img] + rays_o, rays_d = get_rays(self.directions, c2w) # both (h*w, 3) + rays_o, rays_d = ndc_rays_blender(H, W, self.focal[0], 1.0, rays_o, + rays_d) + # viewdir = rays_d / torch.norm(rays_d, dim=-1, keepdim=True) + + self.all_rays += [torch.cat([rays_o, rays_d], 1)] # (h*w, 6) + + if not self.is_stack: + self.all_rays = torch.cat(self.all_rays, + 0) # (len(self.meta['frames])*h*w, 3) + self.all_rgbs = torch.cat(self.all_rgbs, + 0) # (len(self.meta['frames])*h*w,3) + else: + self.all_rays = torch.stack(self.all_rays, + 0) # (len(self.meta['frames]),h,w, 3) + self.all_rgbs = torch.stack(self.all_rgbs, 0).reshape( + -1, *self.img_wh[::-1], 3) # (len(self.meta['frames]),h,w,3) + + def define_transforms(self): + self.transform = T.ToTensor() + + def __len__(self): + return len(self.all_rgbs) + + def __getitem__(self, idx): + + sample = {'rays': self.all_rays[idx], 'rgbs': self.all_rgbs[idx]} + + return sample + + def 
get_render_pose(self, N_cameras=120): + return get_spiral(self.poses, self.near_fars, N_views=N_cameras) diff --git a/modelscope/models/cv/nerf_recon_vq_compression/dataloader/nsvf.py b/modelscope/models/cv/nerf_recon_vq_compression/dataloader/nsvf.py new file mode 100644 index 00000000..e3cdbafa --- /dev/null +++ b/modelscope/models/cv/nerf_recon_vq_compression/dataloader/nsvf.py @@ -0,0 +1,182 @@ +import os + +import torch +from PIL import Image +from torch.utils.data import Dataset +from torchvision import transforms as T +from tqdm import tqdm + +from .ray_utils import * + + +def trans_t(t): + return torch.Tensor([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, t], + [0, 0, 0, 1]]).float() + + +def rot_phi(phi): + return torch.Tensor([[1, 0, 0, 0], [0, np.cos(phi), -np.sin(phi), 0], + [0, np.sin(phi), np.cos(phi), 0], [0, 0, 0, + 1]]).float() + + +def rot_theta(th): + return torch.Tensor([[np.cos(th), 0, -np.sin(th), 0], [0, 1, 0, 0], + [np.sin(th), 0, np.cos(th), 0], [0, 0, 0, + 1]]).float() + + +def pose_spherical(theta, phi, radius): + c2w = trans_t(radius) + c2w = rot_phi(phi / 180. * np.pi) @ c2w + c2w = rot_theta(theta / 180. 
* np.pi) @ c2w + c2w = torch.Tensor( + np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1] + ])) @ c2w + return c2w + + +class NSVF(Dataset): + """NSVF Generic Dataset.""" + + def __init__(self, + datadir, + split='train', + downsample=1.0, + wh=[800, 800], + is_stack=False): + self.root_dir = datadir + self.split = split + self.is_stack = is_stack + self.downsample = downsample + self.img_wh = (int(wh[0] / downsample), int(wh[1] / downsample)) + self.define_transforms() + + self.white_bg = True + self.near_far = [0.5, 6.0] + self.scene_bbox = torch.from_numpy( + np.loadtxt(f'{self.root_dir}/bbox.txt')).float()[:6].view(2, 3) + self.blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], + [0, 0, -1, 0], [0, 0, 0, 1]]) + self.read_meta() + self.define_proj_mat() + + self.center = torch.mean(self.scene_bbox, axis=0).float().view(1, 1, 3) + self.radius = (self.scene_bbox[1] - self.center).float().view(1, 1, 3) + + def bbox2corners(self): + corners = self.scene_bbox.unsqueeze(0).repeat(4, 1, 1) + for i in range(3): + corners[i, [0, 1], i] = corners[i, [1, 0], i] + return corners.view(-1, 3) + + def read_meta(self): + with open(os.path.join(self.root_dir, 'intrinsics.txt')) as f: + focal = float(f.readline().split()[0]) + self.intrinsics = np.array([[focal, 0, 400.0], [0, focal, 400.0], + [0, 0, 1]]) + self.intrinsics[:2] *= (np.array(self.img_wh) + / np.array([800, 800])).reshape(2, 1) + + pose_files = sorted(os.listdir(os.path.join(self.root_dir, 'pose'))) + img_files = sorted(os.listdir(os.path.join(self.root_dir, 'rgb'))) + + if self.split == 'train': + pose_files = [x for x in pose_files if x.startswith('0_')] + img_files = [x for x in img_files if x.startswith('0_')] + elif self.split == 'val': + pose_files = [x for x in pose_files if x.startswith('1_')] + img_files = [x for x in img_files if x.startswith('1_')] + elif self.split == 'test': + test_pose_files = [x for x in pose_files if x.startswith('2_')] + test_img_files = [x for x in img_files if 
x.startswith('2_')] + if len(test_pose_files) == 0: + test_pose_files = [x for x in pose_files if x.startswith('1_')] + test_img_files = [x for x in img_files if x.startswith('1_')] + pose_files = test_pose_files + img_files = test_img_files + + # ray directions for all pixels, same for all images (same H, W, focal) + self.directions = get_ray_directions( + self.img_wh[1], + self.img_wh[0], [self.intrinsics[0, 0], self.intrinsics[1, 1]], + center=self.intrinsics[:2, 2]) # (h, w, 3) + self.directions = self.directions / torch.norm( + self.directions, dim=-1, keepdim=True) + + self.render_path = torch.stack([ + pose_spherical(angle, -30.0, 4.0) + for angle in np.linspace(-180, 180, 40 + 1)[:-1] + ], 0) + + self.poses = [] + self.all_rays = [] + self.all_rgbs = [] + + assert len(img_files) == len(pose_files) + for img_fname, pose_fname in tqdm( + zip(img_files, pose_files), + desc=f'Loading data {self.split} ({len(img_files)})'): + image_path = os.path.join(self.root_dir, 'rgb', img_fname) + img = Image.open(image_path) + if self.downsample != 1.0: + img = img.resize(self.img_wh, Image.LANCZOS) + img = self.transform(img) # (4, h, w) + img = img.view(img.shape[0], -1).permute(1, 0) # (h*w, 4) RGBA + if img.shape[-1] == 4: + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:] + ) # blend A to RGB + self.all_rgbs += [img] + + c2w = np.loadtxt(os.path.join(self.root_dir, 'pose', pose_fname)) + c2w = torch.FloatTensor(c2w) + self.poses.append(c2w) # C2W + rays_o, rays_d = get_rays(self.directions, c2w) # both (h*w, 3) + self.all_rays += [torch.cat([rays_o, rays_d], 1)] # (h*w, 8) + + self.poses = torch.stack(self.poses) + if 'train' == self.split: + if self.is_stack: + self.all_rays = torch.stack(self.all_rays, + 0).reshape(-1, *self.img_wh[::-1], + 6) + self.all_rgbs = torch.stack(self.all_rgbs, + 0).reshape(-1, *self.img_wh[::-1], + 3) + else: + self.all_rays = torch.cat(self.all_rays, 0) + self.all_rgbs = torch.cat(self.all_rgbs, 0) + else: + self.all_rays = 
torch.stack(self.all_rays, 0) + self.all_rgbs = torch.stack(self.all_rgbs, + 0).reshape(-1, *self.img_wh[::-1], 3) + + def define_transforms(self): + self.transform = T.ToTensor() + + def define_proj_mat(self): + self.proj_mat = torch.from_numpy( + self.intrinsics[:3, :3]).unsqueeze(0).float() @ torch.inverse( + self.poses)[:, :3] + + def world2ndc(self, points): + device = points.device + return (points - self.center.to(device)) / self.radius.to(device) + + def __len__(self): + if self.split == 'train': + return len(self.all_rays) + return len(self.all_rgbs) + + def __getitem__(self, idx): + + if self.split == 'train': # use data in the buffers + sample = {'rays': self.all_rays[idx], 'rgbs': self.all_rgbs[idx]} + + else: # create data for each image separately + + img = self.all_rgbs[idx] + rays = self.all_rays[idx] + + sample = {'rays': rays, 'rgbs': img} + return sample diff --git a/modelscope/models/cv/nerf_recon_vq_compression/dataloader/ray_utils.py b/modelscope/models/cv/nerf_recon_vq_compression/dataloader/ray_utils.py new file mode 100644 index 00000000..39869fbc --- /dev/null +++ b/modelscope/models/cv/nerf_recon_vq_compression/dataloader/ray_utils.py @@ -0,0 +1,295 @@ +import re + +import numpy as np +import torch +from kornia import create_meshgrid +from torch import searchsorted + +# from utils import index_point_feature + + +def depth2dist(z_vals, cos_angle): + # z_vals: [N_ray N_sample] + device = z_vals.device + dists = z_vals[..., 1:] - z_vals[..., :-1] + dists = torch.cat( + [dists, + torch.Tensor([1e10]).to(device).expand(dists[..., :1].shape)], + -1) # [N_rays, N_samples] + dists = dists * cos_angle.unsqueeze(-1) + return dists + + +def ndc2dist(ndc_pts, cos_angle): + dists = torch.norm(ndc_pts[:, 1:] - ndc_pts[:, :-1], dim=-1) + dists = torch.cat([dists, 1e10 * cos_angle.unsqueeze(-1)], + -1) # [N_rays, N_samples] + return dists + + +def get_ray_directions(H, W, focal, center=None): + """ + Get ray directions for all pixels in camera 
def get_ray_directions_blender(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate, with the y
    component negated and the z component fixed to -1.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: focal length; either one scalar used for both axes or an
               indexable pair (fx, fy)
        center: optional pair (cx, cy) subtracted from the pixel coordinates;
                defaults to [W / 2, H / 2]
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
    """
    # The +0.5 offset is applied to every pixel coordinate before centering.
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    i, j = grid.unbind(-1)

    # Accept a bare scalar as well as the (fx, fy) pair existing callers pass.
    try:
        fx, fy = focal[0], focal[1]
    except (TypeError, IndexError):
        fx = fy = focal

    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack(
        [(i - cent[0]) / fx, -(j - cent[1]) / fy, -torch.ones_like(i)],
        -1)  # (H, W, 3)

    return directions
def ndc_rays_blender(H, W, focal, near, rays_o, rays_d):
    """Project rays into NDC space (variant with negated x/y scale factors).

    Inputs:
        H, W: image height and width
        focal: focal length used for both axes
        near: near-plane distance; origins are first moved to z = -near
        rays_o, rays_d: (..., 3) ray origins and directions
    Outputs:
        rays_o, rays_d: (..., 3) origins and directions after the projection
    """
    # Slide each origin along its ray until its z coordinate equals -near.
    shift = -(near + rays_o[..., 2]) / rays_d[..., 2]
    origin = rays_o + shift[..., None] * rays_d

    ox, oy, oz = origin[..., 0], origin[..., 1], origin[..., 2]
    dx, dy, dz = rays_d[..., 0], rays_d[..., 1], rays_d[..., 2]

    # Per-axis projection factors shared by the origin and direction terms.
    scale_x = -1. / (W / (2. * focal))
    scale_y = -1. / (H / (2. * focal))

    ndc_origin = torch.stack(
        [scale_x * ox / oz, scale_y * oy / oz, 1. + 2. * near / oz], -1)
    ndc_direction = torch.stack([
        scale_x * (dx / dz - ox / oz),
        scale_y * (dy / dz - oy / oz),
        -2. * near / oz,
    ], -1)

    return ndc_origin, ndc_direction
# Hierarchical sampling (section 5.2)
def sample_pdf(bins, weights, N_samples, det=False, pytest=False):
    """Draw samples from the piecewise-constant PDF defined by ``weights``.

    Inputs:
        bins: (batch, n_bins) bin edges; n_bins must equal weights.shape[-1] + 1
        weights: (batch, n_bins - 1) unnormalized weight of every bin
        N_samples: number of samples drawn per batch row
        det: use evenly spaced quantiles in [0, 1] instead of random ones
        pytest: overwrite the quantiles with NumPy values generated after
                ``np.random.seed(0)`` so results are reproducible
    Outputs:
        samples: (batch, N_samples) positions obtained by inverting the CDF
    """
    device = weights.device
    # Get pdf
    weights = weights + 1e-5  # prevent nans
    pdf = weights / torch.sum(weights, -1, keepdim=True)
    cdf = torch.cumsum(pdf, -1)
    cdf = torch.cat([torch.zeros_like(cdf[..., :1]), cdf],
                    -1)  # (batch, len(bins))

    sample_shape = list(cdf.shape[:-1]) + [N_samples]

    # Take uniform samples
    if det:
        u = torch.linspace(0., 1., steps=N_samples, device=device)
        u = u.expand(sample_shape)
    else:
        u = torch.rand(sample_shape, device=device)

    # Pytest, overwrite u with numpy's fixed random numbers
    if pytest:
        np.random.seed(0)
        if det:
            u = np.broadcast_to(np.linspace(0., 1., N_samples), sample_shape)
        else:
            u = np.random.rand(*sample_shape)
        # np.array() copies the read-only broadcast view; .to(device) keeps the
        # quantiles on the same device as the CDF they are searched in.
        u = torch.Tensor(np.array(u)).to(device)

    # Invert CDF
    u = u.contiguous()
    inds = torch.searchsorted(cdf.detach(), u, right=True)
    below = torch.clamp(inds - 1, min=0)
    above = torch.clamp(inds, max=cdf.shape[-1] - 1)
    inds_g = torch.stack([below, above], -1)  # (batch, N_samples, 2)

    matched_shape = [inds_g.shape[0], inds_g.shape[1], cdf.shape[-1]]
    cdf_g = torch.gather(cdf.unsqueeze(1).expand(matched_shape), 2, inds_g)
    bins_g = torch.gather(bins.unsqueeze(1).expand(matched_shape), 2, inds_g)

    denom = (cdf_g[..., 1] - cdf_g[..., 0])
    # A (near-)empty CDF interval would divide by ~0; fall back to 1 there.
    denom = torch.where(denom < 1e-5, torch.ones_like(denom), denom)
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])

    return samples
def read_pfm(filename):
    """Read a PFM image file.

    Inputs:
        filename: path of the file to read
    Outputs:
        data: float array of shape (height, width, 3) when the header is 'PF'
              or (height, width) when it is 'Pf', flipped vertically
        scale: absolute value of the scale entry in the header
    Raises:
        Exception: if the first line is neither 'PF' nor 'Pf', or the
                   dimension line does not match '<width> <height>'
    """
    # The context manager releases the handle on every exit path, including
    # the header-validation errors raised below.
    with open(filename, 'rb') as file:
        header = file.readline().decode('utf-8').rstrip()
        if header == 'PF':
            color = True
        elif header == 'Pf':
            color = False
        else:
            raise Exception('Not a PFM file.')

        dim_match = re.match(r'^(\d+)\s(\d+)\s$',
                             file.readline().decode('utf-8'))
        if dim_match:
            width, height = map(int, dim_match.groups())
        else:
            raise Exception('Malformed PFM header.')

        # The sign of the scale entry encodes the byte order of the payload.
        scale = float(file.readline().rstrip())
        if scale < 0:  # little-endian
            endian = '<'
            scale = -scale
        else:
            endian = '>'  # big-endian

        data = np.fromfile(file, endian + 'f')

    shape = (height, width, 3) if color else (height, width)
    data = np.reshape(data, shape)
    data = np.flipud(data)
    return data, scale
def normalize(x, axis=-1, order=2):
    """Scale ``x`` to unit norm along ``axis``.

    Works on both torch tensors and NumPy arrays.

    Inputs:
        x: torch.Tensor or array-like
        axis: axis along which the norm is taken
        order: order of the norm
    Outputs:
        (unit, norm): the normalized input and its norm with ``axis`` kept as
        a size-1 dimension. Zero-norm entries are returned unscaled (NumPy) or
        divided by 1e-8 (torch) instead of producing NaN.
    """
    if isinstance(x, torch.Tensor):
        l2 = x.norm(p=order, dim=axis, keepdim=True)
        return x / (l2 + 1e-8), l2

    l2 = np.expand_dims(np.linalg.norm(x, order, axis), axis)
    # Divide by 1 where the norm is zero, but hand back the true norm so both
    # branches return the same (unit, norm) pair.
    divisor = np.where(l2 == 0, 1, l2)
    return x / divisor, l2
"""NSVF Generic Dataset.""" + + def __init__(self, + datadir, + split='train', + downsample=1.0, + wh=[1920, 1080], + is_stack=False): + self.root_dir = datadir + self.split = split + self.is_stack = is_stack + self.downsample = downsample + self.img_wh = (int(wh[0] / downsample), int(wh[1] / downsample)) + self.define_transforms() + + self.white_bg = True + self.near_far = [0.01, 6.0] + self.scene_bbox = torch.from_numpy( + np.loadtxt(f'{self.root_dir}/bbox.txt')).float()[:6].view(2, + 3) * 1.2 + + self.blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], + [0, 0, -1, 0], [0, 0, 0, 1]]) + self.read_meta() + self.define_proj_mat() + + self.center = torch.mean(self.scene_bbox, axis=0).float().view(1, 1, 3) + self.radius = (self.scene_bbox[1] - self.center).float().view(1, 1, 3) + + def bbox2corners(self): + corners = self.scene_bbox.unsqueeze(0).repeat(4, 1, 1) + for i in range(3): + corners[i, [0, 1], i] = corners[i, [1, 0], i] + return corners.view(-1, 3) + + def read_meta(self): + + self.intrinsics = np.loadtxt( + os.path.join(self.root_dir, 'intrinsics.txt')) + self.intrinsics[:2] *= (np.array(self.img_wh) + / np.array([1920, 1080])).reshape(2, 1) + pose_files = sorted(os.listdir(os.path.join(self.root_dir, 'pose'))) + img_files = sorted(os.listdir(os.path.join(self.root_dir, 'rgb'))) + + if self.split == 'train': + pose_files = [x for x in pose_files if x.startswith('0_')] + img_files = [x for x in img_files if x.startswith('0_')] + elif self.split == 'val': + pose_files = [x for x in pose_files if x.startswith('1_')] + img_files = [x for x in img_files if x.startswith('1_')] + elif self.split == 'test': + test_pose_files = [x for x in pose_files if x.startswith('2_')] + test_img_files = [x for x in img_files if x.startswith('2_')] + if len(test_pose_files) == 0: + test_pose_files = [x for x in pose_files if x.startswith('1_')] + test_img_files = [x for x in img_files if x.startswith('1_')] + pose_files = test_pose_files + img_files = test_img_files + + # 
ray directions for all pixels, same for all images (same H, W, focal) + self.directions = get_ray_directions( + self.img_wh[1], + self.img_wh[0], [self.intrinsics[0, 0], self.intrinsics[1, 1]], + center=self.intrinsics[:2, 2]) # (h, w, 3) + self.directions = self.directions / torch.norm( + self.directions, dim=-1, keepdim=True) + + self.poses = [] + self.all_rays = [] + self.all_rgbs = [] + + assert len(img_files) == len(pose_files) + for img_fname, pose_fname in tqdm( + zip(img_files, pose_files), + desc=f'Loading data {self.split} ({len(img_files)})'): + image_path = os.path.join(self.root_dir, 'rgb', img_fname) + img = Image.open(image_path) + if self.downsample != 1.0: + img = img.resize(self.img_wh, Image.LANCZOS) + img = self.transform(img) # (4, h, w) + img = img.view(img.shape[0], -1).permute(1, 0) # (h*w, 4) RGBA + if img.shape[-1] == 4: + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:] + ) # blend A to RGB + self.all_rgbs.append(img) + + c2w = np.loadtxt(os.path.join(self.root_dir, 'pose', + pose_fname)) # @ cam_trans + c2w = torch.FloatTensor(c2w) + self.poses.append(c2w) # C2W + rays_o, rays_d = get_rays(self.directions, c2w) # both (h*w, 3) + self.all_rays += [torch.cat([rays_o, rays_d], 1)] # (h*w, 8) + + self.poses = torch.stack(self.poses) + + center = torch.mean(self.scene_bbox, dim=0) + radius = torch.norm(self.scene_bbox[1] - center) * 1.2 + up = torch.mean(self.poses[:, :3, 1], dim=0).tolist() + pos_gen = circle(radius=radius, h=-0.2 * up[1], axis='y') + self.render_path = gen_path(pos_gen, up=up, frames=200) + self.render_path[:, :3, 3] += center + + if 'train' == self.split: + if self.is_stack: + self.all_rays = torch.stack(self.all_rays, 0).reshape( + -1, *self.img_wh[::-1], + 6) # (len(self.meta['frames])*h*w, 3) + self.all_rgbs = torch.stack(self.all_rgbs, 0).reshape( + -1, *self.img_wh[::-1], + 3) # (len(self.meta['frames])*h*w, 3) + else: + self.all_rays = torch.cat( + self.all_rays, 0) # (len(self.meta['frames])*h*w, 3) + self.all_rgbs 
= torch.cat( + self.all_rgbs, 0) # (len(self.meta['frames])*h*w, 3) + else: + self.all_rays = torch.stack(self.all_rays, + 0) # (len(self.meta['frames]),h*w, 3) + self.all_rgbs = torch.stack(self.all_rgbs, 0).reshape( + -1, *self.img_wh[::-1], 3) # (len(self.meta['frames]),h,w,3) + + def define_transforms(self): + self.transform = T.ToTensor() + + def define_proj_mat(self): + self.proj_mat = torch.from_numpy( + self.intrinsics[:3, :3]).unsqueeze(0).float() @ torch.inverse( + self.poses)[:, :3] + + def world2ndc(self, points): + device = points.device + return (points - self.center.to(device)) / self.radius.to(device) + + def __len__(self): + if self.split == 'train': + return len(self.all_rays) + return len(self.all_rgbs) + + def __getitem__(self, idx): + + if self.split == 'train': # use data in the buffers + sample = {'rays': self.all_rays[idx], 'rgbs': self.all_rgbs[idx]} + + else: # create data for each image separately + + img = self.all_rgbs[idx] + rays = self.all_rays[idx] + + sample = {'rays': rays, 'rgbs': img} + return sample diff --git a/modelscope/models/cv/nerf_recon_vq_compression/nerf_recon_vq_compression.py b/modelscope/models/cv/nerf_recon_vq_compression/nerf_recon_vq_compression.py new file mode 100644 index 00000000..041a7af8 --- /dev/null +++ b/modelscope/models/cv/nerf_recon_vq_compression/nerf_recon_vq_compression.py @@ -0,0 +1,116 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
@MODELS.register_module(
    Tasks.nerf_recon_vq_compression,
    module_name=Models.nerf_recon_vq_compression)
class NeRFReconVQCompression(TorchModel):
    """Wrapper that loads a ``TensorVMSplitVQ`` checkpoint plus the test split
    of a dataset and exposes evaluation and camera-path rendering."""

    def __init__(self, model_dir=None, **kwargs):
        """Load the checkpoint and the test dataset.

        Args:
            model_dir: directory that contains the checkpoint file.
            **kwargs: must provide ``dataset_name`` (key into ``dataset_dict``),
                ``data_dir``, ``downsample`` and ``ndc_ray``; ``ckpt_path`` is
                the checkpoint file name relative to ``model_dir`` and falls
                back to ``ficus_demo.pt`` when missing or empty.

        Raises:
            Exception: if CUDA is unavailable or the checkpoint does not exist.
        """
        super().__init__(model_dir, **kwargs)

        if not torch.cuda.is_available():
            raise Exception('GPU is required')

        self.device = torch.device('cuda')
        self.data_type = kwargs['dataset_name']
        self.data_dir = kwargs['data_dir']
        self.downsample = kwargs['downsample']
        self.ndc_ray = kwargs['ndc_ray']

        # Pick the default *before* joining: once joined with model_dir the
        # path can never be '' or None, so a later emptiness check is dead.
        ckpt_name = kwargs.get('ckpt_path') or 'ficus_demo.pt'
        self.ckpt_path = os.path.join(model_dir, ckpt_name)
        if not os.path.exists(self.ckpt_path):
            raise Exception('ckpt path not found')

        # load model
        ckpt = torch.load(self.ckpt_path, map_location=self.device)
        model_kwargs = ckpt['kwargs']
        model_kwargs['device'] = self.device
        self.model = TensorVMSplitVQ(**model_kwargs)
        self.model.extreme_load(ckpt)

        self.renderer = OctreeRender_trilinear_fast

        # load data
        dataset = dataset_dict[self.data_type]
        self.test_dataset = dataset(
            self.data_dir,
            split='test',
            downsample=self.downsample,
            is_stack=True)

    def evaluation(self, render_dir, N_vis=-1):
        """Run the renderer's evaluation on the test split and log mean PSNR.

        Args:
            render_dir: output root; results go to
                ``<render_dir>/evalution_test``.
            N_vis: forwarded unchanged to the evaluation routine.
        """
        evaluation_test = partial(
            evaluation_render,
            test_dataset=self.test_dataset,
            renderer=self.renderer,
            white_bg=self.test_dataset.white_bg,
            ndc_ray=self.ndc_ray,
            device=self.device,
            compute_extra_metrics=True,
            im_save=True)

        # NOTE: the directory name keeps its existing spelling so that output
        # locations do not change.
        save_path = f'{render_dir}/evalution_test'
        os.makedirs(save_path, exist_ok=True)
        PSNRs = evaluation_test(
            tensorf=self.model, N_vis=N_vis, savePath=save_path)
        logger.info(
            f'VQRF-Evaluation: {self.ckpt_path} mean PSNR: {np.mean(PSNRs)}')

    def render_path(self, render_dir, N_vis=120):
        """Render along the poses from ``get_render_pose``.

        Args:
            render_dir: output root; results go to ``<render_dir>/render_path``.
            N_vis: number of cameras requested from ``get_render_pose``.
        """
        save_path = f'{render_dir}/render_path'
        os.makedirs(save_path, exist_ok=True)

        render_poses = self.get_render_pose(N_cameras=N_vis)
        # Inside the method, ``render_path`` resolves to the module-level
        # renderer function, not to this method.
        render_path(
            self.test_dataset,
            self.model,
            render_poses,
            self.renderer,
            savePath=save_path,
            white_bg=self.test_dataset.white_bg,
            ndc_ray=self.ndc_ray,
            device=self.device)
        logger.info(
            f'VQRF-Render: {self.ckpt_path} render path video result saved in {render_dir}/render_path'
        )

    def get_render_pose(self, N_cameras=120):
        """Return ``N_cameras`` render poses from the test dataset.

        Raises:
            NotImplementedError: for dataset types other than 'blender' and
                'llff'. Previously these silently yielded ``None``.
        """
        if self.data_type in ('blender', 'llff'):
            return self.test_dataset.get_render_pose(N_cameras=N_cameras)
        # NOTE(review): presumably the renderer cannot use a ``None`` pose
        # list, so fail here with a clear message -- confirm against renderer.
        raise NotImplementedError(
            f'render poses are not available for dataset type {self.data_type!r}'
        )
+from .tensorBase import * + + +class TensorVM(TensorBase): + + def __init__(self, aabb, gridSize, device, **kargs): + super(TensorVM, self).__init__(aabb, gridSize, device, **kargs) + + def init_svd_volume(self, res, device): + self.plane_coef = torch.nn.Parameter(0.1 * torch.randn( + (3, self.app_n_comp + self.density_n_comp, res, res), + device=device)) + self.line_coef = torch.nn.Parameter(0.1 * torch.randn( + (3, self.app_n_comp + self.density_n_comp, res, 1), device=device)) + self.basis_mat = torch.nn.Linear( + self.app_n_comp * 3, self.app_dim, bias=False, device=device) + + def get_optparam_groups(self, + lr_init_spatialxyz=0.02, + lr_init_network=0.001): + grad_vars = [{ + 'params': self.line_coef, + 'lr': lr_init_spatialxyz + }, { + 'params': self.plane_coef, + 'lr': lr_init_spatialxyz + }, { + 'params': self.basis_mat.parameters(), + 'lr': lr_init_network + }] + if isinstance(self.renderModule, torch.nn.Module): + grad_vars += [{ + 'params': self.renderModule.parameters(), + 'lr': lr_init_network + }] + return grad_vars + + def compute_features(self, xyz_sampled): + + coordinate_plane = torch.stack( + (xyz_sampled[..., self.matMode[0]], xyz_sampled[..., + self.matMode[1]], + xyz_sampled[..., self.matMode[2]])).detach() + coordinate_line = torch.stack( + (xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], xyz_sampled[..., + self.vecMode[2]])) + coordinate_line = torch.stack( + (torch.zeros_like(coordinate_line), coordinate_line), + dim=-1).detach() + + plane_feats = F.grid_sample( + self.plane_coef[:, -self.density_n_comp:], + coordinate_plane, + align_corners=True).view(-1, *xyz_sampled.shape[:1]) + line_feats = F.grid_sample( + self.line_coef[:, -self.density_n_comp:], + coordinate_line, + align_corners=True).view(-1, *xyz_sampled.shape[:1]) + + sigma_feature = torch.sum(plane_feats * line_feats, dim=0) + + plane_feats = F.grid_sample( + self.plane_coef[:, :self.app_n_comp], + coordinate_plane, + align_corners=True).view(3 * 
self.app_n_comp, -1) + line_feats = F.grid_sample( + self.line_coef[:, :self.app_n_comp], + coordinate_line, + align_corners=True).view(3 * self.app_n_comp, -1) + + app_features = self.basis_mat((plane_feats * line_feats).T) + + return sigma_feature, app_features + + def compute_densityfeature(self, xyz_sampled): + coordinate_plane = torch.stack( + (xyz_sampled[..., self.matMode[0]], xyz_sampled[..., + self.matMode[1]], + xyz_sampled[..., self.matMode[2]])).detach().view(3, -1, 1, 2) + coordinate_line = torch.stack( + (xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], xyz_sampled[..., + self.vecMode[2]])) + coordinate_line = torch.stack( + (torch.zeros_like(coordinate_line), coordinate_line), + dim=-1).detach().view(3, -1, 1, 2) + + plane_feats = F.grid_sample( + self.plane_coef[:, -self.density_n_comp:], + coordinate_plane, + align_corners=True).view(-1, *xyz_sampled.shape[:1]) + line_feats = F.grid_sample( + self.line_coef[:, -self.density_n_comp:], + coordinate_line, + align_corners=True).view(-1, *xyz_sampled.shape[:1]) + + sigma_feature = torch.sum(plane_feats * line_feats, dim=0) + + return sigma_feature + + def compute_appfeature(self, xyz_sampled): + coordinate_plane = torch.stack( + (xyz_sampled[..., self.matMode[0]], xyz_sampled[..., + self.matMode[1]], + xyz_sampled[..., self.matMode[2]])).detach().view(3, -1, 1, 2) + coordinate_line = torch.stack( + (xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], xyz_sampled[..., + self.vecMode[2]])) + coordinate_line = torch.stack( + (torch.zeros_like(coordinate_line), coordinate_line), + dim=-1).detach().view(3, -1, 1, 2) + + plane_feats = F.grid_sample( + self.plane_coef[:, :self.app_n_comp], + coordinate_plane, + align_corners=True).view(3 * self.app_n_comp, -1) + line_feats = F.grid_sample( + self.line_coef[:, :self.app_n_comp], + coordinate_line, + align_corners=True).view(3 * self.app_n_comp, -1) + + app_features = self.basis_mat((plane_feats * line_feats).T) + + 
return app_features + + def vectorDiffs(self, vector_comps): + total = 0 + + for idx in range(len(vector_comps)): + n_comp, n_size = vector_comps[idx].shape[:-1] + + dotp = torch.matmul( + vector_comps[idx].view(n_comp, n_size), + vector_comps[idx].view(n_comp, n_size).transpose(-1, -2)) + non_diagonal = dotp.view(-1)[1:].view(n_comp - 1, + n_comp + 1)[..., :-1] + total = total + torch.mean(torch.abs(non_diagonal)) + return total + + def vector_comp_diffs(self): + + return self.vectorDiffs( + self.line_coef[:, -self.density_n_comp:]) + self.vectorDiffs( + self.line_coef[:, :self.app_n_comp]) + + @torch.no_grad() + def up_sampling_VM(self, plane_coef, line_coef, res_target): + + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + + plane_coef[i] = torch.nn.Parameter( + F.interpolate( + plane_coef[i].data, + size=(res_target[mat_id_1], res_target[mat_id_0]), + mode='bilinear', + align_corners=True)) + line_coef[i] = torch.nn.Parameter( + F.interpolate( + line_coef[i].data, + size=(res_target[vec_id], 1), + mode='bilinear', + align_corners=True)) + return plane_coef, line_coef + + @torch.no_grad() + def upsample_volume_grid(self, res_target): + # assuming xyz have the same scale + scale = res_target[0] / self.line_coef.shape[2] + plane_coef = F.interpolate( + self.plane_coef.detach().data, + scale_factor=scale, + mode='bilinear', + align_corners=True) + line_coef = F.interpolate( + self.line_coef.detach().data, + size=(res_target[0], 1), + mode='bilinear', + align_corners=True) + self.plane_coef, self.line_coef = torch.nn.Parameter( + plane_coef), torch.nn.Parameter(line_coef) + self.compute_stepSize(res_target) + print(f'upsamping to {res_target}') + + +class TensorVMSplit(TensorBase): + + def __init__(self, aabb, gridSize, device, **kargs): + super(TensorVMSplit, self).__init__(aabb, gridSize, device, **kargs) + + def init_svd_volume(self, res, device): + self.density_plane, self.density_line = self.init_one_svd( + 
self.density_n_comp, self.gridSize, 0.1, device) + self.app_plane, self.app_line = self.init_one_svd( + self.app_n_comp, self.gridSize, 0.1, device) + self.basis_mat = torch.nn.Linear( + sum(self.app_n_comp), self.app_dim, bias=False).to(device) + print(self.basis_mat) + + def init_one_svd(self, n_component, gridSize, scale, device): + plane_coef, line_coef = [], [] + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + plane_coef.append( + torch.nn.Parameter(scale * torch.randn( + (1, n_component[i], gridSize[mat_id_1], + gridSize[mat_id_0])))) # + line_coef.append( + torch.nn.Parameter(scale * torch.randn( + (1, n_component[i], gridSize[vec_id], 1)))) + + return torch.nn.ParameterList(plane_coef).to( + device), torch.nn.ParameterList(line_coef).to(device) + + def get_optparam_groups(self, + lr_init_spatialxyz=0.02, + lr_init_network=0.001): + grad_vars = [{ + 'params': self.density_line, + 'lr': lr_init_spatialxyz + }, { + 'params': self.density_plane, + 'lr': lr_init_spatialxyz + }, { + 'params': self.app_line, + 'lr': lr_init_spatialxyz + }, { + 'params': self.app_plane, + 'lr': lr_init_spatialxyz + }, { + 'params': self.basis_mat.parameters(), + 'lr': lr_init_network + }] + if isinstance(self.renderModule, torch.nn.Module): + grad_vars += [{ + 'params': self.renderModule.parameters(), + 'lr': lr_init_network + }] + return grad_vars + + def vectorDiffs(self, vector_comps): + total = 0 + + for idx in range(len(vector_comps)): + n_comp, n_size = vector_comps[idx].shape[1:-1] + + dotp = torch.matmul( + vector_comps[idx].view(n_comp, n_size), + vector_comps[idx].view(n_comp, n_size).transpose(-1, -2)) + non_diagonal = dotp.view(-1)[1:].view(n_comp - 1, + n_comp + 1)[..., :-1] + total = total + torch.mean(torch.abs(non_diagonal)) + return total + + def vector_comp_diffs(self): + return self.vectorDiffs(self.density_line) + self.vectorDiffs( + self.app_line) + + def density_L1(self): + total = 0 + for idx in 
range(len(self.density_plane)): + total = total + torch.mean(torch.abs( + self.density_plane[idx])) + torch.mean( + torch.abs(self.density_line[idx])) + return total + + def TV_loss_density(self, reg): + total = 0 + for idx in range(len(self.density_plane)): + total = total + reg(self.density_plane[idx]) * 1e-2 + return total + + def TV_loss_app(self, reg): + total = 0 + for idx in range(len(self.app_plane)): + total = total + reg(self.app_plane[idx]) * 1e-2 + return total + + def compute_densityfeature(self, xyz_sampled): + + # plane + line basis + coordinate_plane = torch.stack( + (xyz_sampled[..., self.matMode[0]], xyz_sampled[..., + self.matMode[1]], + xyz_sampled[..., self.matMode[2]])).detach().view(3, -1, 1, 2) + coordinate_line = torch.stack( + (xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], xyz_sampled[..., + self.vecMode[2]])) + coordinate_line = torch.stack( + (torch.zeros_like(coordinate_line), coordinate_line), + dim=-1).detach().view(3, -1, 1, 2) + + sigma_feature = torch.zeros((xyz_sampled.shape[0], ), + device=xyz_sampled.device) + for idx_plane in range(len(self.density_plane)): + plane_coef_point = F.grid_sample( + self.density_plane[idx_plane], + coordinate_plane[[idx_plane]], + align_corners=True).view(-1, *xyz_sampled.shape[:1]) + line_coef_point = F.grid_sample( + self.density_line[idx_plane], + coordinate_line[[idx_plane]], + align_corners=True).view(-1, *xyz_sampled.shape[:1]) + sigma_feature = sigma_feature + torch.sum( + plane_coef_point * line_coef_point, dim=0) + + return sigma_feature + + def compute_appfeature(self, xyz_sampled): + + # plane + line basis + coordinate_plane = torch.stack( + (xyz_sampled[..., self.matMode[0]], xyz_sampled[..., + self.matMode[1]], + xyz_sampled[..., self.matMode[2]])).detach().view(3, -1, 1, 2) + coordinate_line = torch.stack( + (xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], xyz_sampled[..., + self.vecMode[2]])) + coordinate_line = torch.stack( + 
(torch.zeros_like(coordinate_line), coordinate_line), + dim=-1).detach().view(3, -1, 1, 2) + # import ipdb; ipdb.set_trace() + plane_coef_point, line_coef_point = [], [] + for idx_plane in range(len(self.app_plane)): + plane_coef_point.append( + F.grid_sample( + self.app_plane[idx_plane], + coordinate_plane[[idx_plane]], + align_corners=True).view(-1, *xyz_sampled.shape[:1])) + line_coef_point.append( + F.grid_sample( + self.app_line[idx_plane], + coordinate_line[[idx_plane]], + align_corners=True).view(-1, *xyz_sampled.shape[:1])) + plane_coef_point, line_coef_point = torch.cat( + plane_coef_point), torch.cat(line_coef_point) + + return self.basis_mat((plane_coef_point * line_coef_point).T) + + @torch.no_grad() + def up_sampling_VM(self, plane_coef, line_coef, res_target): + + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + plane_coef[i] = torch.nn.Parameter( + F.interpolate( + plane_coef[i].data, + size=(res_target[mat_id_1], res_target[mat_id_0]), + mode='bilinear', + align_corners=True)) + line_coef[i] = torch.nn.Parameter( + F.interpolate( + line_coef[i].data, + size=(res_target[vec_id], 1), + mode='bilinear', + align_corners=True)) + + return plane_coef, line_coef + + @torch.no_grad() + def upsample_volume_grid(self, res_target): + self.app_plane, self.app_line = self.up_sampling_VM( + self.app_plane, self.app_line, res_target) + self.density_plane, self.density_line = self.up_sampling_VM( + self.density_plane, self.density_line, res_target) + + self.update_stepSize(res_target) + print(f'upsamping to {res_target}') + + @torch.no_grad() + def shrink(self, new_aabb): + print('====> shrinking ...') + xyz_min, xyz_max = new_aabb + t_l, b_r = (xyz_min - self.aabb[0]) / self.units, ( + xyz_max - self.aabb[0]) / self.units + # print(new_aabb, self.aabb) + # print(t_l, b_r,self.alphaMask.alpha_volume.shape) + t_l, b_r = torch.round( + torch.round(t_l)).long(), torch.round(b_r).long() + 1 + b_r = torch.stack([b_r, 
def init_one_svd(self, n_component, gridSize, scale, device):
    """Create one randomly initialised line factor per entry of ``self.vecMode``.

    Each factor is a ``torch.nn.Parameter`` of shape
    ``(1, n_component, gridSize[axis], 1)`` drawn from ``torch.randn`` and
    multiplied by ``scale``; ``axis`` walks ``self.vecMode`` in order.

    Returns:
        A ``torch.nn.ParameterList`` holding the factors, moved to ``device``.
    """
    factors = [
        torch.nn.Parameter(
            scale * torch.randn((1, n_component, gridSize[axis], 1)))
        for axis in self.vecMode
    ]
    return torch.nn.ParameterList(factors).to(device)
def compute_densityfeature(self, xyz_sampled):
    """Evaluate the density feature at each sampled point.

    The coordinate selected by ``self.vecMode[k]`` is paired with a zero
    first grid component and used to ``grid_sample`` ``self.density_line[k]``
    (``align_corners=True``).  The three sampled factors are multiplied
    element-wise and the product is summed over dim 0.

    Args:
        xyz_sampled: points whose last dimension is indexed by the entries
            of ``self.vecMode``; the result is reshaped using
            ``xyz_sampled.shape[:1]``.

    Returns:
        Tensor with one density feature per point.
    """
    n_points = xyz_sampled.shape[:1]

    per_axis = torch.stack(
        [xyz_sampled[..., self.vecMode[k]] for k in range(3)])
    # grid_sample expects two grid coordinates; the first one is fixed to 0.
    grid = torch.stack((torch.zeros_like(per_axis), per_axis),
                       dim=-1).detach().view(3, -1, 1, 2)

    product = None
    for k in range(3):
        sampled = F.grid_sample(
            self.density_line[k], grid[[k]],
            align_corners=True).view(-1, *n_points)
        product = sampled if product is None else product * sampled

    return torch.sum(product, dim=0)
@torch.no_grad()
def shrink(self, new_aabb):
    """Crop the line factors to the voxels covered by ``new_aabb``.

    ``new_aabb`` (min corner, max corner) is converted to voxel indices
    relative to ``self.aabb[0]`` in units of ``self.units``; the upper index
    is clamped to ``self.gridSize``.  Every entry of ``self.density_line``
    and ``self.app_line`` is sliced along its third dimension to that range
    and re-wrapped as a new ``torch.nn.Parameter``.

    When no alpha mask is available, or its grid size differs from
    ``self.gridSize``, the bounding box is recomputed from the retained
    voxel indices so it matches the cropped grid exactly.  Finally
    ``self.aabb`` is replaced and ``self.update_stepSize`` is called with
    the new per-axis size.

    Args:
        new_aabb: tensor holding the requested (min, max) corners.
    """
    print('====> shrinking ...')
    xyz_min, xyz_max = new_aabb
    # A single round is enough: rounding an already rounded value is a no-op.
    t_l = torch.round((xyz_min - self.aabb[0]) / self.units).long()
    b_r = torch.round((xyz_max - self.aabb[0]) / self.units).long() + 1
    b_r = torch.stack([b_r, self.gridSize]).amin(0)

    for i, axis in enumerate(self.vecMode):
        lo, hi = t_l[axis], b_r[axis]
        self.density_line[i] = torch.nn.Parameter(
            self.density_line[i].data[..., lo:hi, :])
        self.app_line[i] = torch.nn.Parameter(
            self.app_line[i].data[..., lo:hi, :])

    # Without an alpha mask there is no grid to compare against, so the
    # box is always snapped to the kept voxels instead of raising
    # AttributeError on ``None.gridSize``.
    if self.alphaMask is None or not torch.all(
            self.alphaMask.gridSize == self.gridSize):
        t_l_r = t_l / (self.gridSize - 1)
        b_r_r = (b_r - 1) / (self.gridSize - 1)
        correct_aabb = torch.zeros_like(new_aabb)
        correct_aabb[0] = (1 - t_l_r) * self.aabb[0] + t_l_r * self.aabb[1]
        correct_aabb[1] = (1 - b_r_r) * self.aabb[0] + b_r_r * self.aabb[1]
        print('aabb', new_aabb, '\ncorrect aabb', correct_aabb)
        new_aabb = correct_aabb

    newSize = b_r - t_l
    self.aabb = new_aabb
    self.update_stepSize((newSize[0], newSize[1], newSize[2]))
class Timing:
    """
    Timing environment
    usage:
        with Timing("message"):
            your commands here
    will print CUDA runtime in ms

    Nothing is measured or printed unless ``debug`` is true.
    """

    def __init__(self, name, debug=False):
        self.name = name
        self.debug = debug

    def __enter__(self):
        if self.debug:
            self.start = torch.cuda.Event(enable_timing=True)
            self.end = torch.cuda.Event(enable_timing=True)
            self.start.record()
        # Return the manager so ``with Timing(...) as t`` binds something
        # usable instead of None.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None in every path, so exceptions raised inside the
        # ``with`` body are never suppressed.
        if not self.debug:
            return

        self.end.record()
        torch.cuda.synchronize()
        print(self.name, 'elapsed', self.start.elapsed_time(self.end), 'ms')
kargs['codebook_size'] + print('codebook size: ' + str(self.codebook_size)) + self.use_cosine_sim = kargs['use_cosine_sim'] == 1 + self.codebook_dim = None if kargs['codebook_dim'] == 0 else kargs[ + 'codebook_dim'] + self.vq = nn.ModuleList([ + VectorQuantize( + dim=self.app_n_comp[0], + codebook_size=self.codebook_size, # codebook size + decay=0.8, # specify number of quantizer + commitment_weight=1.0, + use_cosine_sim=self.use_cosine_sim, + codebook_dim=self.codebook_dim, + threshold_ema_dead_code=2.0, + ).to(self.device), + VectorQuantize( + dim=self.app_n_comp[1], + codebook_size=self.codebook_size, # codebook size + decay=0.8, # specify number of quantizer + commitment_weight=1.0, + use_cosine_sim=self.use_cosine_sim, + codebook_dim=self.codebook_dim, + threshold_ema_dead_code=2.0, + ).to(self.device), + VectorQuantize( + dim=self.app_n_comp[2], + codebook_size=self.codebook_size, # codebook size + decay=0.8, # specify number of quantizer + commitment_weight=1.0, + use_cosine_sim=self.use_cosine_sim, + codebook_dim=self.codebook_dim, + threshold_ema_dead_code=2.0, + ).to(self.device) + ]) + self.den_vq = nn.ModuleList([ + VectorQuantize( + dim=self.density_n_comp[0], + codebook_size=self.codebook_size, # codebook size + decay=0.8, # specify number of quantizer + commitment_weight=1.0, + use_cosine_sim=self.use_cosine_sim, + codebook_dim=self.codebook_dim, + threshold_ema_dead_code=2.0, + ).to(self.device), + VectorQuantize( + dim=self.density_n_comp[1], + codebook_size=self.codebook_size, # codebook size + decay=0.8, # specify number of quantizer + commitment_weight=1.0, + use_cosine_sim=self.use_cosine_sim, + codebook_dim=self.codebook_dim, + threshold_ema_dead_code=2.0, + ).to(self.device), + VectorQuantize( + dim=self.density_n_comp[2], + codebook_size=self.codebook_size, # codebook size + decay=0.8, # specify number of quantizer + commitment_weight=1.0, + use_cosine_sim=self.use_cosine_sim, + codebook_dim=self.codebook_dim, + threshold_ema_dead_code=2.0, 
+ ).to(self.device) + ]) + self.importance = kargs.get('importance', None) + self.plane_mask = kargs.get('plane_mask', None) + self.all_indices = kargs.get('all_indices', None) + + def extreme_load(self, ckpt): + if 'alphaMask.aabb' in ckpt.keys(): + length = np.prod(ckpt['alphaMask.shape']) + alpha_volume = torch.from_numpy( + np.unpackbits(ckpt['alphaMask.mask'])[:length].reshape( + ckpt['alphaMask.shape'])) + self.alphaMask = AlphaGridMask( + self.device, ckpt['alphaMask.aabb'].to(self.device), + alpha_volume.float().to(self.device)) + + # 1. load non-vq part + self.density_line.load_state_dict(ckpt['density_line']) + self.app_line.load_state_dict(ckpt['app_line']) + self.basis_mat.load_state_dict(ckpt['basis_mat']) + self.renderModule.load_state_dict(ckpt['mlp']) + + # 2. load vq part + # load vq_mask, keep_mask + self.plane_mask = [] + for i in range(3): + mask_shape = self.app_plane[i].shape[-2:] + vq_mask = np.unpackbits( + ckpt[f'vq_mask_{i}'], + count=np.prod(mask_shape)).reshape(mask_shape).astype(bool) + keep_mask = np.unpackbits( + ckpt[f'keep_mask_{i}'], + count=np.prod(mask_shape)).reshape(mask_shape).astype(bool) + self.plane_mask.append((vq_mask, keep_mask)) + + # recover app_plane, density_plane + import math + bits = int(math.log2(self.codebook_size)) + for idx_plane in range(3): + (vq_mask, keep_mask) = self.plane_mask[idx_plane] + # load appearance keep data from quantized data + int_repr = ckpt[f'quant_keep_data_{idx_plane}.int_repr'] + scale = ckpt[f'quant_keep_data_{idx_plane}.scale'] + zero_points = ckpt[f'quant_keep_data_{idx_plane}.zero_points'] + dequant = (int_repr - zero_points) * scale + keep_data = dequant.T.reshape( + *self.app_plane[idx_plane][:, :, keep_mask].shape) + self.app_plane[idx_plane].data[:, :, keep_mask] = keep_data + + # load appearance vq data from codebook + codebook = ckpt[f'codebook_{idx_plane}'].float() # + vq_count = int(vq_mask.sum()) + unpack1 = np.unpackbits( + ckpt[f'vq_indice_{idx_plane}'], count=vq_count * 
bits) + unpack2 = bin2dec( + torch.from_numpy(unpack1).reshape(vq_count, bits).long(), + bits=bits) + vq_data = codebook[0, unpack2, :] # N*len + vq_data = vq_data.T.reshape( + *(self.app_plane[idx_plane][:, :, vq_mask].shape)) + self.app_plane[idx_plane].data[:, :, vq_mask] = vq_data + + for idx_plane in range(3): + (vq_mask, keep_mask) = self.plane_mask[idx_plane] + # load density keep data from quantized data + int_repr = ckpt[f'quant_den_data_{idx_plane}.int_repr'] + scale = ckpt[f'quant_den_data_{idx_plane}.scale'] + zero_points = ckpt[f'quant_den_data_{idx_plane}.zero_points'] + dequant = (int_repr - zero_points) * scale + keep_data = dequant.T.reshape( + *self.density_plane[idx_plane][:, :, keep_mask].shape) + self.density_plane[idx_plane].data[:, :, keep_mask] = keep_data + + # load density vq data from codebook + codebook = ckpt[f'codebook_den_{idx_plane}'].float() # + vq_count = int(vq_mask.sum()) + unpack1 = np.unpackbits( + ckpt[f'den_vq_indice_{idx_plane}'], count=vq_count * bits) + unpack2 = bin2dec( + torch.from_numpy(unpack1).reshape(vq_count, bits).long(), + bits=bits) + vq_data = codebook[0, unpack2, :] # N*len + vq_data = vq_data.T.reshape( + *(self.density_plane[idx_plane][:, :, vq_mask].shape)) + self.density_plane[idx_plane].data[:, :, vq_mask] = vq_data + + def forward(self, + rays_chunk, + white_bg=True, + is_train=False, + ndc_ray=False, + N_samples=-1, + isvq=False): + # sample points + viewdirs = rays_chunk[:, 3:6] + if ndc_ray: + xyz_sampled, z_vals, ray_valid = self.sample_ray_ndc( + rays_chunk[:, :3], + viewdirs, + is_train=is_train, + N_samples=N_samples) + dists = torch.cat( + (z_vals[:, 1:] - z_vals[:, :-1], torch.zeros_like( + z_vals[:, :1])), + dim=-1) + rays_norm = torch.norm(viewdirs, dim=-1, keepdim=True) + dists = dists * rays_norm + viewdirs = viewdirs / rays_norm + else: + xyz_sampled, z_vals, ray_valid = self.sample_ray( + rays_chunk[:, :3], + viewdirs, + is_train=is_train, + N_samples=N_samples) + dists = torch.cat( + 
(z_vals[:, 1:] - z_vals[:, :-1], torch.zeros_like( + z_vals[:, :1])), + dim=-1) + viewdirs = viewdirs.view(-1, 1, 3).expand(xyz_sampled.shape) + + if self.alphaMask is not None: + alphas = self.alphaMask.sample_alpha(xyz_sampled[ray_valid]) + alpha_mask = alphas > 0 + ray_invalid = ~ray_valid + ray_invalid[ray_valid] |= (~alpha_mask) + ray_valid = ~ray_invalid + + sigma = torch.zeros(xyz_sampled.shape[:-1], device=xyz_sampled.device) + rgb = torch.zeros((*xyz_sampled.shape[:2], 3), + device=xyz_sampled.device) + + if ray_valid.any(): + xyz_sampled = self.normalize_coord(xyz_sampled) + sigma_feature = self.compute_densityfeature(xyz_sampled[ray_valid]) + + validsigma = self.feature2density(sigma_feature) + sigma[ray_valid] = validsigma + + alpha, weight, bg_weight = raw2alpha(sigma, + dists * self.distance_scale) + + app_mask = weight > self.rayMarch_weight_thres + + if app_mask.any(): + app_features = self.compute_appfeature(xyz_sampled[app_mask]) + valid_rgbs = self.renderModule(xyz_sampled[app_mask], + viewdirs[app_mask], app_features) + rgb[app_mask] = valid_rgbs + + acc_map = torch.sum(weight, -1) + rgb_map = torch.sum(weight[..., None] * rgb, -2) + + if white_bg or (is_train and torch.rand((1, )) < 0.5): + rgb_map = rgb_map + (1. - acc_map[..., None]) + + rgb_map = rgb_map.clamp(0, 1) + + with torch.no_grad(): + depth_map = torch.sum(weight * z_vals, -1) + depth_map = depth_map + (1. 
def getsize(compressed_file, tag='MB'):
    """Return the size of ``compressed_file`` as a ``'<value> <tag>'`` string.

    Args:
        compressed_file: path passed to ``os.path.getsize``.
        tag: unit to report in; one of ``'B'``, ``'KB'``, ``'MB'``, ``'GB'``
            (powers of 1024).

    Returns:
        The size followed by a space and ``tag``.  For ``'B'`` the value is
        the integer byte count; for the other units it is a float.

    Raises:
        ValueError: if ``tag`` is not a supported unit.  Previously the raw
            byte count was returned labelled with the unknown unit.
    """
    divisors = {'B': 1, 'KB': 1024, 'MB': 1024**2, 'GB': 1024**3}
    if tag not in divisors:
        raise ValueError(
            f'Unsupported size tag {tag!r}; expected one of {list(divisors)}')

    size = os.path.getsize(compressed_file)
    if tag != 'B':
        # Dividing by a power of two is exact, so this matches the chained
        # ``/ 1024`` divisions bit for bit.
        size = size / divisors[tag]
    return f'{size} {tag}'
class AlphaGridMask(torch.nn.Module):
    """Occupancy volume that can be queried at arbitrary points.

    The volume is stored as a ``(1, 1, D, H, W)`` tensor and sampled with
    ``F.grid_sample`` after mapping the query points from the bounding box
    ``aabb`` to the ``[-1, 1]`` range.
    """

    def __init__(self, device, aabb, alpha_volume):
        super().__init__()
        self.device = device

        box = aabb.to(self.device)
        self.aabb = box
        self.aabbSize = box[1] - box[0]
        # Scale factor that maps a box-relative offset onto a span of 2.
        self.invgridSize = 1.0 / self.aabbSize * 2

        dims = alpha_volume.shape
        self.alpha_volume = alpha_volume.view(1, 1, *dims[-3:])
        # Stored in reverse order of the volume's last three dimensions.
        self.gridSize = torch.LongTensor([dims[-1], dims[-2],
                                          dims[-3]]).to(self.device)

    def sample_alpha(self, xyz_sampled):
        """Return the interpolated volume value at each point, flattened."""
        normalized = self.normalize_coord(xyz_sampled)
        grid = normalized.view(1, -1, 1, 1, 3)
        sampled = F.grid_sample(self.alpha_volume, grid, align_corners=True)
        return sampled.view(-1)

    def normalize_coord(self, xyz_sampled):
        """Map points from the bounding box to ``[-1, 1]`` per axis."""
        offset = xyz_sampled - self.aabb[0]
        return offset * self.invgridSize - 1
class MLPRender(torch.nn.Module):
    """Three-layer MLP mapping features and view directions to RGB.

    The network input is the concatenation of ``features``, ``viewdirs``
    and, when ``viewpe > 0``, ``positional_encoding(viewdirs, viewpe)``.
    The output passes through a sigmoid.
    """

    def __init__(self, inChanel, viewpe=6, featureC=128):
        super().__init__()

        self.in_mlpC = (3 + 2 * viewpe * 3) + inChanel
        self.viewpe = viewpe

        # Layers are constructed in network order, left to right.
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(self.in_mlpC, featureC),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(featureC, featureC),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(featureC, 3),
        )
        # The output layer starts with a zero bias.
        torch.nn.init.constant_(self.mlp[-1].bias, 0)

    def forward(self, pts, viewdirs, features):
        """Return sigmoid-activated RGB; ``pts`` is accepted but not used."""
        pieces = [features, viewdirs]
        if self.viewpe > 0:
            pieces.append(positional_encoding(viewdirs, self.viewpe))
        return torch.sigmoid(self.mlp(torch.cat(pieces, dim=-1)))
def init_render_func(self, shadingMode, pos_pe, view_pe, fea_pe, featureC,
                     device):
    """Create ``self.renderModule`` for the requested shading mode.

    Args:
        shadingMode: ``'MLP_PE'``, ``'MLP_Fea'``, ``'MLP'`` or ``'RGB'``.
        pos_pe: third argument to ``MLPRender_PE`` (``'MLP_PE'`` only).
        view_pe: view-encoding argument for all three MLP renderers.
        fea_pe: third argument to ``MLPRender_Fea`` (``'MLP_Fea'`` only).
        featureC: hidden-width argument for the MLP renderers.
        device: device the MLP renderer is moved to.

    Raises:
        ValueError: if ``shadingMode`` is not one of the supported names.
            Previously this printed a message and called ``exit()``, which
            terminated the whole interpreter from inside library code.
        AssertionError: if ``shadingMode == 'RGB'`` and ``self.app_dim != 3``.
    """
    if shadingMode == 'MLP_PE':
        self.renderModule = MLPRender_PE(self.app_dim, view_pe, pos_pe,
                                         featureC).to(device)
    elif shadingMode == 'MLP_Fea':
        self.renderModule = MLPRender_Fea(self.app_dim, view_pe, fea_pe,
                                          featureC).to(device)
    elif shadingMode == 'MLP':
        self.renderModule = MLPRender(self.app_dim, view_pe,
                                      featureC).to(device)
    elif shadingMode == 'RGB':
        # RGBRender returns the features unchanged, so they must already
        # be three-channel colours.
        assert self.app_dim == 3
        self.renderModule = RGBRender
    else:
        raise ValueError(f'Unrecognized shading module: {shadingMode!r}')
    print(self.renderModule)
def get_kwargs(self):
    """Collect the configuration values stored on this instance into a dict.

    ``gridSize`` is converted with ``.tolist()`` and the stored
    ``app_n_comp`` is exported under the key ``'appearance_n_comp'``; every
    other entry uses the attribute's own name as key.
    """
    kwargs = {
        'aabb': self.aabb,
        'gridSize': self.gridSize.tolist(),
        'density_n_comp': self.density_n_comp,
        'appearance_n_comp': self.app_n_comp,
    }
    # The remaining entries map one-to-one onto attributes of the same name.
    same_named = ('app_dim', 'density_shift', 'alphaMask_thres',
                  'distance_scale', 'rayMarch_weight_thres', 'fea2denseAct',
                  'near_far', 'step_ratio', 'shadingMode', 'pos_pe',
                  'view_pe', 'fea_pe', 'featureC')
    for attr in same_named:
        kwargs[attr] = getattr(self, attr)
    return kwargs
def sample_ray(self, rays_o, rays_d, is_train=True, N_samples=-1):
    """Sample points along each ray at multiples of ``self.stepSize``.

    The start distance of a ray is the largest per-axis minimum of its
    distances to the two corners of ``self.aabb``, clamped to
    ``self.near_far``.  Samples are then placed at ``start + stepSize * k``
    for ``k = 0 .. N_samples - 1``; in training mode one random offset in
    ``[0, 1)`` per ray is added to ``k``.

    Args:
        rays_o: ray origins, last dimension holds the coordinates.
        rays_d: ray directions, same layout as ``rays_o``.
        is_train: add the per-ray random offset when true.
        N_samples: samples per ray; ``self.nSamples`` is used when not > 0.

    Returns:
        ``(points, distances, inside)`` where ``inside`` is true for points
        that are not outside ``self.aabb`` on any axis.
    """
    if not N_samples > 0:
        N_samples = self.nSamples
    near, far = self.near_far
    box_min, box_max = self.aabb[0], self.aabb[1]

    # Replace exact zeros in the direction to avoid dividing by zero.
    safe_dir = torch.where(rays_d == 0, torch.full_like(rays_d, 1e-6), rays_d)
    to_max = (box_max - rays_o) / safe_dir
    to_min = (box_min - rays_o) / safe_dir
    start = torch.minimum(to_max, to_min).amax(-1).clamp(min=near, max=far)

    offsets = torch.arange(N_samples)[None].float()
    if is_train:
        offsets = offsets.repeat(rays_d.shape[-2], 1)
        offsets += torch.rand_like(offsets[:, [0]])
    interpx = start[..., None] + self.stepSize * offsets.to(rays_o.device)

    rays_pts = rays_o[..., None, :] + rays_d[..., None, :] * interpx[..., None]
    outside = ((box_min > rays_pts) | (rays_pts > box_max)).any(dim=-1)

    return rays_pts, interpx, ~outside
self.getDenseAlpha(gridSize) + dense_xyz = dense_xyz.transpose(0, 2).contiguous() + alpha = alpha.clamp(0, 1).transpose(0, 2).contiguous()[None, None] + total_voxels = gridSize[0] * gridSize[1] * gridSize[2] + + ks = 3 + alpha = F.max_pool3d( + alpha, kernel_size=ks, padding=ks // 2, + stride=1).view(gridSize[::-1]) + alpha[alpha >= self.alphaMask_thres] = 1 + alpha[alpha < self.alphaMask_thres] = 0 + + self.alphaMask = AlphaGridMask(self.device, self.aabb, alpha) + + valid_xyz = dense_xyz[alpha > 0.5] + + xyz_min = valid_xyz.amin(0) + xyz_max = valid_xyz.amax(0) + + new_aabb = torch.stack((xyz_min, xyz_max)) + + total = torch.sum(alpha) + print(f'bbox: {xyz_min, xyz_max} alpha rest %%%f' % + (total / total_voxels * 100)) + return new_aabb + + @torch.no_grad() + def filtering_rays(self, + all_rays, + all_rgbs, + N_samples=256, + chunk=10240 * 5, + bbox_only=False): + print('========> filtering rays ...') + tt = time.time() + + N = torch.tensor(all_rays.shape[:-1]).prod() + + mask_filtered = [] + idx_chunks = torch.split(torch.arange(N), chunk) + for idx_chunk in idx_chunks: + rays_chunk = all_rays[idx_chunk].to(self.device) + + rays_o, rays_d = rays_chunk[..., :3], rays_chunk[..., 3:6] + if bbox_only: + vec = torch.where(rays_d == 0, torch.full_like(rays_d, 1e-6), + rays_d) + rate_a = (self.aabb[1] - rays_o) / vec + rate_b = (self.aabb[0] - rays_o) / vec + t_min = torch.minimum(rate_a, rate_b).amax(-1) + t_max = torch.maximum(rate_a, rate_b).amin(-1) + mask_inbbox = t_max > t_min + + else: + xyz_sampled, _, _ = self.sample_ray( + rays_o, rays_d, N_samples=N_samples, is_train=False) + mask_inbbox = (self.alphaMask.sample_alpha(xyz_sampled).view( + xyz_sampled.shape[:-1]) > 0).any(-1) + + mask_filtered.append(mask_inbbox.cpu()) + + mask_filtered = torch.cat(mask_filtered).view(all_rgbs.shape[:-1]) + + print(f'Ray filtering done! takes {time.time()-tt} s.' 
def feature2density(self, density_features):
    """Apply the activation named by ``self.fea2denseAct``.

    ``'softplus'`` adds ``self.density_shift`` before ``F.softplus``;
    ``'relu'`` applies ``F.relu`` directly.

    Raises:
        ValueError: if ``self.fea2denseAct`` is neither ``'softplus'`` nor
            ``'relu'``.  Previously the method fell through and returned
            ``None`` without any diagnostic.
    """
    if self.fea2denseAct == 'softplus':
        return F.softplus(density_features + self.density_shift)
    if self.fea2denseAct == 'relu':
        return F.relu(density_features)
    raise ValueError(
        f'Unsupported fea2denseAct {self.fea2denseAct!r}; '
        "expected 'softplus' or 'relu'")
torch.zeros(xyz_sampled.shape[:-1], device=xyz_sampled.device) + rgb = torch.zeros((*xyz_sampled.shape[:2], 3), + device=xyz_sampled.device) + + if ray_valid.any(): + xyz_sampled = self.normalize_coord(xyz_sampled) + sigma_feature = self.compute_densityfeature(xyz_sampled[ray_valid]) + + validsigma = self.feature2density(sigma_feature) + sigma[ray_valid] = validsigma + + alpha, weight, bg_weight = raw2alpha(sigma, + dists * self.distance_scale) + + app_mask = weight > self.rayMarch_weight_thres + + if app_mask.any(): + app_features = self.compute_appfeature(xyz_sampled[app_mask]) + valid_rgbs = self.renderModule(xyz_sampled[app_mask], + viewdirs[app_mask], app_features) + rgb[app_mask] = valid_rgbs + + acc_map = torch.sum(weight, -1) + rgb_map = torch.sum(weight[..., None] * rgb, -2) + + if white_bg or (is_train and torch.rand((1, )) < 0.5): + rgb_map = rgb_map + (1. - acc_map[..., None]) + + rgb_map = rgb_map.clamp(0, 1) + + with torch.no_grad(): + depth_map = torch.sum(weight * z_vals, -1) + depth_map = depth_map + (1. 
def sample_vectors(samples, num):
    """Pick ``num`` rows of ``samples`` at random.

    When at least ``num`` rows are available they are drawn without
    repetition (prefix of a random permutation); otherwise indices are
    drawn with ``torch.randint`` and rows may repeat.
    """
    available = samples.shape[0]
    where = samples.device

    if available < num:
        picked = torch.randint(0, available, (num, ), device=where)
    else:
        picked = torch.randperm(available, device=where)[:num]

    return samples[picked]
def sample_multinomial(total_count, probs):
    """Draw one multinomial sample as a chain of conditional binomials.

    Each category ``i`` receives ``Binomial(remaining_count, p_i / remaining_mass)``
    draws, after which the remaining count and remaining probability mass are
    reduced accordingly.

    Args:
        total_count: number of trials to distribute over the categories.
        probs: tensor of category probabilities; it is iterated along its
            first axis (assumed 1-D and summing to 1 -- TODO confirm at callers).

    Returns:
        A ``torch.long`` tensor shaped like ``probs`` holding the per-category
        counts, on the same device ``probs`` came from.
    """
    device = probs.device
    # Sampling is done on CPU; the result is moved back at the end.
    probs = probs.cpu()

    total_count = probs.new_full((), total_count)
    remainder = probs.new_ones(())
    sample = torch.empty_like(probs, dtype=torch.long)

    for i, p in enumerate(probs):
        if remainder > 0:
            # Rounding in ``remainder`` can push the ratio slightly outside
            # [0, 1]; clamp so torch.binomial always sees a valid probability.
            success_prob = (p / remainder).clamp(min=0., max=1.)
        else:
            # Probability mass is exhausted. The unguarded ratio would be
            # 0 / 0 = NaN for a zero-probability entry, which yields a
            # garbage count. A category that still claims mass takes
            # whatever is left; a zero-probability one takes nothing.
            success_prob = (p > 0).to(probs.dtype)

        s = torch.binomial(total_count, success_prob)
        sample[i] = s
        total_count -= s
        remainder -= p

    return sample.to(device)
def batched_embedding(indices, embeds):
    """Look up code vectors for every index, one codebook per leading entry.

    Args:
        indices: integer tensor of shape ``(h, b, n)`` holding code ids.
        embeds: codebook tensor of shape ``(h, c, d)``.

    Returns:
        Tensor of shape ``(h, b, n, d)`` where ``out[h, b, n]`` is
        ``embeds[h, indices[h, b, n]]``.
    """
    num_batches = indices.shape[1]
    code_dim = embeds.shape[-1]

    # Broadcast views only: every index is repeated across the feature axis,
    # and every codebook is shared across the batch axis.
    gather_index = indices.unsqueeze(-1).expand(-1, -1, -1, code_dim)
    per_batch_embeds = embeds.unsqueeze(1).expand(-1, num_batches, -1, -1)

    # Axis 2 of ``per_batch_embeds`` is the code axis ``c``.
    return per_batch_embeds.gather(2, gather_index)
def replace(self, batch_samples, batch_mask):
    """Overwrite the masked codes of each codebook with vectors sampled from the batch.

    Args:
        batch_samples: candidate vectors; axis 0 is unbound per codebook.
        batch_mask: boolean mask; axis 0 is unbound per codebook, and a True
            entry marks a code of that codebook to overwrite.

    ``self.embed`` is modified in place through ``.data``; nothing is returned.
    """
    # NOTE(review): the candidates are L2-normalised before sampling even
    # though this codebook measures Euclidean distance (see ``torch.cdist``
    # in ``forward``), so replacement codes are unit-norm whatever the scale
    # of the data -- confirm this is intended.
    normed_samples = l2norm(batch_samples)
    per_codebook = zip(normed_samples.unbind(dim=0), batch_mask.unbind(dim=0))

    for codebook_idx, (candidates, dead) in enumerate(per_codebook):
        if torch.any(dead):
            num_dead = dead.sum().item()
            # ``sample_fn`` works on batched input, so add a leading axis of
            # size 1 and strip it again afterwards.
            fresh = self.sample_fn(
                rearrange(candidates, '... -> 1 ...'), num_dead)
            self.embed.data[codebook_idx][dead] = rearrange(
                fresh, '1 ... -> ...')
@autocast(enabled=False)
def forward(self, x, weight=None, verbose=False):
    """Quantize ``x`` to its nearest codes and, in training mode, update the codebook.

    Args:
        x: input vectors with the feature axis last. Inputs with fewer than
            4 dims get a leading codebook axis of size 1 added for the
            computation and removed again before returning.
        weight: optional weights applied to both the cluster counts and the
            per-code sums; must broadcast against the ``(h, n, c)`` one-hot
            tensor and the ``(h, n, d)`` flattened input.
        verbose: forwarded to ``expire_codes_`` (prints the expired-code count).

    Returns:
        ``(quantize, embed_ind)``: the selected code vectors and their indices.
    """
    if weight is not None:
        # Rescale the weights so that their mean is 1.
        weight = weight * weight.numel() / weight.sum()
    needs_codebook_dim = x.ndim < 4

    # Autocast is disabled for this method; do the arithmetic in float32.
    x = x.float()

    if needs_codebook_dim:
        x = rearrange(x, '... -> 1 ...')

    shape, dtype = x.shape, x.dtype
    # (h, ..., d) -> (h, n, d): one flat list of vectors per codebook.
    flatten = rearrange(x, 'h ... d -> h (...) d')

    # Returns immediately once ``self.initted`` is set.
    self.init_embed_(flatten)

    # For a learnable codebook the distance computation must not
    # back-propagate into the codes.
    embed = self.embed if not self.learnable_codebook else self.embed.detach(
    )

    # Negated Euclidean distance, so that the arg-max is the nearest code.
    dist = -torch.cdist(flatten, embed, p=2)

    # With temperature 0 ``gumbel_sample`` is a plain arg-max.
    embed_ind = gumbel_sample(
        dist, dim=-1, temperature=self.sample_codebook_temp)
    embed_onehot = F.one_hot(embed_ind, self.codebook_size).type(dtype)
    embed_ind = embed_ind.view(*shape[:-1])

    # Note: looked up from ``self.embed``, not the detached ``embed`` above.
    quantize = batched_embedding(embed_ind, self.embed)

    if self.training:

        # (Weighted) number of vectors assigned to each code in this batch.
        if weight is not None:
            cluster_size = (embed_onehot * weight).sum(dim=1)
        else:
            cluster_size = embed_onehot.sum(dim=1)
        self.all_reduce_fn(cluster_size)
        ema_inplace(self.cluster_size, cluster_size, self.decay)

        # (Weighted) sum of the vectors assigned to each code: (h, c, d).
        if weight is not None:

            embed_sum = einsum('h n d, h n c -> h c d', flatten * weight,
                               embed_onehot)
        else:
            embed_sum = einsum('h n d, h n c -> h c d', flatten,
                               embed_onehot)
        self.all_reduce_fn(embed_sum)
        # Smoothed EMA counts; ``cluster_size`` is rebound here and no longer
        # holds the per-batch counts computed above.
        cluster_size = laplace_smoothing(
            self.cluster_size, self.codebook_size,
            self.eps) * self.cluster_size.sum()

        # NOTE(review): the codes themselves are EMA-updated towards
        # (batch sum / smoothed EMA count); the ``embed_avg`` buffer is not
        # touched in this method -- confirm that is intended.
        ema_inplace(self.embed,
                    embed_sum / rearrange(cluster_size, '... -> ... 1'),
                    self.decay)
        # Re-seed codes whose EMA count fell below the dead-code threshold.
        self.expire_codes_(x, verbose)

    if needs_codebook_dim:
        # Drop the codebook axis that was added above.
        quantize, embed_ind = map(lambda t: rearrange(t, '1 ... -> ...'),
                                  (quantize, embed_ind))

    return quantize, embed_ind
def forward(self, x, weight=None, verbose=False):
    """Quantize ``x`` with the codebook and compute the auxiliary training loss.

    Args:
        x: input features. Layout follows the rearrange patterns below:
            ``(b, c, h, w)`` when ``accept_image_fmap`` is set, ``(b, d, n)``
            when ``channel_last`` is False, otherwise ``(b, n, d)``.
        weight: optional weights, forwarded unchanged to the codebook.
        verbose: forwarded unchanged to the codebook.

    Returns:
        ``(quantize, embed_ind, loss)``. ``quantize`` has the layout of the
        input. ``loss`` is a 1-element tensor: zero in eval mode, and in
        training mode the weighted sum of the commitment loss and, when
        enabled, the orthogonal regularisation loss.
    """
    device, heads, is_multiheaded = x.device, self.heads, self.heads > 1

    need_transpose = not self.channel_last and not self.accept_image_fmap

    if self.accept_image_fmap:
        height, width = x.shape[-2:]
        x = rearrange(x, 'b c h w -> b (h w) c')

    if need_transpose:
        x = rearrange(x, 'b d n -> b n d')

    x = self.project_in(x)

    if is_multiheaded:
        # Separate codebooks: heads become the codebook axis. Shared
        # codebook: heads are folded into the batch axis instead.
        ein_rhs_eq = 'h b n d' if self.separate_codebook_per_head else '1 (b h) n d'
        x = rearrange(x, f'b n (h d) -> {ein_rhs_eq}', h=heads)

    quantize, embed_ind = self._codebook(x, weight, verbose)

    if self.training:
        # Straight-through estimator: the forward value is the code, the
        # gradient flows to ``x``.
        quantize = x + (quantize - x).detach()

    loss = torch.tensor([0.], device=device, requires_grad=self.training)

    if self.training:
        if self.commitment_weight > 0:
            commit_loss = F.mse_loss(quantize.detach(), x)
            loss = loss + commit_loss * self.commitment_weight

        if self.orthogonal_reg_weight > 0:
            # The codebook is (num_codebooks, codebook_size, dim): codes live
            # on axis -2. Selecting on axis 0 would pick whole codebooks (and
            # index out of range for ids >= num_codebooks), and
            # ``orthogonal_loss_fn`` expects the 3-D layout to be preserved.
            codebook = self._codebook.embed

            if self.orthogonal_reg_active_codes_only:
                # Only the codes activated by this batch. With separate
                # per-head codebooks this is the union of ids over all heads.
                unique_code_ids = torch.unique(embed_ind)
                codebook = codebook[:, unique_code_ids]

            num_codes = codebook.shape[-2]
            if exists(self.orthogonal_reg_max_codes
                      ) and num_codes > self.orthogonal_reg_max_codes:
                # Random subset of codes to bound the n x n similarity matrix.
                rand_ids = torch.randperm(
                    num_codes,
                    device=device)[:self.orthogonal_reg_max_codes]
                codebook = codebook[:, rand_ids]

            orthogonal_reg_loss = orthogonal_loss_fn(codebook)
            loss = loss + orthogonal_reg_loss * self.orthogonal_reg_weight

    if is_multiheaded:
        if self.separate_codebook_per_head:
            quantize = rearrange(quantize, 'h b n d -> b n (h d)', h=heads)
            embed_ind = rearrange(embed_ind, 'h b n -> b n h', h=heads)
        else:
            quantize = rearrange(
                quantize, '1 (b h) n d -> b n (h d)', h=heads)
            embed_ind = rearrange(embed_ind, '1 (b h) n -> b n h', h=heads)

    quantize = self.project_out(quantize)

    if need_transpose:
        quantize = rearrange(quantize, 'b n d -> b d n')

    if self.accept_image_fmap:
        quantize = rearrange(
            quantize, 'b (h w) c -> b c h w', h=height, w=width)
        embed_ind = rearrange(
            embed_ind, 'b (h w) ... -> b h w ...', h=height, w=width)

    return quantize, embed_ind, loss
evaluation(test_dataset, + tensorf, + renderer, + savePath=None, + N_vis=5, + prtx='', + N_samples=-1, + white_bg=False, + ndc_ray=False, + compute_extra_metrics=True, + device='cuda', + im_save=False): + if prtx is not None and len(prtx) > 0: + prtx = prtx + '_' + result_path = f'{savePath}/{prtx}res.txt' + PSNRs, rgb_maps, depth_maps = [], [], [] + ssims, l_alex, l_vgg = [], [], [] + if savePath is not None: + os.makedirs(savePath, exist_ok=True) + os.makedirs(savePath + '/rgbd', exist_ok=True) + + try: + tqdm._instances.clear() + except Exception: + pass + + near_far = test_dataset.near_far + img_eval_interval = 1 if N_vis < 0 else max( + test_dataset.all_rays.shape[0] // N_vis, 1) + # img_eval_interval = max(img_eval_interval, test_dataset.all_rays.shape[0]//49) + idxs = list(range(0, test_dataset.all_rays.shape[0], img_eval_interval)) + for idx, samples in tqdm( + enumerate(test_dataset.all_rays[0::img_eval_interval]), + file=sys.stdout): + + W, H = test_dataset.img_wh + rays = samples.view(-1, samples.shape[-1]) + + rgb_map, _, depth_map, _, _ = renderer( + rays, + tensorf, + chunk=4096, + N_samples=N_samples, + ndc_ray=ndc_ray, + white_bg=white_bg, + device=device) + rgb_map = rgb_map.clamp(0.0, 1.0) + + rgb_map, depth_map = rgb_map.reshape(H, W, 3).cpu(), depth_map.reshape( + H, W).cpu() + + depth_map, _ = visualize_depth_numpy(depth_map.numpy(), near_far) + if len(test_dataset.all_rgbs): + gt_rgb = test_dataset.all_rgbs[idxs[idx]].view(H, W, 3) + loss = torch.mean((rgb_map - gt_rgb)**2) + PSNRs.append(-10.0 * np.log(loss.item()) / np.log(10.0)) + + if compute_extra_metrics: + ssim = rgb_ssim(rgb_map, gt_rgb, 1) + l_a = rgb_lpips(gt_rgb.numpy(), rgb_map.numpy(), 'alex', + tensorf.device) + l_v = rgb_lpips(gt_rgb.numpy(), rgb_map.numpy(), 'vgg', + tensorf.device) + ssims.append(ssim) + l_alex.append(l_a) + l_vgg.append(l_v) + + rgb_map = (rgb_map.numpy() * 255).astype('uint8') + # rgb_map = np.concatenate((rgb_map, depth_map), axis=1) + 
@torch.no_grad()
def render_path(test_dataset,
                tensorf,
                c2ws,
                renderer,
                savePath=None,
                prtx='',
                N_samples=-1,
                white_bg=False,
                ndc_ray=False,
                device='cuda'):
    """Render one frame per camera pose in ``c2ws`` and optionally write them to disk.

    Args:
        test_dataset: supplies ``near_far``, ``img_wh``, ``directions`` and,
            for NDC rays, ``focal``.
        tensorf: the model handed to ``renderer``.
        c2ws: iterable of camera-to-world matrices.
        renderer: callable returning a 5-tuple whose first and third items
            are the RGB map and the depth map.
        savePath: output directory. When ``None`` nothing is written to disk,
            matching how ``evaluation`` treats its ``savePath``.
        prtx: file-name prefix for every written file.
        N_samples, white_bg, ndc_ray, device: forwarded to ``renderer``.

    Returns:
        None. With a ``savePath``, per-frame PNGs, side-by-side RGB/depth PNGs
        under ``rgbd/`` and two MP4 videos are written.
    """
    rgb_maps, depth_maps = [], []
    save_outputs = savePath is not None
    if save_outputs:
        # ``savePath`` defaults to None, so directory creation (like every
        # other disk write below) must be conditional.
        os.makedirs(savePath, exist_ok=True)
        os.makedirs(savePath + '/rgbd', exist_ok=True)

    # Best effort: drop progress bars left over from a previous run.
    try:
        tqdm._instances.clear()
    except Exception:
        pass

    near_far = test_dataset.near_far
    for idx, c2w in tqdm(enumerate(c2ws)):

        W, H = test_dataset.img_wh

        c2w = torch.FloatTensor(c2w)
        rays_o, rays_d = get_rays(test_dataset.directions,
                                  c2w)  # both (h*w, 3)
        if ndc_ray:
            rays_o, rays_d = ndc_rays_blender(H, W, test_dataset.focal[0], 1.0,
                                              rays_o, rays_d)
        rays = torch.cat([rays_o, rays_d], 1)  # (h*w, 6)

        rgb_map, _, depth_map, _, _ = renderer(
            rays,
            tensorf,
            chunk=8192,
            N_samples=N_samples,
            ndc_ray=ndc_ray,
            white_bg=white_bg,
            device=device)
        rgb_map = rgb_map.clamp(0.0, 1.0)

        rgb_map, depth_map = rgb_map.reshape(H, W, 3).cpu(), depth_map.reshape(
            H, W).cpu()

        depth_map, _ = visualize_depth_numpy(depth_map.numpy(), near_far)

        rgb_map = (rgb_map.numpy() * 255).astype('uint8')
        rgb_maps.append(rgb_map)
        depth_maps.append(depth_map)
        if save_outputs:
            imageio.imwrite(f'{savePath}/{prtx}{idx:03d}.png', rgb_map)
            # Side-by-side RGB | depth image.
            rgb_map = np.concatenate((rgb_map, depth_map), axis=1)
            imageio.imwrite(f'{savePath}/rgbd/{prtx}{idx:03d}.png', rgb_map)

    # ``np.stack`` raises on an empty list, so skip the videos when no pose
    # was rendered.
    if save_outputs and rgb_maps:
        imageio.mimwrite(
            f'{savePath}/{prtx}video.mp4',
            np.stack(rgb_maps),
            fps=30,
            quality=8)
        imageio.mimwrite(
            f'{savePath}/{prtx}depthvideo.mp4',
            np.stack(depth_maps),
            fps=30,
            quality=8)
def visualize_depth_numpy(depth, minmax=None, cmap=cv2.COLORMAP_JET):
    """Colour-map a depth image.

    Args:
        depth: array of shape (H, W); NaNs are treated as 0.
        minmax: optional ``(min, max)`` pair used for normalisation. When
            omitted, the smallest positive depth and the largest depth of
            the image are used.
        cmap: OpenCV colour-map id passed to ``cv2.applyColorMap``.

    Returns:
        ``(image, [mi, ma])``: the colour-mapped ``uint8`` image and the
        normalisation range that was applied.
    """
    x = np.nan_to_num(depth)  # change nan to 0
    if minmax is None:
        # Minimum positive depth, so zero-depth background is ignored. Fall
        # back to 0 when nothing is positive instead of letting np.min raise
        # on an empty selection.
        positive = x[x > 0]
        mi = np.min(positive) if positive.size else 0.0
        ma = np.max(x)
    else:
        mi, ma = minmax

    x = (x - mi) / (ma - mi + 1e-8)  # normalize to 0~1
    # Values outside [mi, ma] (zero-depth background, or depths beyond a
    # caller-supplied range) fall outside 0~1; casting those to uint8 is an
    # out-of-range conversion, so saturate them first.
    x = np.clip(x, 0.0, 1.0)
    x = (255 * x).astype(np.uint8)
    x_ = cv2.applyColorMap(x, cmap)
    return x_, [mi, ma]
def findItem(items, target):
    """Return the first entry of ``items`` that starts with ``target``.

    "Starts with" means that the leading ``len(target)`` elements of the
    entry compare equal to ``target``, so this works for strings as well as
    other sliceable sequences. Returns ``None`` when no entry matches.
    """
    matches = (entry for entry in items if entry[:len(target)] == target)
    return next(matches, None)
class TVLoss(nn.Module):
    """Squared total-variation penalty over the last two axes of a 4-D tensor."""

    def __init__(self, TVLoss_weight=1):
        super().__init__()
        self.TVLoss_weight = TVLoss_weight

    def forward(self, x):
        """Return the weighted total-variation loss of ``x``.

        ``x`` is indexed as a 4-D tensor. Squared differences between
        neighbours along axis 2 and along axis 3 are summed, each sum is
        divided by the number of elements in one sample of the corresponding
        difference tensor, and the result is scaled by
        ``2 * TVLoss_weight / batch_size``.
        """
        num_samples = x.shape[0]

        diff_rows = x[:, :, 1:, :] - x[:, :, :-1, :]
        diff_cols = x[:, :, :, 1:] - x[:, :, :, :-1]

        rows_per_sample = self._tensor_size(diff_rows)
        cols_per_sample = self._tensor_size(diff_cols)

        row_term = diff_rows.pow(2).sum() / rows_per_sample
        col_term = diff_cols.pow(2).sum() / cols_per_sample
        return self.TVLoss_weight * 2 * (row_term + col_term) / num_samples

    def _tensor_size(self, t):
        """Number of elements in one sample of ``t`` (product of axes 1 to 3)."""
        dims = t.size()
        return dims[1] * dims[2] * dims[3]
class Timing:
    """
    Timing environment
    usage:
    with Timing("message"):
        your commands here
    will print CUDA runtime in ms

    Nothing is measured or printed unless ``debug`` is True.
    """

    def __init__(self, name, debug=False):
        self.name = name
        self.debug = debug

    def __enter__(self):
        # Return the instance so ``with Timing(...) as t`` binds something
        # useful instead of None.
        if self.debug:
            self.start = torch.cuda.Event(enable_timing=True)
            self.end = torch.cuda.Event(enable_timing=True)
            self.start.record()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None on every path, so exceptions raised inside the
        # ``with`` body keep propagating.
        if not self.debug:
            return

        self.end.record()
        # Wait for the recorded events to complete before reading the time.
        torch.cuda.synchronize()
        print(self.name, 'elapsed', self.start.elapsed_time(self.end), 'ms')
b/modelscope/models/multi_modal/stable_diffusion/stable_diffusion.py @@ -6,6 +6,7 @@ from typing import Callable, List, Optional, Union import torch import torch.nn.functional as F from diffusers import AutoencoderKL, DDPMScheduler, UNet2DConditionModel +from packaging import version from transformers import CLIPTextModel, CLIPTokenizer from modelscope.metainfo import Models @@ -34,6 +35,7 @@ class StableDiffusion(TorchModel): """ super().__init__(model_dir, *args, **kwargs) revision = kwargs.pop('revision', None) + xformers_enable = kwargs.pop('xformers_enable', False) self.lora_tune = kwargs.pop('lora_tune', False) self.dreambooth_tune = kwargs.pop('dreambooth_tune', False) @@ -66,6 +68,18 @@ class StableDiffusion(TorchModel): self.unet.requires_grad_(False) self.unet = self.unet.to(self.device) + # xformers accelerate memory efficient attention + if xformers_enable: + import xformers + + xformers_version = version.parse(xformers.__version__) + if xformers_version == version.parse('0.0.16'): + logger.warn( + 'xFormers 0.0.16 cannot be used for training in some GPUs. ' + 'If you observe problems during training, please update xFormers to at least 0.0.17.' + ) + self.unet.enable_xformers_memory_efficient_attention() + def tokenize_caption(self, captions): """ Convert caption text to token data. 
diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py index 543a2330..3a0ead63 100644 --- a/modelscope/models/nlp/__init__.py +++ b/modelscope/models/nlp/__init__.py @@ -76,6 +76,7 @@ if TYPE_CHECKING: DocumentGroundedDialogRerankModel) from .xlm_roberta import XLMRobertaConfig, XLMRobertaModel from .llama import LlamaForTextGeneration, LlamaConfig, LlamaModel, LlamaTokenizer, LlamaTokenizerFast + from .llama2 import Llama2ForTextGeneration, Llama2Config, Llama2Model, Llama2Tokenizer, Llama2TokenizerFast else: _import_structure = { @@ -172,6 +173,10 @@ else: 'LlamaForTextGeneration', 'LlamaConfig', 'LlamaModel', 'LlamaTokenizer', 'LlamaTokenizerFast' ], + 'llama2': [ + 'Llama2ForTextGeneration', 'Llama2Config', 'Llama2Model', + 'Llama2Tokenizer', 'Llama2TokenizerFast' + ], } import sys diff --git a/modelscope/models/nlp/chatglm/configuration.py b/modelscope/models/nlp/chatglm/configuration.py index 18fdca0f..5ecf3484 100644 --- a/modelscope/models/nlp/chatglm/configuration.py +++ b/modelscope/models/nlp/chatglm/configuration.py @@ -1,9 +1,10 @@ """ ChatGLM model configuration """ from transformers.configuration_utils import PretrainedConfig -from transformers.utils import logging -logger = logging.get_logger(__name__) +from modelscope.utils import logger as logging + +logger = logging.get_logger() class ChatGLMConfig(PretrainedConfig): diff --git a/modelscope/models/nlp/chatglm/quantization.py b/modelscope/models/nlp/chatglm/quantization.py index 9994d9c4..4e568c71 100644 --- a/modelscope/models/nlp/chatglm/quantization.py +++ b/modelscope/models/nlp/chatglm/quantization.py @@ -6,9 +6,10 @@ from typing import List import torch from torch.nn import Linear from torch.nn.parameter import Parameter -from transformers.utils import logging -logger = logging.get_logger(__name__) +from modelscope.utils import logger as logging + +logger = logging.get_logger() try: from cpm_kernels.kernels.base import LazyKernelCModule, KernelFunction, round_up 
diff --git a/modelscope/models/nlp/chatglm/text_generation.py b/modelscope/models/nlp/chatglm/text_generation.py index 53f8f105..95ea33db 100644 --- a/modelscope/models/nlp/chatglm/text_generation.py +++ b/modelscope/models/nlp/chatglm/text_generation.py @@ -24,11 +24,12 @@ from transformers.modeling_outputs import ( from transformers.modeling_utils import PreTrainedModel from transformers.utils import (add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_model_forward, logging) + add_start_docstrings_to_model_forward) from modelscope.metainfo import Models from modelscope.models import MODELS, Model, TorchModel from modelscope.outputs import OutputKeys +from modelscope.utils import logger as logging from modelscope.utils.constant import Tasks from .configuration import ChatGLMConfig from .tokenization import ChatGLMTokenizer @@ -41,7 +42,7 @@ if sys.platform != 'darwin': torch._C._jit_override_can_fuse_on_cpu(True) torch._C._jit_override_can_fuse_on_gpu(True) -logger = logging.get_logger(__name__) +logger = logging.get_logger() _CHECKPOINT_FOR_DOC = 'THUDM/ChatGLM-6B' _CONFIG_FOR_DOC = 'ChatGLM6BConfig' @@ -931,7 +932,6 @@ class ChatGLMModel(ChatGLMPreTrainedModel): self.num_attention_heads, self.hidden_size // self.num_attention_heads) # seq_len, b, nh, hidden_size - print('#########################:', past_key_values.device) past_key_values = self.dropout(past_key_values) past_key_values = past_key_values.permute([2, 1, 0, 3, 4]).split(2) # past_key_values = [(v[0], v[1]) for v in past_key_values] diff --git a/modelscope/models/nlp/chatglm/tokenization.py b/modelscope/models/nlp/chatglm/tokenization.py index 77bcde55..f5f8cd0c 100644 --- a/modelscope/models/nlp/chatglm/tokenization.py +++ b/modelscope/models/nlp/chatglm/tokenization.py @@ -6,9 +6,11 @@ import numpy as np import sentencepiece as spm from transformers.tokenization_utils import PreTrainedTokenizer from transformers.tokenization_utils_base import BatchEncoding, EncodedInput 
-from transformers.utils import PaddingStrategy, logging +from transformers.utils import PaddingStrategy -logger = logging.get_logger(__name__) +from modelscope.utils import logger as logging + +logger = logging.get_logger() PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 'THUDM/chatglm-6b': 2048, diff --git a/modelscope/models/nlp/chatglm2/configuration.py b/modelscope/models/nlp/chatglm2/configuration.py index b10db870..ab40de0e 100644 --- a/modelscope/models/nlp/chatglm2/configuration.py +++ b/modelscope/models/nlp/chatglm2/configuration.py @@ -1,12 +1,14 @@ """ ChatGLM model configuration """ -from transformers.configuration_utils import PretrainedConfig -from transformers.utils import logging +from transformers import PretrainedConfig -logger = logging.get_logger(__name__) +from modelscope.utils import logger as logging + +logger = logging.get_logger() class ChatGLM2Config(PretrainedConfig): + model_type = 'chatglm' def __init__(self, num_layers=28, @@ -24,7 +26,6 @@ class ChatGLM2Config(PretrainedConfig): post_layer_norm=True, add_bias_linear=False, add_qkv_bias=False, - interleaved_qkv=False, bias_dropout_fusion=True, multi_query_attention=False, multi_query_group_num=1, @@ -32,8 +33,11 @@ class ChatGLM2Config(PretrainedConfig): attention_softmax_in_fp32=True, fp32_residual_connection=False, quantization_bit=0, + pre_seq_len=None, + prefix_projection=False, **kwargs): self.num_layers = num_layers + self.vocab_size = padded_vocab_size self.padded_vocab_size = padded_vocab_size self.hidden_size = hidden_size self.ffn_hidden_size = ffn_hidden_size @@ -55,4 +59,6 @@ class ChatGLM2Config(PretrainedConfig): self.attention_softmax_in_fp32 = attention_softmax_in_fp32 self.fp32_residual_connection = fp32_residual_connection self.quantization_bit = quantization_bit + self.pre_seq_len = pre_seq_len + self.prefix_projection = prefix_projection super().__init__(**kwargs) diff --git a/modelscope/models/nlp/chatglm2/quantization.py 
b/modelscope/models/nlp/chatglm2/quantization.py index 612c9e4b..a1e8b8f2 100644 --- a/modelscope/models/nlp/chatglm2/quantization.py +++ b/modelscope/models/nlp/chatglm2/quantization.py @@ -1,15 +1,14 @@ import base64 import bz2 import ctypes -from functools import partial from typing import List import torch -from torch.nn import Linear from torch.nn.parameter import Parameter -from transformers.utils import logging -logger = logging.get_logger(__name__) +from modelscope.utils import logger as logging + +logger = logging.get_logger() try: from cpm_kernels.kernels.base import LazyKernelCModule, KernelFunction, round_up diff --git a/modelscope/models/nlp/chatglm2/text_generation.py b/modelscope/models/nlp/chatglm2/text_generation.py index 3c510c7a..1052b875 100644 --- a/modelscope/models/nlp/chatglm2/text_generation.py +++ b/modelscope/models/nlp/chatglm2/text_generation.py @@ -2,10 +2,9 @@ import copy import math -import re import sys import warnings -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple import torch import torch.nn.functional as F @@ -20,12 +19,13 @@ from transformers.generation.utils import (GenerationConfig, from transformers.modeling_outputs import (BaseModelOutputWithPast, CausalLMOutputWithPast) from transformers.modeling_utils import PreTrainedModel -from transformers.utils import logging +from modelscope import Model, TorchModel from modelscope.metainfo import Models -from modelscope.models import MODELS, Model, TorchModel from modelscope.outputs import OutputKeys +from modelscope.utils import logger as logging from modelscope.utils.constant import Tasks +from ... 
import MODELS from .configuration import ChatGLM2Config # flags required to enable jit fusion kernels @@ -36,7 +36,7 @@ if sys.platform != 'darwin': torch._C._jit_override_can_fuse_on_cpu(True) torch._C._jit_override_can_fuse_on_gpu(True) -logger = logging.get_logger(__name__) +logger = logging.get_logger() _CHECKPOINT_FOR_DOC = 'THUDM/ChatGLM2-6B' _CONFIG_FOR_DOC = 'ChatGLM6BConfig' @@ -61,17 +61,50 @@ class InvalidScoreLogitsProcessor(LogitsProcessor): return scores +class PrefixEncoder(torch.nn.Module): + """ + The torch.nn model to encode the prefix + Input shape: (batch-size, prefix-length) + Output shape: (batch-size, prefix-length, 2*layers*hidden) + """ + + def __init__(self, config: ChatGLM2Config): + super().__init__() + self.prefix_projection = config.prefix_projection + if self.prefix_projection: + # Use a two-layer MLP to encode the prefix + kv_size = config.num_layers * config.kv_channels * config.multi_query_group_num * 2 + self.embedding = torch.nn.Embedding(config.pre_seq_len, kv_size) + self.trans = torch.nn.Sequential( + torch.nn.Linear(kv_size, config.hidden_size), torch.nn.Tanh(), + torch.nn.Linear(config.hidden_size, kv_size)) + else: + self.embedding = torch.nn.Embedding( + config.pre_seq_len, config.num_layers * config.kv_channels + * config.multi_query_group_num * 2) + + def forward(self, prefix: torch.Tensor): + if self.prefix_projection: + prefix_tokens = self.embedding(prefix) + past_key_values = self.trans(prefix_tokens) + else: + past_key_values = self.embedding(prefix) + return past_key_values + + def split_tensor_along_last_dim( tensor: torch.Tensor, num_partitions: int, contiguous_split_chunks: bool = False, ) -> List[torch.Tensor]: """Split a tensor along its last dimension. + Arguments: tensor: input tensor. num_partitions: number of partitions to split the tensor contiguous_split_chunks: If True, make each chunk contiguous in memory. 
+ Returns: A list of Tensors """ @@ -92,7 +125,7 @@ class RotaryEmbedding(nn.Module): def __init__(self, dim, original_impl=False, device=None, dtype=None): super().__init__() inv_freq = 1.0 / (10000**( - torch.arange(0, dim, 2, device=device, dtype=dtype) / dim)) + torch.arange(0, dim, 2, device=device).to(dtype=dtype) / dim)) self.register_buffer('inv_freq', inv_freq) self.dim = dim self.original_impl = original_impl @@ -104,6 +137,7 @@ class RotaryEmbedding(nn.Module): device: torch.device, base: int = 10000): """Enhanced Transformer with Rotary Position Embedding. + Derived from: https://github.com/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/ transformers/rope/__init__.py. MIT License: https://github.com/labmlai/annotated_deep_learning_paper_implementations/blob/master/license. @@ -325,6 +359,7 @@ class CoreAttention(torch.nn.Module): class SelfAttention(torch.nn.Module): """Parallel self-attention layer abstract class. + Self-attention layer takes input with size [s, b, h] and returns output of the same size. 
""" @@ -421,9 +456,9 @@ class SelfAttention(torch.nn.Module): self.num_multi_query_groups_per_partition, self.hidden_size_per_attention_head)) else: - new_tensor_shape = mixed_x_layer.size()[:-1] + ( - self.num_attention_heads_per_partition, # noqa - 3 * self.hidden_size_per_attention_head) # noqa + new_tensor_shape = mixed_x_layer.size()[:-1] + \ + (self.num_attention_heads_per_partition, # noqa + 3 * self.hidden_size_per_attention_head) # noqa mixed_x_layer = mixed_x_layer.view(*new_tensor_shape) # [sq, b, np, 3 * hn] --> 3 [sq, b, np, hn] @@ -436,11 +471,11 @@ class SelfAttention(torch.nn.Module): key_layer = apply_rotary_pos_emb(key_layer, rotary_pos_emb) # adjust key and value for inference + if kv_cache is not None: + cache_k, cache_v = kv_cache + key_layer = torch.cat((cache_k, key_layer), dim=0) + value_layer = torch.cat((cache_v, value_layer), dim=0) if use_cache: - if kv_cache is not None: - cache_k, cache_v = kv_cache - key_layer = torch.cat((cache_k, key_layer), dim=0) - value_layer = torch.cat((cache_v, value_layer), dim=0) kv_cache = (key_layer, value_layer) else: kv_cache = None @@ -487,6 +522,7 @@ def _config_to_kwargs(args): class MLP(torch.nn.Module): """MLP. + MLP will take the input with h hidden state, project it to 4*h hidden dimension, perform nonlinear transformation, and project the state back into h hidden dimension. @@ -530,6 +566,7 @@ class MLP(torch.nn.Module): class GLMBlock(torch.nn.Module): """A single transformer layer. + Transformer layer takes input with size [s, b, h] and returns an output of the same size. 
""" @@ -642,6 +679,8 @@ class GLMTransformer(torch.nn.Module): device=device, dtype=config.torch_dtype) + self.gradient_checkpointing = False + def _get_layer(self, layer_number): return self.layers[layer_number] @@ -657,6 +696,13 @@ class GLMTransformer(torch.nn.Module): if not kv_caches: kv_caches = [None for _ in range(self.num_layers)] presents = () if use_cache else None + if self.gradient_checkpointing and self.training: + if use_cache: + logger.warning_once( + '`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...' + ) + use_cache = False + all_self_attentions = None all_hidden_states = () if output_hidden_states else None for index in range(self.num_layers): @@ -664,13 +710,18 @@ class GLMTransformer(torch.nn.Module): all_hidden_states = all_hidden_states + (hidden_states, ) layer = self._get_layer(index) - - hidden_states, kv_cache = layer( - hidden_states, - attention_mask, - rotary_pos_emb, - kv_cache=kv_caches[index], - use_cache=use_cache) + if self.gradient_checkpointing and self.training: + layer_ret = torch.utils.checkpoint.checkpoint( + layer, hidden_states, attention_mask, rotary_pos_emb, + kv_caches[index], use_cache) + else: + layer_ret = layer( + hidden_states, + attention_mask, + rotary_pos_emb, + kv_cache=kv_caches[index], + use_cache=use_cache) + hidden_states, kv_cache = layer_ret if use_cache: presents = presents + (kv_cache, ) @@ -724,7 +775,7 @@ class ChatGLMPreTrainedModel(TorchModel, PreTrainedModel): dim=-1) # noqa if padding_mask is not None: full_attention_mask = full_attention_mask * padding_mask.unsqueeze( - 1) # noqa + 1) if not past_length and padding_mask is not None: full_attention_mask -= padding_mask.unsqueeze(-1) - 1 full_attention_mask = (full_attention_mask < 0.5).bool() @@ -739,7 +790,7 @@ class ChatGLMPreTrainedModel(TorchModel, PreTrainedModel): return position_ids def _set_gradient_checkpointing(self, module, value=False): - if isinstance(module, ChatGLMModel): + if 
isinstance(module, GLMTransformer): module.gradient_checkpointing = value @classmethod @@ -801,6 +852,9 @@ class ChatGLMModel(ChatGLMPreTrainedModel): if device is not None: init_kwargs['device'] = device self.embedding = init_method(Embedding, config, **init_kwargs) + self.num_layers = config.num_layers + self.multi_query_group_num = config.multi_query_group_num + self.kv_channels = config.kv_channels # Rotary positional embeddings self.seq_length = config.seq_length @@ -821,7 +875,30 @@ class ChatGLMModel(ChatGLMPreTrainedModel): bias=False, dtype=config.torch_dtype, **init_kwargs) - self.gradient_checkpointing = False + self.pre_seq_len = config.pre_seq_len + self.prefix_projection = config.prefix_projection + if self.pre_seq_len is not None: + for param in self.parameters(): + param.requires_grad = False + self.prefix_tokens = torch.arange(self.pre_seq_len).long() + self.prefix_encoder = PrefixEncoder(config) + self.dropout = torch.nn.Dropout(0.1) + + def get_input_embeddings(self): + return self.embedding.word_embeddings + + def get_prompt(self, batch_size, device, dtype=torch.half): + prefix_tokens = self.prefix_tokens.unsqueeze(0).expand(batch_size, + -1).to(device) + past_key_values = self.prefix_encoder(prefix_tokens).type(dtype) + past_key_values = past_key_values.view(batch_size, self.pre_seq_len, + self.num_layers * 2, + self.multi_query_group_num, + self.kv_channels) + # seq_len, b, nh, hidden_size + past_key_values = self.dropout(past_key_values) + past_key_values = past_key_values.permute([2, 1, 0, 3, 4]).split(2) + return past_key_values def forward( self, @@ -847,6 +924,21 @@ class ChatGLMModel(ChatGLMPreTrainedModel): if inputs_embeds is None: inputs_embeds = self.embedding(input_ids) + if self.pre_seq_len is not None: + if past_key_values is None: + past_key_values = self.get_prompt( + batch_size=batch_size, + device=input_ids.device, + dtype=inputs_embeds.dtype) + if attention_mask is not None: + attention_mask = torch.cat( + [ + 
attention_mask.new_ones( # noqa + (batch_size, self.pre_seq_len)), + attention_mask # noqa + ], # noqa + dim=-1) # noqa + if full_attention_mask is None: if (attention_mask is not None and not attention_mask.all()) or (past_key_values @@ -923,7 +1015,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel): attention_mask, # noqa attention_mask.new_ones( (attention_mask.shape[0], 1)) # noqa - ], + ], # noqa dim=-1) # noqa # update position ids @@ -1032,6 +1124,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel): This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or [`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct beam_idx at every generation step. + Output shares the same memory storage as `past`. """ return tuple(( @@ -1048,11 +1141,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel): tokenizer, query: str, history: List[Tuple[str, str]] = None): - prompt = '' - for i, (old_query, response) in enumerate(history): - prompt += '[Round {}]\n\n问:{}\n\n答:{}\n\n'.format( - i + 1, old_query, response) - prompt += '[Round {}]\n\n问:{}\n\n答:'.format(len(history) + 1, query) + prompt = tokenizer.build_prompt(query, history=history) inputs = tokenizer([prompt], return_tensors='pt') inputs = inputs.to(self.device) return inputs @@ -1076,17 +1165,17 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel): return inputs @torch.no_grad() - def chat(self, - tokenizer, - query: str, - history: List[Tuple[str, str]] = None, - max_length: int = 2048, - num_beams=1, - do_sample=True, - top_p=0.8, - temperature=0.8, - logits_processor=None, - **kwargs): + def _chat(self, + tokenizer, + query: str, + history: List[Tuple[str, str]] = None, + max_length: int = 8192, + num_beams=1, + do_sample=True, + top_p=0.8, + temperature=0.8, + logits_processor=None, + **kwargs): if history is None: history = [] if logits_processor is None: @@ 
-1107,7 +1196,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel): response = tokenizer.decode(outputs) response = self.process_response(response) history = history + [(query, response)] - return {OutputKeys.RESPONSE: response, OutputKeys.HISTORY: history} + return response, history @torch.no_grad() def stream_chat(self, @@ -1115,7 +1204,7 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel): query: str, history: List[Tuple[str, str]] = None, past_key_values=None, - max_length: int = 2048, + max_length: int = 8192, do_sample=True, top_p=0.8, temperature=0.8, @@ -1142,6 +1231,8 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel): tokenizer, query, history=history) if past_key_values is not None: past_length = past_key_values[0][0].shape[0] + if self.transformer.pre_seq_len is not None: + past_length -= self.transformer.pre_seq_len inputs.position_ids += past_length attention_mask = inputs.attention_mask attention_mask = torch.cat( @@ -1157,12 +1248,13 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel): outputs, past_key_values = outputs outputs = outputs.tolist()[0][len(inputs['input_ids'][0]):] response = tokenizer.decode(outputs) - response = self.process_response(response) - new_history = history + [(query, response)] - if return_past_key_values: - yield response, new_history, past_key_values - else: - yield response, new_history + if response and response[-1] != '�': + response = self.process_response(response) + new_history = history + [(query, response)] + if return_past_key_values: + yield response, new_history, past_key_values + else: + yield response, new_history @torch.no_grad() def stream_generate( @@ -1298,3 +1390,39 @@ class ChatGLM2ForConditionalGeneration(ChatGLMPreTrainedModel): device=device, **kwargs) return self + + def chat(self, input: Dict, tokenizer) -> Dict: + text = input['text'] + history = input['history'] + # args + if 'max_length' in input: + max_length = input['max_length'] + 
else: + max_length = 2048 + + if 'temperature' in input: + temperature = input['temperature'] + else: + temperature = 0.95 + + if 'num_beams' in input: + num_beams = input['num_beams'] + else: + num_beams = 1 + + if 'do_sample' in input: + do_sample = input['do_sample'] + else: + do_sample = True + + if type(history) == torch.Tensor: + history = history.tolist() + response, history = self._chat( + tokenizer, + text, + history, + max_length=max_length, + temperature=temperature, + num_beams=num_beams, + do_sample=do_sample) + return {OutputKeys.RESPONSE: response, OutputKeys.HISTORY: history} diff --git a/modelscope/models/nlp/chatglm2/tokenization.py b/modelscope/models/nlp/chatglm2/tokenization.py index 5036d881..7014dc9c 100644 --- a/modelscope/models/nlp/chatglm2/tokenization.py +++ b/modelscope/models/nlp/chatglm2/tokenization.py @@ -1,13 +1,10 @@ -"""Tokenization classes for ChatGLM.""" import os from typing import Dict, List, Optional, Union from sentencepiece import SentencePieceProcessor -from transformers.tokenization_utils import PreTrainedTokenizer +from transformers import PreTrainedTokenizer from transformers.tokenization_utils_base import BatchEncoding, EncodedInput -from transformers.utils import PaddingStrategy, logging - -logger = logging.get_logger(__name__) +from transformers.utils import PaddingStrategy class SPTokenizer: @@ -21,7 +18,7 @@ class SPTokenizer: self.n_words: int = self.sp_model.vocab_size() self.bos_id: int = self.sp_model.bos_id() self.eos_id: int = self.sp_model.eos_id() - self.pad_id: int = self.sp_model.eos_id() + self.pad_id: int = self.sp_model.unk_id() assert self.sp_model.vocab_size() == self.sp_model.get_piece_size() special_tokens = ['[MASK]', '[gMASK]', '[sMASK]', 'sop', 'eop'] @@ -62,7 +59,9 @@ class SPTokenizer: def convert_id_to_token(self, index): """Converts an index (integer) in a token (str) using the vocab.""" - if index in self.index_special_tokens: + if index in self.index_special_tokens or index in [ + 
self.eos_id, self.bos_id, self.pad_id + ] or index < 0: return '' return self.sp_model.IdToPiece(index) @@ -76,6 +75,7 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer): super().__init__(padding_side=padding_side, **kwargs) self.name = 'GLMTokenizer' + self.vocab_file = vocab_file self.tokenizer = SPTokenizer(vocab_file) self.special_tokens = { '': self.tokenizer.bos_id, @@ -91,12 +91,16 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer): @property def pad_token(self) -> str: - return '' + return '' @property def pad_token_id(self): return self.get_command('') + @property + def eos_token(self) -> str: + return '' + @property def eos_token_id(self): return self.get_command('') @@ -131,11 +135,13 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory, filename_prefix=None): """ Save the vocabulary and special tokens file to a directory. + Args: save_directory (`str`): The directory in which to save the vocabulary. filename_prefix (`str`, *optional*): An optional prefix to add to the named of the saved files. + Returns: `Tuple(str)`: Paths to the files saved. """ @@ -157,6 +163,16 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer): prefix_tokens = [self.get_command('[gMASK]'), self.get_command('sop')] return prefix_tokens + def build_prompt(self, query, history=None): + if history is None: + history = [] + prompt = '' + for i, (old_query, response) in enumerate(history): + prompt += '[Round {}]\n\n问:{}\n\n答:{}\n\n'.format( + i + 1, old_query, response) + prompt += '[Round {}]\n\n问:{}\n\n答:'.format(len(history) + 1, query) + return prompt + def build_inputs_with_special_tokens( self, token_ids_0: List[int], @@ -164,13 +180,16 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer): """ Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and adding special tokens. 
A BERT sequence has the following format: + - single sequence: `[CLS] X [SEP]` - pair of sequences: `[CLS] A [SEP] B [SEP]` + Args: token_ids_0 (`List[int]`): List of IDs to which the special tokens will be added. token_ids_1 (`List[int]`, *optional*): Optional second list of IDs for sequence pairs. + Returns: `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens. """ @@ -192,16 +211,19 @@ class ChatGLM2Tokenizer(PreTrainedTokenizer): ) -> dict: """ Pad encoded inputs (on left/right and up to predefined length or max length in the batch) + Args: encoded_inputs: Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`). max_length: maximum length of the returned list and optionally padding length (see below). Will truncate by taking into account the special tokens. padding_strategy: PaddingStrategy to use for padding. + - PaddingStrategy.LONGEST Pad to the longest sequence in the batch - PaddingStrategy.MAX_LENGTH: Pad to the max length (default) - PaddingStrategy.DO_NOT_PAD: Do not pad The tokenizer padding sides are defined in self.padding_side: + - 'left': pads on the left of the sequences - 'right': pads on the right of the sequences pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value. 
diff --git a/modelscope/models/nlp/fid_plug/backbone.py b/modelscope/models/nlp/fid_plug/backbone.py index 70c45633..5dcddcc1 100644 --- a/modelscope/models/nlp/fid_plug/backbone.py +++ b/modelscope/models/nlp/fid_plug/backbone.py @@ -26,10 +26,11 @@ import torch.nn.functional as F from torch import Tensor, nn from torch.nn.init import xavier_uniform_ from transformers import (BertConfig, BertModel, BertTokenizer, RobertaConfig, - RobertaModel, RobertaTokenizer, logging) + RobertaModel, RobertaTokenizer) from transformers.activations import ACT2FN from transformers.modeling_utils import PreTrainedModel +from modelscope.utils import logger as logging from .configuration import PlugConfig CONFIG_NAME = 'config.json' @@ -729,7 +730,7 @@ class PlugForConditionalGeneration(PlugPreTrainedModel): def __init__(self, config, checkpoint=None, dataset: str = 'default'): super().__init__(config) - self.logger = logging.get_logger(__name__) + self.logger = logging.get_logger() self.config = config if config.encoder == 'roberta': tokenizer = RobertaTokenizer.from_pretrained( diff --git a/modelscope/models/nlp/llama/backbone.py b/modelscope/models/nlp/llama/backbone.py index 120581a9..16be099f 100755 --- a/modelscope/models/nlp/llama/backbone.py +++ b/modelscope/models/nlp/llama/backbone.py @@ -35,7 +35,7 @@ from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger from .configuration import LlamaConfig -logger = get_logger(__name__) +logger = get_logger() _CONFIG_FOR_DOC = 'LlamaConfig' diff --git a/modelscope/models/nlp/llama/tokenization.py b/modelscope/models/nlp/llama/tokenization.py index b3d24dd9..cd423683 100644 --- a/modelscope/models/nlp/llama/tokenization.py +++ b/modelscope/models/nlp/llama/tokenization.py @@ -29,7 +29,7 @@ from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer from modelscope.utils.logger import get_logger # This file is mainly copied from the llama code of transformers -logger = 
get_logger(__name__) +logger = get_logger() VOCAB_FILES_NAMES = {'vocab_file': 'tokenizer.model'} diff --git a/modelscope/models/nlp/llama/tokenization_fast.py b/modelscope/models/nlp/llama/tokenization_fast.py index 7aa0ac1b..13696b59 100644 --- a/modelscope/models/nlp/llama/tokenization_fast.py +++ b/modelscope/models/nlp/llama/tokenization_fast.py @@ -31,7 +31,7 @@ if is_sentencepiece_available(): else: LlamaTokenizer = None -logger = get_logger(__name__) +logger = get_logger() VOCAB_FILES_NAMES = { 'vocab_file': 'tokenizer.model', 'tokenizer_file': 'tokenizer.json' diff --git a/modelscope/models/nlp/llama2/__init__.py b/modelscope/models/nlp/llama2/__init__.py new file mode 100644 index 00000000..12a295b6 --- /dev/null +++ b/modelscope/models/nlp/llama2/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .configuration import Llama2Config + from .text_generation import Llama2ForTextGeneration + from .backbone import Llama2Model + from .tokenization import Llama2Tokenizer + from .tokenization_fast import Llama2TokenizerFast +else: + _import_structure = { + 'configuration': ['Llama2Config'], + 'text_generation': ['Llama2ForTextGeneration'], + 'backbone': ['Llama2Model'], + 'tokenization': ['Llama2Tokenizer'], + 'tokenization_fast': ['Llama2TokenizerFast'], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/nlp/llama2/backbone.py b/modelscope/models/nlp/llama2/backbone.py new file mode 100755 index 00000000..ee0d742b --- /dev/null +++ b/modelscope/models/nlp/llama2/backbone.py @@ -0,0 +1,795 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. 
+# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" PyTorch LLaMA model.""" +import math +from typing import List, Optional, Tuple, Union + +import torch +import torch.nn.functional as F +import torch.utils.checkpoint +from torch import nn +from transformers.activations import ACT2FN +from transformers.modeling_outputs import BaseModelOutputWithPast +from transformers.modeling_utils import PreTrainedModel + +from modelscope import Model, TorchModel +from modelscope.metainfo import Models +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger +from ... import MODELS +from .configuration import Llama2Config + +logger = get_logger() + +_CONFIG_FOR_DOC = 'Llama2Config' + + +# This file is mainly copied from the llama code of transformers +# Copied from transformers.models.bart.modeling_bart._make_causal_mask +def _make_causal_mask(input_ids_shape: torch.Size, + dtype: torch.dtype, + device: torch.device, + past_key_values_length: int = 0): + """ + Make causal mask used for bi-directional self-attention. 
+ """ + bsz, tgt_len = input_ids_shape + mask = torch.full((tgt_len, tgt_len), + torch.finfo(dtype).min, + device=device) + mask_cond = torch.arange(mask.size(-1), device=device) + mask.masked_fill_(mask_cond < (mask_cond + 1).view(mask.size(-1), 1), 0) + mask = mask.to(dtype) + + if past_key_values_length > 0: + _tmp_value = torch.zeros( + tgt_len, past_key_values_length, dtype=dtype, device=device) + mask = torch.cat([_tmp_value, mask], dim=-1) + return mask[None, None, :, :].expand(bsz, 1, tgt_len, + tgt_len + past_key_values_length) + + +# Copied from transformers.models.bart.modeling_bart._expand_mask +def _expand_mask(mask: torch.Tensor, + dtype: torch.dtype, + tgt_len: Optional[int] = None): + """ + Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. + """ + bsz, src_len = mask.size() + tgt_len = tgt_len if tgt_len is not None else src_len + + expanded_mask = mask[:, None, None, :].expand(bsz, 1, tgt_len, + src_len).to(dtype) + + inverted_mask = 1.0 - expanded_mask + + return inverted_mask.masked_fill( + inverted_mask.to(torch.bool), + torch.finfo(dtype).min) + + +class LlamaRMSNorm(nn.Module): + + def __init__(self, hidden_size, eps=1e-6): + """ + LlamaRMSNorm is equivalent to T5LayerNorm + """ + super().__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.variance_epsilon = eps + + def forward(self, hidden_states): + input_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + + self.variance_epsilon) + return self.weight * hidden_states.to(input_dtype) + + +class LlamaRotaryEmbedding(torch.nn.Module): + + def __init__(self, + dim, + max_position_embeddings=2048, + base=10000, + device=None): + super().__init__() + + self.dim = dim + self.max_position_embeddings = max_position_embeddings + self.base = base + inv_freq = 1.0 / (self.base**(torch.arange( + 0, self.dim, 
2).float().to(device) / self.dim)) # noqa + self.register_buffer('inv_freq', inv_freq) + + # Build here to make `torch.jit.trace` work. + self._set_cos_sin_cache( + seq_len=max_position_embeddings, + device=self.inv_freq.device, + dtype=torch.get_default_dtype()) + + def _set_cos_sin_cache(self, seq_len, device, dtype): + self.max_seq_len_cached = seq_len + t = torch.arange( + self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype) + + freqs = torch.einsum('i,j->ij', t, self.inv_freq) + # Different from paper, but it uses a different permutation in order to obtain the same calculation + emb = torch.cat((freqs, freqs), dim=-1) + self.register_buffer( + 'cos_cached', + emb.cos()[None, None, :, :].to(dtype), + persistent=False) + self.register_buffer( + 'sin_cached', + emb.sin()[None, None, :, :].to(dtype), + persistent=False) + + def forward(self, x, seq_len=None): + # x: [bs, num_attention_heads, seq_len, head_size] + if seq_len > self.max_seq_len_cached: + self._set_cos_sin_cache( + seq_len=seq_len, device=x.device, dtype=x.dtype) + + return ( + self.cos_cached[:, :, :seq_len, ...].to(dtype=x.dtype), + self.sin_cached[:, :, :seq_len, ...].to(dtype=x.dtype), + ) + + +class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding): + """LlamaRotaryEmbedding extended with linear scaling. 
Credits to the Reddit user /u/kaiokendev""" + + def __init__(self, + dim, + max_position_embeddings=2048, + base=10000, + device=None, + scaling_factor=1.0): + self.scaling_factor = scaling_factor + super().__init__(dim, max_position_embeddings, base, device) + + def _set_cos_sin_cache(self, seq_len, device, dtype): + self.max_seq_len_cached = seq_len + t = torch.arange( + self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype) + t = t / self.scaling_factor + + freqs = torch.einsum('i,j->ij', t, self.inv_freq) + # Different from paper, but it uses a different permutation in order to obtain the same calculation + emb = torch.cat((freqs, freqs), dim=-1) + self.register_buffer( + 'cos_cached', + emb.cos()[None, None, :, :].to(dtype), + persistent=False) + self.register_buffer( + 'sin_cached', + emb.sin()[None, None, :, :].to(dtype), + persistent=False) + + +class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding): + """LlamaRotaryEmbedding extended with Dynamic NTK scaling. 
Credits to the Reddit users /u/bloc97 and /u/emozilla""" + + def __init__(self, + dim, + max_position_embeddings=2048, + base=10000, + device=None, + scaling_factor=1.0): + self.scaling_factor = scaling_factor + super().__init__(dim, max_position_embeddings, base, device) + + def _set_cos_sin_cache(self, seq_len, device, dtype): + self.max_seq_len_cached = seq_len + + if seq_len > self.max_position_embeddings: + base = self.base * ( + (self.scaling_factor * seq_len / self.max_position_embeddings) + - (self.scaling_factor - 1))**( + self.dim / (self.dim - 2)) + inv_freq = 1.0 / (base**(torch.arange( + 0, self.dim, 2).float().to(device) / self.dim)) # noqa + self.register_buffer('inv_freq', inv_freq) + + t = torch.arange( + self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype) + + freqs = torch.einsum('i,j->ij', t, self.inv_freq) + # Different from paper, but it uses a different permutation in order to obtain the same calculation + emb = torch.cat((freqs, freqs), dim=-1) + self.register_buffer( + 'cos_cached', + emb.cos()[None, None, :, :].to(dtype), + persistent=False) + self.register_buffer( + 'sin_cached', + emb.sin()[None, None, :, :].to(dtype), + persistent=False) + + +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., :x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2:] + return torch.cat((-x2, x1), dim=-1) + + +def apply_rotary_pos_emb(q, k, cos, sin, position_ids): + # The first two dimensions of cos and sin are always 1, so we can `squeeze` them. 
+ cos = cos.squeeze(1).squeeze(0) # [seq_len, dim] + sin = sin.squeeze(1).squeeze(0) # [seq_len, dim] + cos = cos[position_ids].unsqueeze(1) # [bs, 1, seq_len, dim] + sin = sin[position_ids].unsqueeze(1) # [bs, 1, seq_len, dim] + q_embed = (q * cos) + (rotate_half(q) * sin) + k_embed = (k * cos) + (rotate_half(k) * sin) + return q_embed, k_embed + + +class LlamaMLP(nn.Module): + + def __init__(self, config): + super().__init__() + self.pretraining_tp = config.pretraining_tp + self.hidden_size = config.hidden_size + self.intermediate_size = config.intermediate_size + self.gate_proj = nn.Linear( + self.hidden_size, self.intermediate_size, bias=False) + self.up_proj = nn.Linear( + self.hidden_size, self.intermediate_size, bias=False) + self.down_proj = nn.Linear( + self.intermediate_size, self.hidden_size, bias=False) + self.act_fn = ACT2FN[config.hidden_act] + + def forward(self, x): + if self.pretraining_tp > 1: + slice = self.intermediate_size // self.pretraining_tp + gate_proj_slices = self.gate_proj.weight.split(slice, dim=0) + up_proj_slices = self.up_proj.weight.split(slice, dim=0) + down_proj_slices = self.down_proj.weight.split(slice, dim=1) + + gate_proj = torch.cat([ + F.linear(x, gate_proj_slices[i]) + for i in range(self.pretraining_tp) + ], + dim=-1) # noqa + up_proj = torch.cat([ + F.linear(x, up_proj_slices[i]) + for i in range(self.pretraining_tp) + ], + dim=-1) # noqa + + intermediate_states = (self.act_fn(gate_proj) * up_proj).split( + slice, dim=2) + down_proj = [ + F.linear(intermediate_states[i], down_proj_slices[i]) + for i in range(self.pretraining_tp) + ] + down_proj = sum(down_proj) + else: + down_proj = self.down_proj( + self.act_fn(self.gate_proj(x)) * self.up_proj(x)) + + return down_proj + + +def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: + """ + This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). 
The hidden states go from (batch, + num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim) + """ + batch, num_key_value_heads, slen, head_dim = hidden_states.shape + if n_rep == 1: + return hidden_states + hidden_states = hidden_states[:, :, + None, :, :].expand(batch, + num_key_value_heads, + n_rep, slen, head_dim) + return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, + head_dim) + + +class LlamaAttention(nn.Module): + """Multi-headed attention from 'Attention Is All You Need' paper""" + + def __init__(self, config: Llama2Config): + super().__init__() + self.config = config + self.hidden_size = config.hidden_size + self.num_heads = config.num_attention_heads + self.head_dim = self.hidden_size // self.num_heads + self.num_key_value_heads = config.num_key_value_heads + self.num_key_value_groups = self.num_heads // self.num_key_value_heads + self.pretraining_tp = config.pretraining_tp + self.max_position_embeddings = config.max_position_embeddings + + if (self.head_dim * self.num_heads) != self.hidden_size: + raise ValueError( + f'hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}' + f' and `num_heads`: {self.num_heads}).') + self.q_proj = nn.Linear( + self.hidden_size, self.num_heads * self.head_dim, bias=False) + self.k_proj = nn.Linear( + self.hidden_size, + self.num_key_value_heads * self.head_dim, + bias=False) + self.v_proj = nn.Linear( + self.hidden_size, + self.num_key_value_heads * self.head_dim, + bias=False) + self.o_proj = nn.Linear( + self.num_heads * self.head_dim, self.hidden_size, bias=False) + self._init_rope() + + def _init_rope(self): + if self.config.rope_scaling is None: + self.rotary_emb = LlamaRotaryEmbedding( + self.head_dim, + max_position_embeddings=self.max_position_embeddings) + else: + scaling_type = self.config.rope_scaling['type'] + scaling_factor = self.config.rope_scaling['factor'] + if scaling_type == 'linear': + self.rotary_emb = 
LlamaLinearScalingRotaryEmbedding( + self.head_dim, + max_position_embeddings=self.max_position_embeddings, + scaling_factor=scaling_factor) + elif scaling_type == 'dynamic': + self.rotary_emb = LlamaDynamicNTKScalingRotaryEmbedding( + self.head_dim, + max_position_embeddings=self.max_position_embeddings, + scaling_factor=scaling_factor) + else: + raise ValueError(f'Unknown RoPE scaling type {scaling_type}') + + def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): + return tensor.view(bsz, seq_len, self.num_heads, + self.head_dim).transpose(1, 2).contiguous() + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: bool = False, + use_cache: bool = False, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], + Optional[Tuple[torch.Tensor]]]: + bsz, q_len, _ = hidden_states.size() + + if self.pretraining_tp > 1: + key_value_slicing = (self.num_key_value_heads + * self.head_dim) // self.pretraining_tp + query_slices = self.q_proj.weight.split( + (self.num_heads * self.head_dim) // self.pretraining_tp, dim=0) + key_slices = self.k_proj.weight.split(key_value_slicing, dim=0) + value_slices = self.v_proj.weight.split(key_value_slicing, dim=0) + + query_states = [ + F.linear(hidden_states, query_slices[i]) + for i in range(self.pretraining_tp) + ] + query_states = torch.cat(query_states, dim=-1) + + key_states = [ + F.linear(hidden_states, key_slices[i]) + for i in range(self.pretraining_tp) + ] + key_states = torch.cat(key_states, dim=-1) + + value_states = [ + F.linear(hidden_states, value_slices[i]) + for i in range(self.pretraining_tp) + ] + value_states = torch.cat(value_states, dim=-1) + + else: + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = query_states.view(bsz, q_len, 
self.num_heads, + self.head_dim).transpose(1, 2) + key_states = key_states.view(bsz, q_len, self.num_key_value_heads, + self.head_dim).transpose(1, 2) + value_states = value_states.view(bsz, q_len, self.num_key_value_heads, + self.head_dim).transpose(1, 2) + + kv_seq_len = key_states.shape[-2] + if past_key_value is not None: + kv_seq_len += past_key_value[0].shape[-2] + cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) + query_states, key_states = apply_rotary_pos_emb( + query_states, key_states, cos, sin, position_ids) + + if past_key_value is not None: + # reuse k, v, self_attention + key_states = torch.cat([past_key_value[0], key_states], dim=2) + value_states = torch.cat([past_key_value[1], value_states], dim=2) + + past_key_value = (key_states, value_states) if use_cache else None + + # repeat k/v heads if n_kv_heads < n_heads + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + attn_weights = torch.matmul(query_states, key_states.transpose( + 2, 3)) / math.sqrt(self.head_dim) + + if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): + raise ValueError( + f'Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is' + f' {attn_weights.size()}') + + if attention_mask is not None: + if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): + raise ValueError( + f'Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}' + ) + attn_weights = attn_weights + attention_mask + + # upcast attention to fp32 + attn_weights = nn.functional.softmax( + attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) + attn_output = torch.matmul(attn_weights, value_states) + + if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): + raise ValueError( + f'`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is' + f' {attn_output.size()}') + + attn_output = 
attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) + + if self.pretraining_tp > 1: + attn_output = attn_output.split( + self.hidden_size // self.pretraining_tp, dim=2) + o_proj_slices = self.o_proj.weight.split( + self.hidden_size // self.pretraining_tp, dim=1) + attn_output = sum([ + F.linear(attn_output[i], o_proj_slices[i]) + for i in range(self.pretraining_tp) + ]) + else: + attn_output = self.o_proj(attn_output) + + if not output_attentions: + attn_weights = None + + return attn_output, attn_weights, past_key_value + + +class LlamaDecoderLayer(nn.Module): + + def __init__(self, config: Llama2Config): + super().__init__() + self.hidden_size = config.hidden_size + self.self_attn = LlamaAttention(config=config) + self.mlp = LlamaMLP(config) + self.input_layernorm = LlamaRMSNorm( + config.hidden_size, eps=config.rms_norm_eps) + self.post_attention_layernorm = LlamaRMSNorm( + config.hidden_size, eps=config.rms_norm_eps) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: Optional[bool] = False, + use_cache: Optional[bool] = False, + ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, + torch.FloatTensor]]]: + """ + Args: + hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` + attention_mask (`torch.FloatTensor`, *optional*): attention mask of size + `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. 
+ use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding + (see `past_key_values`). + past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states + """ + + residual = hidden_states + + hidden_states = self.input_layernorm(hidden_states) + + # Self Attention + hidden_states, self_attn_weights, present_key_value = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + ) + hidden_states = residual + hidden_states + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + hidden_states + + outputs = (hidden_states, ) + + if output_attentions: + outputs += (self_attn_weights, ) + + if use_cache: + outputs += (present_key_value, ) + + return outputs + + +class LlamaPreTrainedModel(TorchModel, PreTrainedModel): + config_class = Llama2Config + base_model_prefix = 'model' + supports_gradient_checkpointing = True + _no_split_modules = ['LlamaDecoderLayer'] + _skip_keys_device_placement = 'past_key_values' + + def __init__(self, config, **kwargs): + super().__init__(config.name_or_path, **kwargs) + super(Model, self).__init__(config) + + def _init_weights(self, module): + std = self.config.initializer_range + if isinstance(module, nn.Linear): + module.weight.data.normal_(mean=0.0, std=std) + if module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.Embedding): + module.weight.data.normal_(mean=0.0, std=std) + if module.padding_idx is not None: + module.weight.data[module.padding_idx].zero_() + + def _set_gradient_checkpointing(self, module, value=False): + if isinstance(module, Llama2Model): + module.gradient_checkpointing = value + + @classmethod + def 
_instantiate(cls, **kwargs): + """Instantiate the model. + + Args: + kwargs: Input args. + model_dir: The model dir used to load the checkpoint and the label information. + num_labels: An optional arg to tell the model how many classes to initialize. + Method will call utils.parse_label_mapping if num_labels not supplied. + If num_labels is not found, the model will use the default setting (2 classes). + + Returns: + The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained + """ + + model_dir = kwargs.pop('model_dir', None) + if model_dir is None: + config = Llama2Config(**kwargs) + model = cls(config) + else: + model = super(Model, cls).from_pretrained( + pretrained_model_name_or_path=model_dir, **kwargs) + model.model_dir = model_dir + return model + + +@MODELS.register_module(Tasks.backbone, module_name=Models.llama2) +class Llama2Model(LlamaPreTrainedModel): + """ + Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`LlamaDecoderLayer`] + + Args: + config: Llama2Config + """ + + def __init__(self, config: Llama2Config): + super().__init__(config) + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + + self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, + self.padding_idx) + self.layers = nn.ModuleList([ + LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers) + ]) + self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) + + self.gradient_checkpointing = False + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.embed_tokens + + def set_input_embeddings(self, value): + self.embed_tokens = value + + # Copied from transformers.models.bart.modeling_bart.BartDecoder._prepare_decoder_attention_mask + def _prepare_decoder_attention_mask(self, attention_mask, input_shape, + inputs_embeds, past_key_values_length): + # create causal mask + # [bsz, seq_len] -> [bsz, 1, 
tgt_seq_len, src_seq_len] + combined_attention_mask = None + if input_shape[-1] > 1: + combined_attention_mask = _make_causal_mask( + input_shape, + inputs_embeds.dtype, + device=inputs_embeds.device, + past_key_values_length=past_key_values_length, + ) + + if attention_mask is not None: + # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] + expanded_attn_mask = _expand_mask( + attention_mask, inputs_embeds.dtype, + tgt_len=input_shape[-1]).to(inputs_embeds.device) + combined_attention_mask = ( + expanded_attn_mask if combined_attention_mask is None else + expanded_attn_mask + combined_attention_mask) + + return combined_attention_mask + + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, BaseModelOutputWithPast]: + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else + self.config.output_hidden_states) + use_cache = use_cache if use_cache is not None else self.config.use_cache + + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError( + 'You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time' + ) + elif input_ids is not None: + batch_size, seq_length = input_ids.shape + elif inputs_embeds is not None: + batch_size, seq_length, _ = inputs_embeds.shape + else: + raise ValueError( + 'You have to specify either decoder_input_ids or 
decoder_inputs_embeds' + ) + + seq_length_with_past = seq_length + past_key_values_length = 0 + + if past_key_values is not None: + past_key_values_length = past_key_values[0][0].shape[2] + seq_length_with_past = seq_length_with_past + past_key_values_length + + if position_ids is None: + device = input_ids.device if input_ids is not None else inputs_embeds.device + position_ids = torch.arange( + past_key_values_length, + seq_length + past_key_values_length, + dtype=torch.long, + device=device) + position_ids = position_ids.unsqueeze(0).view(-1, seq_length) + else: + position_ids = position_ids.view(-1, seq_length).long() + + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + # embed positions + if attention_mask is None: + attention_mask = torch.ones((batch_size, seq_length_with_past), + dtype=torch.bool, + device=inputs_embeds.device) + attention_mask = self._prepare_decoder_attention_mask( + attention_mask, (batch_size, seq_length), inputs_embeds, + past_key_values_length) + + hidden_states = inputs_embeds + + if self.gradient_checkpointing and self.training: + if use_cache: + logger.warning_once( + '`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...' 
+ ) + use_cache = False + + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + next_decoder_cache = () if use_cache else None + + for idx, decoder_layer in enumerate(self.layers): + if output_hidden_states: + all_hidden_states += (hidden_states, ) + + past_key_value = past_key_values[ + idx] if past_key_values is not None else None + + if self.gradient_checkpointing and self.training: + + def create_custom_forward(module): + + def custom_forward(*inputs): + # None for past_key_value + return module(*inputs, output_attentions, None) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(decoder_layer), + hidden_states, + attention_mask, + position_ids, + None, + ) + else: + layer_outputs = decoder_layer( + hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + ) + + hidden_states = layer_outputs[0] + + if use_cache: + next_decoder_cache += ( + layer_outputs[2 if output_attentions else 1], ) + + if output_attentions: + all_self_attns += (layer_outputs[1], ) + + hidden_states = self.norm(hidden_states) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states, ) + + next_cache = next_decoder_cache if use_cache else None + if not return_dict: + return tuple( + v for v in + [hidden_states, next_cache, all_hidden_states, all_self_attns] + if v is not None) + return BaseModelOutputWithPast( + last_hidden_state=hidden_states, + past_key_values=next_cache, + hidden_states=all_hidden_states, + attentions=all_self_attns, + ) diff --git a/modelscope/models/nlp/llama2/configuration.py b/modelscope/models/nlp/llama2/configuration.py new file mode 100644 index 00000000..c9f38fe4 --- /dev/null +++ b/modelscope/models/nlp/llama2/configuration.py @@ -0,0 +1,165 @@ +# Copyright (c) 
Alibaba, Inc. and its affiliates. +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" LLaMA model configuration""" + +from transformers.configuration_utils import PretrainedConfig + +from modelscope.utils.logger import get_logger + +logger = get_logger() + +LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP = {} + + +class Llama2Config(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA + model according to the specified arguments, defining the model architecture. Instantiating a configuration with the + defaults will yield a similar configuration to that of the LLaMA-7B. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 32000): + Vocabulary size of the LLaMA model. 
Defines the number of different tokens that can be represented by the + `inputs_ids` passed when calling [`LlamaModel`] + hidden_size (`int`, *optional*, defaults to 4096): + Dimension of the hidden representations. + intermediate_size (`int`, *optional*, defaults to 11008): + Dimension of the MLP representations. + num_hidden_layers (`int`, *optional*, defaults to 32): + Number of hidden layers in the Transformer encoder. + num_attention_heads (`int`, *optional*, defaults to 32): + Number of attention heads for each attention layer in the Transformer encoder. + num_key_value_heads (`int`, *optional*): + This is the number of key_value heads that should be used to implement Grouped Query Attention. If + `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if + `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When + converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed + by meanpooling all the original heads within that group. For more details checkout [this + paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to + `num_attention_heads`. + pretraining_tp (`int`, *optional*, defaults to `1`): + Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this + document](https://huggingface.co/docs/transformers/parallelism) to understand more about it. This value is + necessary to ensure exact reproducibility of the pretraining results. Please refer to [this + issue](https://github.com/pytorch/pytorch/issues/76232). + hidden_act (`str` or `function`, *optional*, defaults to `"silu"`): + The non-linear activation function (function or string) in the decoder. + max_position_embeddings (`int`, *optional*, defaults to 2048): + The maximum sequence length that this model might ever be used with. Typically set this to something large + just in case (e.g., 512 or 1024 or 2048). 
+ initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + rms_norm_eps (`float`, *optional*, defaults to 1e-12): + The epsilon used by the rms normalization layers. + use_cache (`bool`, *optional*, defaults to `True`): + Whether or not the model should return the last key/values attentions (not used by all models). Only + relevant if `config.is_decoder=True`. + tie_word_embeddings(`bool`, *optional*, defaults to `False`): + Whether to tie weight embeddings + rope_scaling (`Dict`, *optional*): + Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports three scaling + strategies: linear and dynamic. Their scaling factor must be an float greater than 1. The expected format + is `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update + `max_position_embeddings` to the expected new maximum. See the following thread for more information on how + these scaling strategies behave: + https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an + experimental feature, subject to breaking API changes in future versions. 
+ """ + model_type = 'llama' + keys_to_ignore_at_inference = ['past_key_values'] + + def __init__( + self, + vocab_size=32000, + hidden_size=4096, + intermediate_size=11008, + num_hidden_layers=32, + num_attention_heads=32, + num_key_value_heads=None, + hidden_act='silu', + max_position_embeddings=2048, + initializer_range=0.02, + rms_norm_eps=1e-6, + use_cache=True, + pad_token_id=0, + bos_token_id=1, + eos_token_id=2, + pretraining_tp=1, + tie_word_embeddings=False, + rope_scaling=None, + **kwargs, + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + + # for backward compatibility + if num_key_value_heads is None: + num_key_value_heads = num_attention_heads + + self.num_key_value_heads = num_key_value_heads + self.hidden_act = hidden_act + self.initializer_range = initializer_range + self.rms_norm_eps = rms_norm_eps + self.pretraining_tp = pretraining_tp + self.use_cache = use_cache + self.rope_scaling = rope_scaling + self._rope_scaling_validation() + + super().__init__( + pad_token_id=pad_token_id, + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + tie_word_embeddings=tie_word_embeddings, + **kwargs, + ) + + def _rope_scaling_validation(self): + """ + Validate the `rope_scaling` configuration. 
+ """ + if self.rope_scaling is None: + return + + if not isinstance(self.rope_scaling, + dict) or len(self.rope_scaling) != 2: + raise ValueError( + '`rope_scaling` must be a dictionary with with two fields, `name` and `factor`, ' + f'got {self.rope_scaling}') + rope_scaling_type = self.rope_scaling.get('type', None) + rope_scaling_factor = self.rope_scaling.get('factor', None) + if rope_scaling_type is None or rope_scaling_type not in [ + 'linear', 'dynamic' + ]: + raise ValueError( + f"`rope_scaling`'s name field must be one of ['linear', 'dynamic'], got {rope_scaling_type}" + ) + if rope_scaling_factor is None or not isinstance( + rope_scaling_factor, float) or rope_scaling_factor <= 1.0: + raise ValueError( + f"`rope_scaling`'s factor field must be an float > 1, got {rope_scaling_factor}" + ) diff --git a/modelscope/models/nlp/llama2/text_generation.py b/modelscope/models/nlp/llama2/text_generation.py new file mode 100644 index 00000000..71ccaffe --- /dev/null +++ b/modelscope/models/nlp/llama2/text_generation.py @@ -0,0 +1,268 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, List, Optional, Tuple, Union

import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from torch import nn
from torch.nn import CrossEntropyLoss
from transformers.modeling_outputs import CausalLMOutputWithPast

from modelscope.metainfo import Models
from modelscope.outputs import OutputKeys
from modelscope.utils.constant import Tasks
from ... import MODELS
from .backbone import Llama2Model, LlamaPreTrainedModel


def get_chat_prompt(system: str, text: str, history: List[Tuple[str, str]],
                    max_length: int, tokenizer):
    """Build a Llama-2 chat prompt and its token ids from a conversation.

    The prompt is assembled as ``system block + kept history + current text``.
    The system block and the current text are always included; history rounds
    are added newest-first until the next round would push the token count
    past ``max_length``, so the oldest rounds are the ones dropped.

    Args:
        system: System instruction placed inside the ``<<SYS>>`` block.
        text: Current user message; surrounding whitespace is stripped.
        history: Earlier ``(user, bot)`` rounds in chronological order.
        max_length: Upper bound on the total number of prompt tokens.
        tokenizer: Callable invoked as ``tokenizer(prompt, return_tensors='pt')``
            whose result exposes ``input_ids``.

    Returns:
        A ``(prompt, prompt_ids)`` tuple: the prompt string and the per-segment
        ``input_ids`` concatenated along dim 1.

    Raises:
        RuntimeError: If the system block plus the current text alone already
            exceed ``max_length`` tokens.
    """
    # The system instruction is delimited with the Llama-2 `<<SYS>>` /
    # `<</SYS>>` markers.
    # NOTE(review): the reference Llama-2 template also wraps rounds in
    # BOS/EOS markers; none are written here - confirm that the tokenizer
    # settings supply them as intended.
    system_prompt = f'[INST] <<SYS>>\n{system}\n<</SYS>>\n\n'
    system_ids = tokenizer(system_prompt, return_tensors='pt').input_ids

    text_prompt = f'{text.strip()} [/INST]'
    text_ids = tokenizer(text_prompt, return_tensors='pt').input_ids

    prompt_length = system_ids.shape[-1] + text_ids.shape[-1]
    if prompt_length > max_length:
        raise RuntimeError(
            f'prepend prompt length {prompt_length} is bigger than max_length {max_length}'
        )

    # Collect the rounds that fit, newest first, then restore chronological
    # order once at the end instead of prepending on every iteration.
    kept_prompts = []
    kept_ids = []
    for user, bot in reversed(history):
        assert isinstance(user, str)
        assert isinstance(bot, str)
        round_prompt = f'{user.strip()} [/INST] {bot.strip()} [INST] '
        round_ids = tokenizer(round_prompt, return_tensors='pt').input_ids
        if prompt_length + round_ids.shape[-1] > max_length:
            # excess history should not be appended to the prompt
            break
        kept_prompts.append(round_prompt)
        kept_ids.append(round_ids)
        prompt_length += round_ids.shape[-1]
    kept_prompts.reverse()
    kept_ids.reverse()

    prompt = ''.join([system_prompt, *kept_prompts, text_prompt])
    prompt_ids_list = [system_ids] + kept_ids + [text_ids]

    return prompt, torch.cat(prompt_ids_list, dim=1)


# This file is mainly copied from the llama code of transformers
+@MODELS.register_module(Tasks.text_generation, module_name=Models.llama2) +class Llama2ForTextGeneration(LlamaPreTrainedModel): + _tied_weights_keys = ['lm_head.weight'] + + def __init__(self, config): + super().__init__(config) + self.model = Llama2Model(config) + self.pretraining_tp = config.pretraining_tp + self.vocab_size = config.vocab_size + self.lm_head = nn.Linear( + config.hidden_size, config.vocab_size, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + def get_output_embeddings(self): + return self.lm_head + + def set_output_embeddings(self, new_embeddings): + self.lm_head = new_embeddings + + def set_decoder(self, decoder): + self.model = decoder + + def get_decoder(self): + return self.model + + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, CausalLMOutputWithPast]: + r""" + Args: + labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., + config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored + (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`. 
+ + Returns: + + """ + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else + self.config.output_hidden_states) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn) + outputs = self.model( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + hidden_states = outputs[0] + if self.pretraining_tp > 1: + lm_head_slices = self.lm_head.weight.split( + self.vocab_size // self.pretraining_tp, dim=0) + logits = [ + F.linear(hidden_states, lm_head_slices[i]) + for i in range(self.pretraining_tp) + ] + logits = torch.cat(logits, dim=-1) + else: + logits = self.lm_head(hidden_states) + logits = logits.float() + + loss = None + if labels is not None: + # Shift so that tokens < n predict n + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss() + shift_logits = shift_logits.view(-1, self.config.vocab_size) + shift_labels = shift_labels.view(-1) + # Enable model parallelism + shift_labels = shift_labels.to(shift_logits.device) + loss = loss_fct(shift_logits, shift_labels) + + if not return_dict: + output = (logits, ) + outputs[1:] + return (loss, ) + output if loss is not None else output + + return CausalLMOutputWithPast( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + def prepare_inputs_for_generation(self, + input_ids, + past_key_values=None, + attention_mask=None, + inputs_embeds=None, 
+ **kwargs): + if past_key_values: + input_ids = input_ids[:, -1:] + + position_ids = kwargs.get('position_ids', None) + if attention_mask is not None and position_ids is None: + # create position_ids on the fly for batch generation + position_ids = attention_mask.long().cumsum(-1) - 1 + position_ids.masked_fill_(attention_mask == 0, 1) + if past_key_values: + position_ids = position_ids[:, -1].unsqueeze(-1) + + # if `inputs_embeds` are passed, we only want to use them in the 1st generation step + if inputs_embeds is not None and past_key_values is None: + model_inputs = {'inputs_embeds': inputs_embeds} + else: + model_inputs = {'input_ids': input_ids} + + model_inputs.update({ + 'position_ids': position_ids, + 'past_key_values': past_key_values, + 'use_cache': kwargs.get('use_cache'), + 'attention_mask': attention_mask, + }) + return model_inputs + + @staticmethod + def _reorder_cache(past_key_values, beam_idx): + reordered_past = () + for layer_past in past_key_values: + reordered_past += (tuple( + past_state.index_select(0, beam_idx.to(past_state.device)) + for past_state in layer_past), ) + return reordered_past + + def chat(self, input: Dict, tokenizer) -> Dict: + import copy + gen_kwargs = copy.copy(input) + if 'text' not in input: + text: str = '' + else: + text: str = input['text'] + gen_kwargs.pop('text') + + if 'system' not in input: + system: str = '' + else: + system: str = input['system'] + gen_kwargs.pop('system') + + if 'history' not in input: + history = [] + else: + history: List[Tuple] = copy.copy(input['history']) + gen_kwargs.pop('history') + + if 'max_length' not in gen_kwargs: + gen_kwargs['max_length'] = 4096 + + prompt, prompt_ids = get_chat_prompt( + system=system, + text=text, + history=history, + max_length=gen_kwargs['max_length'], + tokenizer=tokenizer) + input_ids = prompt_ids.to(self.device) + generate_ids = self.generate(input_ids, **gen_kwargs) + # remove input tokens + generate_ids = generate_ids[:, input_ids.shape[1]:] + response 
= tokenizer.batch_decode( + generate_ids, + skip_special_tokens=True, + clean_up_tokenization_spaces=False)[0] + response = response.strip() + history.append((text, response)) + + return {OutputKeys.RESPONSE: response, OutputKeys.HISTORY: history} diff --git a/modelscope/models/nlp/llama2/tokenization.py b/modelscope/models/nlp/llama2/tokenization.py new file mode 100644 index 00000000..bb276621 --- /dev/null +++ b/modelscope/models/nlp/llama2/tokenization.py @@ -0,0 +1,410 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tokenization classes for LLaMA.""" +import os +from shutil import copyfile +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple + +import sentencepiece as spm +from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer + +from modelscope.utils.logger import get_logger + +if TYPE_CHECKING: + from transformers.pipelines.conversational import Conversation + +logger = get_logger() + +VOCAB_FILES_NAMES = {'vocab_file': 'tokenizer.model'} + +PRETRAINED_VOCAB_FILES_MAP = { + 'vocab_file': { + 'hf-internal-testing/llama-tokenizer': + 'https://huggingface.co/hf-internal-testing/llama-tokenizer/resolve/main/tokenizer.model', + }, + 'tokenizer_file': { + 'hf-internal-testing/llama-tokenizer': + 'https://huggingface.co/hf-internal-testing/llama-tokenizer/resolve/main/tokenizer_config.json', + }, +} +PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { + 'hf-internal-testing/llama-tokenizer': 2048, +} +SPIECE_UNDERLINE = '▁' + +B_INST, E_INST = '[INST]', '[/INST]' +B_SYS, E_SYS = '<>\n', '\n<>\n\n' + +# fmt: off +DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your\ +answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure\ +that your responses are socially unbiased and positive in nature. + +If a question does not make any sense, or is not factually coherent, explain why instead of answering something not\ +correct. If you don't know the answer to a question, please don't share false information.""" +# fmt: on + + +class Llama2Tokenizer(PreTrainedTokenizer): + """ + Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is + no padding token in the original model. + + Args: + vocab_file (`str`): + Path to the vocabulary file. 
+ legacy (`bool`, *optional*, defaults to `True`): + Whether or not the `legacy` behaviour of the tokenizer should be used. Legacy is before the merge of #24622 + which includes fixes to properly handle tokens that appear after special tokens. A simple example: + + - `legacy=True`: + ```python + >>> from transformers import T5Tokenizer + + >>> tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=True) + >>> tokenizer.encode("Hello .") + [8774, 32099, 3, 5, 1] + ``` + - `legacy=False`: + ```python + >>> from transformers import T5Tokenizer + + >>> tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=False) + >>> tokenizer.encode("Hello .") # the extra space `[3]` is no longer here + [8774, 32099, 5, 1] + ``` + Checkout the pull request and the issue [here](https://github.com/huggingface/transformers/pull/24565) for + more details. + + """ + + vocab_files_names = VOCAB_FILES_NAMES + pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP + max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES + model_input_names = ['input_ids', 'attention_mask'] + + def __init__( + self, + vocab_file, + unk_token='', + bos_token='', + eos_token='', + pad_token=None, + sp_model_kwargs: Optional[Dict[str, Any]] = None, + add_bos_token=True, + add_eos_token=False, + clean_up_tokenization_spaces=False, + legacy=True, + **kwargs, + ): + self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs + bos_token = AddedToken( + bos_token, lstrip=False, rstrip=False) if isinstance( + bos_token, str) else bos_token + eos_token = AddedToken( + eos_token, lstrip=False, rstrip=False) if isinstance( + eos_token, str) else eos_token + unk_token = AddedToken( + unk_token, lstrip=False, rstrip=False) if isinstance( + unk_token, str) else unk_token + pad_token = AddedToken( + pad_token, lstrip=False, rstrip=False) if isinstance( + pad_token, str) else pad_token + super().__init__( + bos_token=bos_token, + eos_token=eos_token, + unk_token=unk_token, + pad_token=pad_token, 
+ add_bos_token=add_bos_token, + add_eos_token=add_eos_token, + sp_model_kwargs=self.sp_model_kwargs, + clean_up_tokenization_spaces=clean_up_tokenization_spaces, + legacy=legacy, + **kwargs, + ) + if legacy: + logger.warning_once( + f'You are using the legacy behaviour of the {self.__class__}. ' + f'This means that tokens that come after special ' + f'tokens will not be properly handled. We recommend you to' + ' read the related pull request available at https://github.com/huggingface/transformers/pull/24565' + ) + self.legacy = legacy + self.vocab_file = vocab_file + self.add_bos_token = add_bos_token + self.add_eos_token = add_eos_token + self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs) + self.sp_model.Load(vocab_file) + + def __getstate__(self): + state = self.__dict__.copy() + state['sp_model'] = None + state['sp_model_proto'] = self.sp_model.serialized_model_proto() + return state + + def __setstate__(self, d): + self.__dict__ = d + self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs) + self.sp_model.LoadFromSerializedProto(self.sp_model_proto) + + @property + def vocab_size(self): + """Returns vocab size""" + return self.sp_model.get_piece_size() + + def get_vocab(self): + """Returns vocab as a dict""" + vocab = { + self.convert_ids_to_tokens(i): i + for i in range(self.vocab_size) + } + vocab.update(self.added_tokens_encoder) + return vocab + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.tokenize + def tokenize(self, text, **kwargs) -> List[str]: + # Replace the SPIECE_UNDERLINE with a space to make sure SPIECE_UNDERLINE is only used at + # the beginning of the text + if not self.legacy: + text = SPIECE_UNDERLINE + text.replace(SPIECE_UNDERLINE, ' ') + return super().tokenize(text, **kwargs) + + # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer._tokenize + def _tokenize(self, text): + """ + Returns a tokenized string. 
+ + Since the sentencepiece internal model always adds a SPIECE_UNDERLINE, at the beginning of the provided text, + we need to remove it by hand when the current text is a subsequence. This happens whenever the `self.tokenize` + function is called with specials tokens: the input is split on the special tokens, and each subsequence is + passed to `_tokenize`. Thus if a subsequence did not start with a `" "` or SPIECE_UNDERLINE, we have to remove + the extra `SPIECE_UNDERLINE` prepended. + """ + if not self.legacy: + is_first = text.startswith(SPIECE_UNDERLINE) + if is_first: + text = text[1:] + + tokens = self.sp_model.encode(text, out_type=str) + + if not self.legacy and not is_first and not text.startswith( + ' ') and tokens[0].startswith(SPIECE_UNDERLINE): + tokens = ([tokens[0][1:]] + if len(tokens[0]) > 1 else []) + tokens[1:] + return tokens + + def _convert_token_to_id(self, token): + """Converts a token (str) in an id using the vocab.""" + return self.sp_model.piece_to_id(token) + + def _convert_id_to_token(self, index): + """Converts an index (integer) in a token (str) using the vocab.""" + token = self.sp_model.IdToPiece(index) + return token + + def convert_tokens_to_string(self, tokens): + """Converts a sequence of tokens (string) in a single string.""" + current_sub_tokens = [] + out_string = '' + prev_is_special = False + for i, token in enumerate(tokens): + # make sure that special tokens are not decoded using sentencepiece model + if token in self.all_special_tokens: + if not prev_is_special and i != 0: + out_string += ' ' + out_string += self.sp_model.decode(current_sub_tokens) + token + prev_is_special = True + current_sub_tokens = [] + else: + current_sub_tokens.append(token) + prev_is_special = False + out_string += self.sp_model.decode(current_sub_tokens) + return out_string + + def save_vocabulary(self, + save_directory, + filename_prefix: Optional[str] = None) -> Tuple[str]: + """ + Save the vocabulary and special tokens file to a directory. 
+ + Args: + save_directory (`str`): + The directory in which to save the vocabulary. + + Returns: + `Tuple(str)`: Paths to the files saved. + """ + if not os.path.isdir(save_directory): + logger.error( + f'Vocabulary path ({save_directory}) should be a directory') + return + out_vocab_file = os.path.join( + save_directory, (filename_prefix + '-' if filename_prefix else '') + + VOCAB_FILES_NAMES['vocab_file']) + + if os.path.abspath(self.vocab_file) != os.path.abspath( + out_vocab_file) and os.path.isfile(self.vocab_file): + copyfile(self.vocab_file, out_vocab_file) + elif not os.path.isfile(self.vocab_file): + with open(out_vocab_file, 'wb') as fi: + content_spiece_model = self.sp_model.serialized_model_proto() + fi.write(content_spiece_model) + + return (out_vocab_file, ) + + def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None): + bos_token_id = [self.bos_token_id] if self.add_bos_token else [] + eos_token_id = [self.eos_token_id] if self.add_eos_token else [] + + output = bos_token_id + token_ids_0 + eos_token_id + + if token_ids_1 is not None: + output = output + bos_token_id + token_ids_1 + eos_token_id + + return output + + def get_special_tokens_mask( + self, + token_ids_0: List[int], + token_ids_1: Optional[List[int]] = None, + already_has_special_tokens: bool = False) -> List[int]: + """ + Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding + special tokens using the tokenizer `prepare_for_model` method. + + Args: + token_ids_0 (`List[int]`): + List of IDs. + token_ids_1 (`List[int]`, *optional*): + Optional second list of IDs for sequence pairs. + already_has_special_tokens (`bool`, *optional*, defaults to `False`): + Whether or not the token list is already formatted with special tokens for the model. + + Returns: + `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. 
+ """ + if already_has_special_tokens: + return super().get_special_tokens_mask( + token_ids_0=token_ids_0, + token_ids_1=token_ids_1, + already_has_special_tokens=True) + + bos_token_id = [1] if self.add_bos_token else [] + eos_token_id = [1] if self.add_eos_token else [] + + if token_ids_1 is None: + return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id + return bos_token_id + ( + [0] * len(token_ids_0)) + eos_token_id + bos_token_id + ( + [0] * len(token_ids_1)) + eos_token_id # noqa + + def create_token_type_ids_from_sequences( + self, + token_ids_0: List[int], + token_ids_1: Optional[List[int]] = None) -> List[int]: + """ + Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT + sequence pair mask has the following format: + + ``` + 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 + | first sequence | second sequence | + ``` + + if token_ids_1 is None, only returns the first portion of the mask (0s). + + Args: + token_ids_0 (`List[int]`): + List of ids. + token_ids_1 (`List[int]`, *optional*): + Optional second list of IDs for sequence pairs. + + Returns: + `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s). + """ + bos_token_id = [self.bos_token_id] if self.add_bos_token else [] + eos_token_id = [self.eos_token_id] if self.add_eos_token else [] + + output = [0] * len(bos_token_id + token_ids_0 + eos_token_id) + + if token_ids_1 is not None: + output += [1] * len(bos_token_id + token_ids_1 + eos_token_id) + + return output + + def _build_conversation_input_ids( + self, conversation: 'Conversation') -> List[int]: + """Builds the input ids for a conversation. + This is the format used in the provided examples. System prompts should be manually added at the beginning of + the conversation. If no system prompt is given, the `DEFAULT_SYSTEM_PROMPT` will be used. 
+ ``` + [INST] B_SYS SytemPrompt E_SYS Prompt [/INST] Answer + [INST] Prompt [/INST] Answer + [INST] Prompt [/INST] + ``` + + If you want to use your own system prompt, make sure to use both `B_SYS` and `E_SYS` use the following: + ```python + >>> from transformers import Conversation + + >>> Conversation( + ... "<>\n Only answer with emojis, and charades\n<>\n\nHow can I build a house in 10 septs?" + ... ) + ``` + Args: + conversation (`Conversation`): + Conversation to build input ids for. + Returns: + `List[int]`: + Input ids for the conversation. + """ + dialogue = list(conversation.iter_texts()) + if not all([is_user for is_user, msg in dialogue[::2]]) or not all( + [not is_user for is_user, msg in dialogue[1::2]]): # noqa + raise ValueError( + "The model only supports 'user' and 'assistant' roles, " + 'starting with user and alternating (u/a/u/a/u...)') + + dialog_tokens: List[int] = [] + if len(conversation.past_user_inputs) > 0: + if not conversation.past_user_inputs[0].startswith( + B_SYS) or E_SYS not in conversation.past_user_inputs[0]: + conversation.past_user_inputs[0] = ( + B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS + + conversation.past_user_inputs[0]) + elif not dialogue[0][1].startswith( + B_SYS) or E_SYS not in dialogue[0][1]: + dialogue[0] = (dialogue[0][0], B_SYS + DEFAULT_SYSTEM_PROMPT + + E_SYS + dialogue[0][1]) + + dialog_tokens += sum( + [[self.bos_token_id] + self.encode( + f'{B_INST} {(prompt[1]).strip()} {E_INST} {(answer[1]).strip()} ', + add_special_tokens=False) + [self.eos_token_id] + for prompt, answer in zip(dialogue[::2], dialogue[1::2])], + [], + ) + if not (dialogue[-1][0]): + raise ValueError( + f"Last message must be from user, got {dialogue[-1]['role']}") + dialog_tokens += [self.bos_token_id] + self.encode( + f'{B_INST} {(dialogue[-1][1]).strip()} {E_INST}', + add_special_tokens=False) + return dialog_tokens diff --git a/modelscope/models/nlp/llama2/tokenization_fast.py b/modelscope/models/nlp/llama2/tokenization_fast.py new file 
mode 100644 index 00000000..13862955 --- /dev/null +++ b/modelscope/models/nlp/llama2/tokenization_fast.py @@ -0,0 +1,251 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +# Copyright 2020 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from shutil import copyfile +from typing import TYPE_CHECKING, Optional, Tuple + +from tokenizers import processors +from transformers.tokenization_utils_fast import PreTrainedTokenizerFast +from transformers.utils import is_sentencepiece_available +from transformers.utils.versions import require_version + +from modelscope.utils import logger as logging + +if TYPE_CHECKING: + from transformers.pipelines.conversational import Conversation + +require_version('tokenizers>=0.13.3') + +if is_sentencepiece_available(): + from .tokenization import Llama2Tokenizer +else: + Llama2Tokenizer = None + +logger = logging.get_logger() +VOCAB_FILES_NAMES = { + 'vocab_file': 'tokenizer.model', + 'tokenizer_file': 'tokenizer.json' +} + +B_INST, E_INST = '[INST]', '[/INST]' +B_SYS, E_SYS = '<>\n', '\n<>\n\n' + +# fmt: off +DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your\ +answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure\ +that your responses are socially unbiased and positive in nature. 
+ +If a question does not make any sense, or is not factually coherent, explain why instead of answering something not\ +correct. If you don't know the answer to a question, please don't share false information.""" +# fmt: on + + +class Llama2TokenizerFast(PreTrainedTokenizerFast): + """ + Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding. + + This uses notably ByteFallback and no normalization. + + ``` + from transformers import LlamaTokenizerFast + + tokenizer = LlaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer") + tokenizer.encode("Hello this is a test") + >>> [1, 15043, 445, 338, 263, 1243] + ``` + + If you want to change the `bos_token` or the `eos_token`, make sure to specify them when initializing the model, or + call `tokenizer.update_post_processor()` to make sure that the post-processing is correctly done (otherwise the + values of the first token and final token of an encoded sequence will not be correct). For more details, checkout + [post-processors] (https://huggingface.co/docs/tokenizers/api/post-processors) documentation. + + + This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should + refer to this superclass for more information regarding those methods. + + Args: + vocab_file (`str`): + [SentencePiece](https://github.com/google/sentencepiece) file (generally has a .model extension) that + contains the vocabulary necessary to instantiate a tokenizer. + tokenizer_file (`str`): + [tokenizers](https://github.com/huggingface/tokenizers) file (generally has a .json extension) that + contains everything needed to load the tokenizer. + + clean_up_tokenization_spaces (`str`, *optional*, defaults to `False`): + Wether to cleanup spaces after decoding, cleanup consists in removing potential artifacts like extra + spaces. + + bos_token (`str`, *optional*, defaults to `""`): + The beginning of sequence token that was used during pretraining. 
Can be used a sequence classifier token. + + eos_token (`str`, *optional*, defaults to `""`): + The end of sequence token. + + unk_token (`str`, *optional*, defaults to `""`): + The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this + token instead. + """ + + vocab_files_names = VOCAB_FILES_NAMES + slow_tokenizer_class = Llama2Tokenizer + padding_side = 'left' + model_input_names = ['input_ids', 'attention_mask'] + + def __init__( + self, + vocab_file=None, + tokenizer_file=None, + clean_up_tokenization_spaces=False, + unk_token='', + bos_token='', + eos_token='', + add_bos_token=True, + add_eos_token=False, + **kwargs, + ): + super().__init__( + vocab_file=vocab_file, + tokenizer_file=tokenizer_file, + clean_up_tokenization_spaces=clean_up_tokenization_spaces, + unk_token=unk_token, + bos_token=bos_token, + eos_token=eos_token, + **kwargs, + ) + self._add_bos_token = add_bos_token + self._add_eos_token = add_eos_token + self.update_post_processor() + + self.vocab_file = vocab_file + self.can_save_slow_tokenizer = False if not self.vocab_file else True + + def update_post_processor(self): + """ + Updates the underlying post processor with the current `bos_token` and `eos_token`. 
+ """ + bos = self.bos_token + bos_token_id = self.bos_token_id + + eos = self.eos_token + eos_token_id = self.eos_token_id + + single = f"{(bos+':0 ') * self.add_bos_token}$A:0{(' '+eos+':0') * self.add_eos_token}" + pair = f"{single}{(' '+bos+':1') * self.add_bos_token} $B:1{(' '+eos+':1') * self.add_eos_token}" + + special_tokens = [] + if self.add_bos_token: + special_tokens.append((bos, bos_token_id)) + if self.add_eos_token: + special_tokens.append((eos, eos_token_id)) + self._tokenizer.post_processor = processors.TemplateProcessing( + single=single, pair=pair, special_tokens=special_tokens) + + @property + def add_eos_token(self): + return self._add_eos_token + + @property + def add_bos_token(self): + return self._add_bos_token + + @add_eos_token.setter + def add_eos_token(self, value): + self._add_eos_token = value + self.update_post_processor() + + @add_bos_token.setter + def add_bos_token(self, value): + self._add_bos_token = value + self.update_post_processor() + + def save_vocabulary(self, + save_directory: str, + filename_prefix: Optional[str] = None) -> Tuple[str]: + if not self.can_save_slow_tokenizer: + raise ValueError( + 'Your fast tokenizer does not have the necessary information to save the vocabulary for a slow ' + 'tokenizer.') + + if not os.path.isdir(save_directory): + logger.error( + f'Vocabulary path ({save_directory}) should be a directory') + return + out_vocab_file = os.path.join( + save_directory, (filename_prefix + '-' if filename_prefix else '') + + VOCAB_FILES_NAMES['vocab_file']) + + if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file): + copyfile(self.vocab_file, out_vocab_file) + + return (out_vocab_file, ) + + def _build_conversation_input_ids(self, conversation: 'Conversation'): + """Builds the input ids for a conversation. + This is the format used in the provided examples. System prompts should be manually added at the beginning of + the conversation. 
If no system prompt is given, the `DEFAULT_SYSTEM_PROMPT` will be used. + ``` + [INST] B_SYS SytemPrompt E_SYS Prompt [/INST] Answer + [INST] Prompt [/INST] Answer + [INST] Prompt [/INST] + ``` + + If you want to use your own system prompt, make sure to use both `B_SYS` and `E_SYS` use the following: + ```python + >>> from transformers import Conversation + + >>> Conversation( + ... "<>\n Only answer with emojis, and charades\n<>\n\nHow can I build a house in 10 septs?" + ... ) + ``` + Args: + conversation (`Conversation`): + Conversation to build input ids for. + Returns: + `List[int]`: + Input ids for the conversation. + """ + dialogue = list(conversation.iter_texts()) + if not all([is_user for is_user, msg in dialogue[::2]]) or not all( + [not is_user for is_user, msg in dialogue[1::2]]): # noqa + raise ValueError( + "The model only supports 'user' and 'assistant' roles, " + 'starting with user and alternating (u/a/u/a/u...)') + + dialog_tokens = [] + if len(conversation.past_user_inputs) > 0: + if not conversation.past_user_inputs[0].startswith( + B_SYS) or E_SYS not in conversation.past_user_inputs[0]: + conversation.past_user_inputs[0] = ( + B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS + + conversation.past_user_inputs[0]) + elif not dialogue[0][1].startswith( + B_SYS) or E_SYS not in dialogue[0][1]: + dialogue[0] = (dialogue[0][0], B_SYS + DEFAULT_SYSTEM_PROMPT + + E_SYS + dialogue[0][1]) + + dialog_tokens += sum( + [[self.bos_token_id] + self.encode( + f'{B_INST} {(prompt[1]).strip()} {E_INST} {(answer[1]).strip()} ', + add_special_tokens=False) + [self.eos_token_id] + for prompt, answer in zip(dialogue[::2], dialogue[1::2])], + [], + ) + if not (dialogue[-1][0]): + raise ValueError( + f"Last message must be from user, got {dialogue[-1]['role']}") + dialog_tokens += [self.bos_token_id] + self.encode( + f'{B_INST} {(dialogue[-1][1]).strip()} {E_INST}', + add_special_tokens=False) + return dialog_tokens diff --git a/modelscope/models/nlp/peer/backbone.py 
b/modelscope/models/nlp/peer/backbone.py index 2dca8dda..4bf376cd 100644 --- a/modelscope/models/nlp/peer/backbone.py +++ b/modelscope/models/nlp/peer/backbone.py @@ -36,7 +36,7 @@ from modelscope.utils.nlp.utils import parse_labels_in_order from .configuration import PeerConfig from .sas_utils import SequenceSideInfo -logger = logging.get_logger(__name__) +logger = logging.get_logger() PEER_PRETRAINED_MODEL_ARCHIVE_LIST = [ 'google/peer-small-generator', diff --git a/modelscope/models/nlp/peer/configuration.py b/modelscope/models/nlp/peer/configuration.py index da8b0a74..794b89f7 100644 --- a/modelscope/models/nlp/peer/configuration.py +++ b/modelscope/models/nlp/peer/configuration.py @@ -20,7 +20,7 @@ from transformers.configuration_utils import PretrainedConfig from modelscope.utils import logger as logging -logger = logging.get_logger(__name__) +logger = logging.get_logger() class PeerConfig(PretrainedConfig): diff --git a/modelscope/msdatasets/dataset_cls/custom_datasets/audio/asr_dataset.py b/modelscope/msdatasets/dataset_cls/custom_datasets/audio/asr_dataset.py index c0696615..749e6171 100644 --- a/modelscope/msdatasets/dataset_cls/custom_datasets/audio/asr_dataset.py +++ b/modelscope/msdatasets/dataset_cls/custom_datasets/audio/asr_dataset.py @@ -3,6 +3,7 @@ import os from modelscope.msdatasets.ms_dataset import MsDataset +from modelscope.utils.constant import DownloadMode class ASRDataset(MsDataset): @@ -29,11 +30,14 @@ class ASRDataset(MsDataset): return data_list @classmethod - def load(cls, - dataset_name, - namespace='speech_asr', - train_set='train', - dev_set='validation'): + def load( + cls, + dataset_name, + namespace='speech_asr', + train_set='train', + dev_set='validation', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS, + ): if os.path.exists(dataset_name): data_dir = dataset_name ds_dict = {} @@ -43,6 +47,10 @@ class ASRDataset(MsDataset): return ds_dict else: from modelscope.msdatasets import MsDataset + ds_dict = MsDataset.load( - 
dataset_name=dataset_name, namespace=namespace) + dataset_name=dataset_name, + namespace=namespace, + download_mode=download_mode, + ) return ds_dict diff --git a/modelscope/msdatasets/download/dataset_builder.py b/modelscope/msdatasets/download/dataset_builder.py index 140503f0..0c5c4154 100644 --- a/modelscope/msdatasets/download/dataset_builder.py +++ b/modelscope/msdatasets/download/dataset_builder.py @@ -223,11 +223,23 @@ class CsvDatasetBuilder(csv.Csv): if field_name.endswith(':FILE'): transform_fields.append(field_name) - base_extracted_dir = self.split_path_dict.get(split_name, '') + base_extracted_dir: Union[str, list] = self.split_path_dict.get( + split_name, '') for field_name in transform_fields: - if base_extracted_dir: + if isinstance(base_extracted_dir, + list) and len(base_extracted_dir) > 0: + if df.shape[0] != len(base_extracted_dir): + logger.error( + f"Number of lines in meta-csv file for split '{split_name}' ({df.shape[0]}) " + f'does not match number of data-files({len(base_extracted_dir)})!' 
+ ) + else: + df[field_name] = base_extracted_dir + elif isinstance(base_extracted_dir, str) and base_extracted_dir: df[field_name] = df[field_name].apply( lambda x: os.path.join(base_extracted_dir, x)) + else: + logger.warning(f'Nothing to do for field {field_name}') pa_data = pa.Table.from_pandas(df) return Dataset(arrow_table=pa_data) diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py index 984fd19c..385e51ef 100644 --- a/modelscope/outputs/outputs.py +++ b/modelscope/outputs/outputs.py @@ -720,6 +720,7 @@ TASK_OUTPUTS = { Tasks.video_super_resolution: [OutputKeys.OUTPUT_VIDEO], Tasks.video_deinterlace: [OutputKeys.OUTPUT_VIDEO], Tasks.nerf_recon_acc: [OutputKeys.OUTPUT], + Tasks.nerf_recon_vq_compression: [OutputKeys.OUTPUT], Tasks.video_colorization: [OutputKeys.OUTPUT_VIDEO], # image quality assessment degradation result for single image diff --git a/modelscope/pipelines/audio/asr_inference_pipeline.py b/modelscope/pipelines/audio/asr_inference_pipeline.py index 823964e5..2379274c 100644 --- a/modelscope/pipelines/audio/asr_inference_pipeline.py +++ b/modelscope/pipelines/audio/asr_inference_pipeline.py @@ -210,7 +210,12 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): if isinstance(audio_in, str): # for funasr code, generate wav.scp from url or local path - self.audio_in, self.raw_inputs = generate_scp_from_url(audio_in) + if audio_in.startswith('http') or os.path.isfile(audio_in): + self.audio_in, self.raw_inputs = generate_scp_from_url( + audio_in) + else: + raise FileNotFoundError( + f'file {audio_in} NOT FOUND, please CHECK!') elif isinstance(audio_in, bytes): self.audio_in = audio_in self.raw_inputs = None diff --git a/modelscope/pipelines/audio/speaker_diarization_pipeline.py b/modelscope/pipelines/audio/speaker_diarization_pipeline.py index a20cfcad..dfb808d0 100644 --- a/modelscope/pipelines/audio/speaker_diarization_pipeline.py +++ b/modelscope/pipelines/audio/speaker_diarization_pipeline.py @@ -232,7 +232,13 @@ class 
SpeakerDiarizationPipeline(Pipeline): def forward(self, audio_in: Union[tuple, str, Any] = None) -> list: """Decoding """ - logger.info('Speaker Diarization Processing: {0} ...'.format(audio_in)) + # log file_path/url or tuple (str, str) + if isinstance(audio_in, str) or \ + (isinstance(audio_in, tuple) and all(isinstance(item, str) for item in audio_in)): + logger.info(f'Speaker Verification Processing: {audio_in} ...') + else: + logger.info( + f'Speaker Verification Processing: {str(audio_in)[:100]} ...') data_cmd, raw_inputs = None, None if isinstance(audio_in, tuple) or isinstance(audio_in, list): diff --git a/modelscope/pipelines/audio/speaker_verification_pipeline.py b/modelscope/pipelines/audio/speaker_verification_pipeline.py index c10f6a95..c23058be 100644 --- a/modelscope/pipelines/audio/speaker_verification_pipeline.py +++ b/modelscope/pipelines/audio/speaker_verification_pipeline.py @@ -180,8 +180,13 @@ class SpeakerVerificationPipeline(Pipeline): def forward(self, audio_in: Union[tuple, str, Any] = None) -> list: """Decoding """ - logger.info( - 'Speaker Verification Processing: {0} ...'.format(audio_in)) + # log file_path/url or tuple (str, str) + if isinstance(audio_in, str) or \ + (isinstance(audio_in, tuple) and all(isinstance(item, str) for item in audio_in)): + logger.info(f'Speaker Verification Processing: {audio_in} ...') + else: + logger.info( + f'Speaker Verification Processing: {str(audio_in)[:100]} ...') data_cmd, raw_inputs = None, None if isinstance(audio_in, tuple) or isinstance(audio_in, list): diff --git a/modelscope/pipelines/audio/timestamp_pipeline.py b/modelscope/pipelines/audio/timestamp_pipeline.py index 17cf9545..98e9eb05 100644 --- a/modelscope/pipelines/audio/timestamp_pipeline.py +++ b/modelscope/pipelines/audio/timestamp_pipeline.py @@ -93,7 +93,7 @@ class TimestampPipeline(Pipeline): def __call__(self, audio_in: Union[str, bytes], - text_in: str = None, + text_in: str, audio_fs: int = None, recog_type: str = None, 
audio_format: str = None, diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index 48d328d9..5968dba8 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -144,7 +144,8 @@ class Pipeline(ABC): if not isinstance(model, torch.nn.Module): return model.eval() - if self.device_map is None: + from modelscope.utils.torch_utils import is_on_same_device + if is_on_same_device(model): model.to(self.device) if not self._model_prepare: diff --git a/modelscope/pipelines/cv/nerf_recon_vq_compression_pipeline.py b/modelscope/pipelines/cv/nerf_recon_vq_compression_pipeline.py new file mode 100644 index 00000000..5ee6a753 --- /dev/null +++ b/modelscope/pipelines/cv/nerf_recon_vq_compression_pipeline.py @@ -0,0 +1,94 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import Any, Dict + +from modelscope.metainfo import Pipelines +from modelscope.outputs import OutputKeys +from modelscope.pipelines.base import Input, Model, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.pipelines.util import is_model, is_official_hub_path +from modelscope.utils.constant import Invoke, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module( + Tasks.nerf_recon_vq_compression, + module_name=Pipelines.nerf_recon_vq_compression) +class NeRFReconVQCompressionPipeline(Pipeline): + """ NeRF reconstruction VQ compression pipeline + Example: + + ```python + >>> from modelscope.pipelines import pipeline + >>> nerf_recon_vq_compress = pipeline(Tasks.nerf_recon_vq_compression, + 'damo/cv_nerf-3d-reconstruction-vq-compression_damo') + >>> nerf_recon_vq_compress({ + 'data_dir': '/data/lego', # data dir path (str) + 'render_dir': 'save_dir', # save dir path (str) + 'ckpt_path': 'ckpt_path', # ckpt path (str) + }) + >>> # + ``` + """ + + def __init__(self, + model, + dataset_name='blender', + data_dir='', + downsample=1, + ndc_ray=False, + ckpt_path='', + device='gpu', 
+ **kwargs): + """ + use model to create a image sky change pipeline for image editing + Args: + model (str or Model): model_id on modelscope hub + data_type (str): currently only support 'blender' and 'colmap' + use_mask (bool): segment the object or not + ckpt_path (str): the checkpoint ckpt_path + save_mesh (bool): render mesh or not + n_test_traj_steps (int): number of random sampled images for test view, only for colmap data. + test_ray_chunk (int): ray chunk size for test, avoid GPU OOM + device (str): only support gpu + """ + model = Model.from_pretrained( + model, + device=device, + model_prefetched=True, + invoked_by=Invoke.PIPELINE, + dataset_name=dataset_name, + data_dir=data_dir, + downsample=downsample, + ndc_ray=ndc_ray, + ckpt_path=ckpt_path) if is_model(model) else model + + super().__init__(model=model, **kwargs) + if not isinstance(self.model, Model): + logger.error('model object is not initialized.') + raise Exception('model object is not initialized.') + logger.info('init model done') + + def preprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + test_mode = inputs['test_mode'] + if 'test' in test_mode or 'eval' in test_mode: + inputs['test_mode'] = 'evaluation_test' + elif 'path' in test_mode: + inputs['test_mode'] = 'render_path' + return inputs + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + render_dir = input['render_dir'] + test_mode = input['test_mode'] + N_vis = input.get('N_vis', 5) + if test_mode == 'evaluation_test': + self.model.evaluation(render_dir, N_vis) + elif test_mode == 'render_path': + self.model.render_path(render_dir, N_vis) + else: + raise Exception('test mode {} is not support'.format(test_mode)) + return {OutputKeys.OUTPUT: 'Done'} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py index 
ce0455b6..3eed0947 100644 --- a/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py +++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/diffusers_pipeline.py @@ -15,7 +15,7 @@ class DiffusersPipeline(Pipeline): """ use `model` to create a diffusers pipeline Args: - model: model id on modelscope hub. + model: model id on modelscope hub or local dir. device: str = 'gpu' """ diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py index 539fd4ba..0f15e5b4 100644 --- a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py +++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/chinese_stable_diffusion_pipeline.py @@ -146,7 +146,8 @@ class _DiffuersChineseStableDiffusionPipeline(StableDiffusionPipeline): do_classifier_free_guidance, negative_prompt=None, prompt_embeds: Optional[torch.FloatTensor] = None, - negative_prompt_embeds: Optional[torch.FloatTensor] = None): + negative_prompt_embeds: Optional[torch.FloatTensor] = None, + lora_scale: Optional[float] = None): r""" Encodes the prompt into text encoder hidden states. @@ -169,7 +170,14 @@ class _DiffuersChineseStableDiffusionPipeline(StableDiffusionPipeline): Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input argument. + lora_scale (`float`, *optional*): + A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded. 
""" + # set lora scale so that monkey patched LoRA + # function of text encoder can correctly access it + if lora_scale is not None and isinstance(self, LoraLoaderMixin): + self._lora_scale = lora_scale + if prompt is not None and isinstance(prompt, str): batch_size = 1 elif prompt is not None and isinstance(prompt, list): diff --git a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py index f09d459d..7e56f24c 100644 --- a/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py +++ b/modelscope/pipelines/multi_modal/diffusers_wrapped/stable_diffusion/stable_diffusion_pipeline.py @@ -6,8 +6,7 @@ import cv2 import numpy as np import torch import torchvision.transforms as transforms -from diffusers import \ - StableDiffusionPipeline as DiffuserStableDiffusionPipeline +from diffusers import DiffusionPipeline from PIL import Image from modelscope.metainfo import Pipelines @@ -25,29 +24,110 @@ from modelscope.utils.constant import Tasks module_name=Pipelines.diffusers_stable_diffusion) class StableDiffusionPipeline(DiffusersPipeline): - def __init__(self, model: str, lora_dir: str = None, **kwargs): + def __init__(self, + model: str, + lora_dir: str = None, + custom_dir: str = None, + modifier_token: str = None, + **kwargs): """ use `model` to create a stable diffusion pipeline Args: model: model id on modelscope hub or local model dir. + lora_dir: lora weight dir for unet. + custom_dir: custom diffusion weight dir for unet. + modifier_token: token to use as a modifier for the concept of custom diffusion. + use_safetensors: load safetensors weights. 
""" + use_safetensors = kwargs.pop('use_safetensors', False) + # check custom diffusion input value + if custom_dir is None and modifier_token is not None: + raise ValueError( + 'custom_dir is None but modifier_token is not None') + elif custom_dir is not None and modifier_token is None: + raise ValueError( + 'modifier_token is None but custom_dir is not None') self.device = 'cuda' if torch.cuda.is_available() else 'cpu' # load pipeline torch_type = torch.float16 if self.device == 'cuda' else torch.float32 - self.pipeline = DiffuserStableDiffusionPipeline.from_pretrained( - model, torch_dtype=torch_type) + self.pipeline = DiffusionPipeline.from_pretrained( + model, use_safetensors=use_safetensors, torch_dtype=torch_type) self.pipeline = self.pipeline.to(self.device) + # load lora moudle to unet if lora_dir is not None: assert os.path.exists(lora_dir), f"{lora_dir} isn't exist" self.pipeline.unet.load_attn_procs(lora_dir) + # load custom diffusion to unet + if custom_dir is not None: + assert os.path.exists(custom_dir), f"{custom_dir} isn't exist" + self.pipeline.unet.load_attn_procs( + custom_dir, weight_name='pytorch_custom_diffusion_weights.bin') + modifier_token = modifier_token.split('+') + for modifier_token_name in modifier_token: + self.pipeline.load_textual_inversion( + custom_dir, weight_name=f'{modifier_token_name}.bin') def preprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: return inputs def forward(self, inputs: Dict[str, Any], **forward_params) -> Dict[str, Any]: + """ + Inputs Args: + prompt (`str` or `List[str]`, *optional*): + The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. + instead. + height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): + The height in pixels of the generated image. + width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): + The width in pixels of the generated image. 
+ num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + guidance_scale (`float`, *optional*, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + negative_prompt (`str` or `List[str]`, *optional*): + The prompt or prompts not to guide the image generation. If not defined, one has to pass + `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is + less than `1`). + num_images_per_prompt (`int`, *optional*, defaults to 1): + The number of images to generate per prompt. + eta (`float`, *optional*, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. + generator (`torch.Generator` or `List[torch.Generator]`, *optional*): + One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) + to make generation deterministic. + latents (`torch.FloatTensor`, *optional*): + Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image + generation. Can be used to tweak the same generation with different prompts. If not provided, a latents + tensor will ge generated by sampling using the supplied random `generator`. + prompt_embeds (`torch.FloatTensor`, *optional*): + Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. 
If not + provided, text embeddings will be generated from `prompt` input argument. + negative_prompt_embeds (`torch.FloatTensor`, *optional*): + Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt + weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input + argument. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a + plain tuple. + callback (`Callable`, *optional*): + A function that will be called every `callback_steps` steps during inference. The function will be + called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. + callback_steps (`int`, *optional*, defaults to 1): + The frequency at which the `callback` function will be called. If not specified, the callback will be + called at every step. 
+ """ if not isinstance(inputs, dict): raise ValueError( f'Expected the input to be a dictionary, but got {type(input)}' @@ -57,7 +137,20 @@ class StableDiffusionPipeline(DiffusersPipeline): raise ValueError('input should contain "text", but not found') images = self.pipeline( - inputs['text'], num_inference_steps=30, guidance_scale=7.5) + prompt=inputs.get('text'), + height=inputs.get('height'), + width=inputs.get('width'), + num_inference_steps=inputs.get('num_inference_steps', 50), + guidance_scale=inputs.get('guidance_scale', 7.5), + negative_prompt=inputs.get('negative_prompt'), + num_images_per_prompt=inputs.get('num_images_per_prompt', 1), + eta=inputs.get('eta', 0.0), + generator=inputs.get('generator'), + latents=inputs.get('latents'), + output_type=inputs.get('output_type', 'pil'), + return_dict=inputs.get('return_dict', True), + callback=inputs.get('callback'), + callback_steps=inputs.get('callback_steps', 1)) return images diff --git a/modelscope/pipelines/nlp/llama2_text_generation_pipeline.py b/modelscope/pipelines/nlp/llama2_text_generation_pipeline.py new file mode 100644 index 00000000..3a9d3d44 --- /dev/null +++ b/modelscope/pipelines/nlp/llama2_text_generation_pipeline.py @@ -0,0 +1,99 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
@PIPELINES.register_module(
    Tasks.text_generation,
    module_name=Pipelines.llama2_text_generation_pipeline)
class Llama2TaskPipeline(TextGenerationPipeline):
    """Text generation pipeline for Llama 2 models."""

    def __init__(self,
                 model: Union[Model, str],
                 preprocessor: Preprocessor = None,
                 config_file: str = None,
                 device: str = 'gpu',
                 auto_collate=True,
                 **kwargs):
        """Use `model` and `preprocessor` to create a generation pipeline for prediction.

        Args:
            model (str or Model): Supply either a local model dir which supported the text generation task,
                or a model id from the model hub, or a torch model instance.
            preprocessor (Preprocessor): Accepted for signature compatibility; this constructor does not
                forward it and `preprocess` returns its input unchanged.
            config_file (str): Accepted for signature compatibility; not used by this constructor.
            device (str): Accepted for signature compatibility; not used by this constructor.
            auto_collate (bool): Accepted for signature compatibility; not used by this constructor.
            kwargs (dict, `optional`):
                Extra kwargs passed to the parent constructor. `device_map` (default 'auto') and
                `torch_dtype` (default `torch.float16`) are additionally used when loading the model
                from a string.
        Examples:
            >>> from modelscope.utils.constant import Tasks
            >>> import torch
            >>> from modelscope.pipelines import pipeline
            >>> from modelscope import snapshot_download, Model
            >>> model_dir = snapshot_download("modelscope/Llama-2-13b-chat-ms",
            ...     ignore_file_pattern = [r'\\w+\\.safetensors'])
            >>> pipe = pipeline(task=Tasks.text_generation, model=model_dir, device_map='auto',
            ...     torch_dtype=torch.float16)
            >>> inputs="咖啡的作用是什么?"
            >>> result = pipe(inputs,max_length=200, do_sample=True, top_p=0.85,
            ...     temperature=1.0, repetition_penalty=1., eos_token_id=2, bos_token_id=1, pad_token_id=0)
            >>> print(result['text'])

            To view other examples please check tests/pipelines/test_llama2_text_generation_pipeline.py.
        """
        if isinstance(model, str):
            # Honour caller-supplied loading options instead of silently
            # overriding them; `get` (not `pop`) keeps the kwargs passed to
            # the parent constructor exactly as before.
            self.model = Model.from_pretrained(
                model,
                device_map=kwargs.get('device_map', 'auto'),
                torch_dtype=kwargs.get('torch_dtype', torch.float16))
            tokenizer_source = model
        else:
            # An already-built model must not go through from_pretrained.
            self.model = model
            # NOTE(review): assumes model instances expose `model_dir`
            # pointing at the checkpoint directory - confirm.
            tokenizer_source = getattr(model, 'model_dir', None)
            if tokenizer_source is None:
                raise ValueError(
                    'Cannot locate the tokenizer: the model instance has no '
                    '`model_dir`; pass a model id or a local model dir.')
        self.tokenizer = Llama2Tokenizer.from_pretrained(tokenizer_source)
        super().__init__(model=self.model, **kwargs)

    def _input_device(self):
        """Return the device the input ids should be moved to.

        Uses the device of the model's first parameter and falls back to
        CUDA when available, else CPU, if that cannot be determined.
        """
        fallback = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        try:
            device = next(self.model.parameters()).device
        except (AttributeError, StopIteration):
            return fallback
        # Parameters on the meta device cannot receive real tensors.
        return fallback if device.type == 'meta' else device

    def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
        """Return the raw inputs; tokenisation happens in `forward`."""
        return inputs

    def _sanitize_parameters(self, **pipeline_parameters):
        """Route every call-time keyword argument to `forward`."""
        return {}, pipeline_parameters, {}

    def forward(self,
                inputs,
                max_length=50,
                do_sample=True,
                top_p=0.85,
                temperature=1.0,
                repetition_penalty=1.,
                eos_token_id=2,
                bos_token_id=1,
                pad_token_id=0,
                **forward_params) -> Dict[str, Any]:
        """Tokenise `inputs`, generate, and decode the first sequence.

        Args:
            inputs: text passed to the tokenizer.
            max_length, do_sample, top_p, temperature, repetition_penalty,
            eos_token_id, bos_token_id, pad_token_id: forwarded to
                `self.model.generate`.
            **forward_params: extra keyword arguments for `generate`.

        Returns:
            `{'text': <decoded first sequence>}`.
        """
        encoded = self.tokenizer(inputs, return_tensors='pt')
        generate_ids = self.model.generate(
            # Follow the model's placement rather than assuming CUDA.
            encoded.input_ids.to(self._input_device()),
            max_length=max_length,
            do_sample=do_sample,
            top_p=top_p,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            eos_token_id=eos_token_id,
            bos_token_id=bos_token_id,
            pad_token_id=pad_token_id,
            **forward_params)
        decoded = self.tokenizer.batch_decode(
            generate_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False)[0]
        return {'text': decoded}

    # format the outputs from pipeline
    def postprocess(self, input, **kwargs) -> Dict[str, Any]:
        """Return the forward output unchanged."""
        return input
def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
    """Call ``self.model.chat`` with the inputs merged with call-time params.

    Args:
        inputs: request dict from ``preprocess``.
        **forward_params: extra entries; they override keys of the same
            name in ``inputs``.

    Returns:
        The result of ``self.model.chat``.
    """
    # Merge into a fresh dict so the caller's ``inputs`` is not modified
    # (an in-place update leaks generation parameters into the caller's
    # request and across repeated calls with the same dict).
    chat_inputs = {**inputs, **forward_params}
    return self.model.chat(chat_inputs)
def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
    """Call ``self.model.chat`` with merged inputs and the tokenizer.

    Args:
        inputs: request dict from ``preprocess``.
        **forward_params: extra entries; they override keys of the same
            name in ``inputs``.

    Returns:
        The result of ``self.model.chat(merged_inputs, self.tokenizer)``.
    """
    # Merge into a fresh dict so the caller's ``inputs`` is not modified.
    chat_inputs = {**inputs, **forward_params}
    return self.model.chat(chat_inputs, self.tokenizer)
@@ -75,6 +76,7 @@ class CheckpointHook(Hook): private_hub: Optional[bool] = True, hub_revision: Optional[str] = DEFAULT_REPOSITORY_REVISION, upload_strategy: Optional[str] = UploadStrategy.cancel, + save_trainer_state: bool = True, **kwargs): self.interval = interval self.save_dir = save_dir @@ -97,6 +99,7 @@ class CheckpointHook(Hook): self.private_hub = private_hub self.hub_revision = hub_revision self.upload_strategy = upload_strategy + self.save_trainer_state = save_trainer_state self.tag = -1 self.is_model_id = None self.max_checkpoint_num = None @@ -219,7 +222,8 @@ class CheckpointHook(Hook): checkpoint_path_prefix = os.path.join(self.save_dir, prefix) meta = self._create_training_state(trainer) self.processor.save_checkpoints(trainer, checkpoint_path_prefix, - self.output_dir, meta) + self.output_dir, meta, + self.save_trainer_state) self.save_evaluate_results(trainer) self.history_checkpoints.append(checkpoint_path_prefix) self._remove_obsolete_checkpoints(trainer) @@ -298,6 +302,7 @@ class BestCkptSaverHook(CheckpointHook): max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything. If the number exceeding the limit, checkpoints with worse metric will be deleted, which is judged by the `rule` and `metric_key` arguments. + save_trainer_state (bool): Save the trainer state for continue training, default True. The `BestCkptSaverHook` class accepts `output_sub_dir` and `output_dir` argument as its super class do. If neither of them are passed, the default value is `{save_dir}/output_best`. @@ -316,6 +321,7 @@ class BestCkptSaverHook(CheckpointHook): save_file_name: Optional[str] = None, restore_best: Optional[bool] = False, max_checkpoint_num: Optional[int] = 1, + save_trainer_state: bool = True, **kwargs): assert rule in ['max', 'min'], 'Only support "max" or "min" rule now.' 
output_kwargs = {} @@ -325,6 +331,7 @@ class BestCkptSaverHook(CheckpointHook): kwargs.pop('save_strategy', None) super().__init__( max_checkpoint_num=max_checkpoint_num, + save_trainer_state=save_trainer_state, **kwargs, **output_kwargs, ) @@ -399,7 +406,8 @@ class BestCkptSaverHook(CheckpointHook): self._best_ckpt_file = checkpoint_path_prefix meta = self._create_training_state(trainer) self.processor.save_checkpoints(trainer, checkpoint_path_prefix, - self.output_dir, meta) + self.output_dir, meta, + self.save_trainer_state) self.save_evaluate_results(trainer) self.history_checkpoints.add(checkpoint_path_prefix) self._remove_obsolete_checkpoints(trainer) diff --git a/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py b/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py index 4693968a..43a533c6 100644 --- a/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py +++ b/modelscope/trainers/hooks/checkpoint/checkpoint_processor.py @@ -104,7 +104,8 @@ class CheckpointProcessor: trainer, checkpoint_path_prefix, output_dir, - meta=None): + meta=None, + save_optimizers=True): """Save the state dict for trainer and model. This is a strategic function which can be registered by other hook's function. @@ -115,13 +116,15 @@ class CheckpointProcessor: like: /tmp/test/epoch_0 output_dir(`str`): The output dir for inference. meta: (`dict`): The meta info needed to be saved into files. 
def save_trainer_state(self,
                       trainer,
                       model,
                       train_state_file,
                       meta,
                       save_optimizers=True):
    """Save the trainer state via ``save_checkpoint`` without model weights.

    Args:
        trainer: The trainer instance; its ``optimizer`` and
            ``lr_scheduler`` are saved when ``save_optimizers`` is true.
        model: The model instance.
        train_state_file: The target file name for saving trainer states.
        meta: Some extra meta info.
        save_optimizers: Save optimizer and lr_scheduler state or not.
            Defaults to True so callers and overrides written against the
            previous four-argument signature keep their old behaviour.
    """
    save_checkpoint(
        model,
        train_state_file,
        trainer.optimizer if save_optimizers else None,
        trainer.lr_scheduler if save_optimizers else None,
        meta=meta,
        with_model=False)
class CustomCheckpointProcessor(CheckpointProcessor):
    """Checkpoint processor that stores custom diffusion weights."""

    def __init__(self, modifier_token, modifier_token_id):
        """Checkpoint processor for custom diffusion.

        Args:
            modifier_token: The token(s) to use as a modifier for the concept.
            modifier_token_id: The modifier token id(s) for the concept.
        """
        self.modifier_token = modifier_token
        self.modifier_token_id = modifier_token_id

    def save_checkpoints(self,
                         trainer,
                         checkpoint_path_prefix,
                         output_dir,
                         meta=None,
                         save_optimizers=True):
        """Save the state dict for custom diffusion model.

        Writes the unet attention processors and one ``<token>.bin`` file
        per modifier token into ``output_dir``.

        Args:
            trainer: The trainer whose ``model.unet`` and
                ``model.text_encoder`` are saved.
            checkpoint_path_prefix: Unused; kept for interface parity with
                the base processor.
            output_dir: Target directory.
            meta: Unused; kept for interface parity.
            save_optimizers: Unused; accepted because the checkpoint hook
                passes it positionally to every processor. No optimizer
                state is written by this processor either way.
        """
        trainer.model.unet = trainer.model.unet.to(torch.float32)
        trainer.model.unet.save_attn_procs(output_dir)

        learned_embeds = trainer.model.text_encoder.get_input_embeddings(
        ).weight
        if not isinstance(self.modifier_token_id, list):
            self.modifier_token_id = [self.modifier_token_id]
        for token_id, token in zip(self.modifier_token_id,
                                   self.modifier_token):
            # Clone the row: saving an indexed view would serialise the
            # whole embedding matrix storage into every token file.
            learned_embeds_dict = {
                token: learned_embeds[token_id].detach().clone()
            }
            torch.save(learned_embeds_dict, f'{output_dir}/{token}.bin')
+ + """ + self.size = size + self.mask_size = mask_size + self.center_crop = center_crop + self.tokenizer = tokenizer + self.interpolation = Image.BILINEAR + self.aug = aug + + self.instance_images_path = [] + self.class_images_path = [] + self.with_prior_preservation = with_prior_preservation + for concept in concepts_list: + inst_img_path = [ + (x, concept['instance_prompt']) + for x in Path(concept['instance_data_dir']).iterdir() + if x.is_file() + ] + self.instance_images_path.extend(inst_img_path) + + if with_prior_preservation: + class_data_root = Path(concept['class_data_dir']) + if os.path.isdir(class_data_root): + class_images_path = list(class_data_root.iterdir()) + class_prompt = [ + concept['class_prompt'] + for _ in range(len(class_images_path)) + ] + else: + with open(class_data_root, 'r') as f: + class_images_path = f.read().splitlines() + with open(concept['class_prompt'], 'r') as f: + class_prompt = f.read().splitlines() + + class_img_path = [ + (x, y) for (x, y) in zip(class_images_path, class_prompt) + ] + self.class_images_path.extend( + class_img_path[:num_class_images]) + + random.shuffle(self.instance_images_path) + self.num_instance_images = len(self.instance_images_path) + self.num_class_images = len(self.class_images_path) + self._length = max(self.num_class_images, self.num_instance_images) + self.flip = transforms.RandomHorizontalFlip(0.5 * hflip) + + self.image_transforms = transforms.Compose([ + self.flip, + transforms.Resize( + size, interpolation=transforms.InterpolationMode.BILINEAR), + transforms.CenterCrop(size) + if center_crop else transforms.RandomCrop(size), + transforms.ToTensor(), + transforms.Normalize([0.5], [0.5]), + ]) + + def __len__(self): + return self._length + + def preprocess(self, image, scale, resample): + outer, inner = self.size, scale + factor = self.size // self.mask_size + if scale > self.size: + outer, inner = scale, self.size + top, left = np.random.randint(0, outer - inner + 1), np.random.randint( + 0, 
outer - inner + 1) + image = image.resize((scale, scale), resample=resample) + image = np.array(image).astype(np.uint8) + image = (image / 127.5 - 1.0).astype(np.float32) + instance_image = np.zeros((self.size, self.size, 3), dtype=np.float32) + mask = np.zeros((self.size // factor, self.size // factor)) + if scale > self.size: + instance_image = image[top:top + inner, left:left + inner, :] + mask = np.ones((self.size // factor, self.size // factor)) + else: + instance_image[top:top + inner, left:left + inner, :] = image + mask[top // factor + 1:(top + scale) // factor - 1, + left // factor + 1:(left + scale) // factor - 1] = 1.0 + return instance_image, mask + + def __getitem__(self, index): + example = {} + instance_image, instance_prompt = self.instance_images_path[ + index % self.num_instance_images] + instance_image = Image.open(instance_image) + if not instance_image.mode == 'RGB': + instance_image = instance_image.convert('RGB') + instance_image = self.flip(instance_image) + + # apply resize augmentation and create a valid image region mask + random_scale = self.size + if self.aug: + random_scale = ( + np.random.randint(self.size // 3, self.size + + 1) if np.random.uniform() < 0.66 else + np.random.randint(int(1.2 * self.size), int(1.4 * self.size))) + instance_image, mask = self.preprocess(instance_image, random_scale, + self.interpolation) + + if random_scale < 0.6 * self.size: + instance_prompt = np.random.choice(['a far away ', 'very small ' + ]) + instance_prompt + elif random_scale > self.size: + instance_prompt = np.random.choice(['zoomed in ', 'close up ' + ]) + instance_prompt + + example['instance_images'] = torch.from_numpy(instance_image).permute( + 2, 0, 1) + example['mask'] = torch.from_numpy(mask) + example['instance_prompt_ids'] = self.tokenizer( + instance_prompt, + truncation=True, + padding='max_length', + max_length=self.tokenizer.model_max_length, + return_tensors='pt', + ).input_ids + + if self.with_prior_preservation: + class_image, 
class PromptDataset(Dataset):

    def __init__(self, prompt, num_samples):
        """Dataset that repeats one class prompt for class image generation.

        Args:
            prompt: Class prompt returned with every item.
            num_samples: Number of items the dataset reports.

        """
        self.num_samples = num_samples
        self.prompt = prompt

    def __len__(self):
        return self.num_samples

    def __getitem__(self, index):
        # Every item carries the same prompt together with its own index.
        return {'prompt': self.prompt, 'index': index}
+ center_crop: execute center crop or not. + concepts_list: Path to json containing multiple concepts, will overwrite parameters. + instance_data_name: The instance data local dir or online ID. + + """ + self.with_prior_preservation = kwargs.pop('with_prior_preservation', + True) + instance_prompt = kwargs.pop('instance_prompt', 'a photo of sks dog') + class_prompt = kwargs.pop('class_prompt', 'dog') + class_data_dir = kwargs.pop('class_data_dir', '/tmp/class_data') + self.real_prior = kwargs.pop('real_prior', False) + self.num_class_images = kwargs.pop('num_class_images', 200) + self.resolution = kwargs.pop('resolution', 512) + self.prior_loss_weight = kwargs.pop('prior_loss_weight', 1.0) + self.modifier_token = kwargs.pop('modifier_token', '') + self.initializer_token = kwargs.pop('initializer_token', 'ktn+pll+ucd') + self.freeze_model = kwargs.pop('freeze_model', 'crossattn_kv') + self.sample_batch_size = kwargs.pop('sample_batch_size', 4) + self.train_batch_size = kwargs.pop('train_batch_size', 2) + self.center_crop = kwargs.pop('center_crop', False) + self.concepts_list = kwargs.pop('concepts_list', None) + instance_data_name = kwargs.pop( + 'instance_data_name', 'buptwq/lora-stable-diffusion-finetune-dog') + + # Extract downloaded image folder + if self.concepts_list is None: + if os.path.isdir(instance_data_name): + instance_data_dir = instance_data_name + else: + ds = MsDataset.load(instance_data_name, split='train') + instance_data_dir = os.path.dirname( + next(iter(ds))['Target:FILE']) + + # construct concept list + if self.concepts_list is None: + self.concepts_list = [{ + 'instance_prompt': instance_prompt, + 'class_prompt': class_prompt, + 'instance_data_dir': instance_data_dir, + 'class_data_dir': class_data_dir, + }] + else: + with open(self.concepts_list, 'r') as f: + self.concepts_list = json.load(f) + + for concept in self.concepts_list: + if not os.path.exists(concept['class_data_dir']): + os.makedirs(concept['class_data_dir']) + if not 
os.path.exists(concept['instance_data_dir']): + raise Exception( + f"instance dataset {concept['instance_data_dir']} does not exist." + ) + + # Adding a modifier token which is optimized + self.modifier_token_id = [] + initializer_token_id = [] + if self.modifier_token is not None: + self.modifier_token = self.modifier_token.split('+') + self.initializer_token = self.initializer_token.split('+') + if len(self.modifier_token) > len(self.initializer_token): + raise ValueError( + 'You must specify + separated initializer token for each modifier token.' + ) + for modifier_token, initializer_token in zip( + self.modifier_token, + self.initializer_token[:len(self.modifier_token)]): + # Add the placeholder token in tokenizer + num_added_tokens = self.model.tokenizer.add_tokens( + modifier_token) + if num_added_tokens == 0: + raise ValueError( + f'The tokenizer already contains the token {modifier_token}. Please pass a different' + ' `modifier_token` that is not already in the tokenizer.' + ) + + # Convert the initializer_token, placeholder_token to ids + token_ids = self.model.tokenizer.encode( + [initializer_token], add_special_tokens=False) + # Check if initializer_token is a single token or a sequence of tokens + if len(token_ids) > 1: + raise ValueError( + 'The initializer token must be a single token.') + + initializer_token_id.append(token_ids[0]) + self.modifier_token_id.append( + self.model.tokenizer.convert_tokens_to_ids(modifier_token)) + + # Resize the token embeddings as we are adding new special tokens to the tokenizer + self.model.text_encoder.resize_token_embeddings( + len(self.model.tokenizer)) + + # Resize the token embeddings as we are adding new special tokens to the tokenizer + self.model.text_encoder.resize_token_embeddings( + len(self.model.tokenizer)) + + # Initialise the newly added placeholder token with the embeddings of the initializer token + token_embeds = self.model.text_encoder.get_input_embeddings( + ).weight.data + for x, y in 
zip(self.modifier_token_id, initializer_token_id): + token_embeds[x] = token_embeds[y] + + # Freeze all parameters except for the token embeddings in text encoder + params_to_freeze = itertools.chain( + self.model.text_encoder.text_model.encoder.parameters(), + self.model.text_encoder.text_model.final_layer_norm.parameters(), + self.model.text_encoder.text_model.embeddings.position_embedding. + parameters(), + ) + self.freeze_params(params_to_freeze) + + # Save checkpoint and configurate files + ckpt_hook = list( + filter(lambda hook: isinstance(hook, CheckpointHook), + self.hooks))[0] + ckpt_hook.set_processor( + CustomCheckpointProcessor(self.modifier_token, + self.modifier_token_id)) + + # Add new Custom Diffusion weights to the attention layers + attention_class = CustomDiffusionAttnProcessor + # Only train key, value projection layers if freeze_model = 'crossattn_kv' else train all params. + train_q_out = False if self.freeze_model == 'crossattn_kv' else True + custom_diffusion_attn_procs = {} + + st = self.model.unet.state_dict() + for name, _ in self.model.unet.attn_processors.items(): + cross_attention_dim = None if name.endswith( + 'attn1.processor' + ) else self.model.unet.config.cross_attention_dim + if name.startswith('mid_block'): + hidden_size = self.model.unet.config.block_out_channels[-1] + elif name.startswith('up_blocks'): + block_id = int(name[len('up_blocks.')]) + hidden_size = list( + reversed( + self.model.unet.config.block_out_channels))[block_id] + elif name.startswith('down_blocks'): + block_id = int(name[len('down_blocks.')]) + hidden_size = self.model.unet.config.block_out_channels[ + block_id] + layer_name = name.split('.processor')[0] + weights = { + 'to_k_custom_diffusion.weight': + st[layer_name + '.to_k.weight'], + 'to_v_custom_diffusion.weight': + st[layer_name + '.to_v.weight'], + } + if train_q_out: + weights['to_q_custom_diffusion.weight'] = st[layer_name + + '.to_q.weight'] + weights['to_out_custom_diffusion.0.weight'] = st[ + 
layer_name + '.to_out.0.weight'] + weights['to_out_custom_diffusion.0.bias'] = st[ + layer_name + '.to_out.0.bias'] + if cross_attention_dim is not None: + custom_diffusion_attn_procs[name] = attention_class( + train_kv=True, + train_q_out=train_q_out, + hidden_size=hidden_size, + cross_attention_dim=cross_attention_dim, + ).to(self.model.unet.device) + custom_diffusion_attn_procs[name].load_state_dict(weights) + else: + custom_diffusion_attn_procs[name] = attention_class( + train_kv=False, + train_q_out=False, + hidden_size=hidden_size, + cross_attention_dim=cross_attention_dim, + ) + del st + self.model.unet.set_attn_processor(custom_diffusion_attn_procs) + self.custom_diffusion_layers = AttnProcsLayers( + self.model.unet.attn_processors) + + # Check for conflicts and conflicts + if self.with_prior_preservation: + for concept in self.concepts_list: + if concept['class_data_dir'] is None: + raise ValueError( + 'You must specify a data directory for class images.') + if concept['class_prompt'] is None: + raise ValueError( + 'You must specify prompt for class images.') + else: + for concept in self.concepts_list: + if concept['class_data_dir'] is not None: + warnings.warn( + 'You need not use --class_data_dir without --with_prior_preservation.' + ) + if concept['class_prompt'] is not None: + warnings.warn( + 'You need not use --class_prompt without --with_prior_preservation.' + ) + + # Generate class images if prior preservation is enabled. 
+ if self.with_prior_preservation: + self.generate_image() + + # Dataset and DataLoaders creation: + train_dataset = CustomDiffusionDataset( + concepts_list=self.concepts_list, + tokenizer=self.model.tokenizer, + with_prior_preservation=self.with_prior_preservation, + size=self.resolution, + mask_size=self.model.vae.encode( + torch.randn(1, 3, self.resolution, + self.resolution).to(dtype=torch.float32).to( + self.device)).latent_dist.sample().size()[-1], + center_crop=self.center_crop, + num_class_images=self.num_class_images, + hflip=False, + aug=True, + ) + train_dataloader = torch.utils.data.DataLoader( + train_dataset, + batch_size=self.train_batch_size, + shuffle=True, + collate_fn=lambda examples: self.collate_fn(examples), + num_workers=2, + ) + self.iter_train_dataloader = itertools.cycle(train_dataloader) + + def freeze_params(self, params): + for param in params: + param.requires_grad = False + + def collate_fn(self, examples): + input_ids = [example['instance_prompt_ids'] for example in examples] + pixel_values = [example['instance_images'] for example in examples] + mask = [example['mask'] for example in examples] + # Concat class and instance examples which avoid doing two forward passes. + if self.with_prior_preservation: + input_ids += [example['class_prompt_ids'] for example in examples] + pixel_values += [example['class_images'] for example in examples] + mask += [example['class_mask'] for example in examples] + + input_ids = torch.cat(input_ids, dim=0) + pixel_values = torch.stack(pixel_values) + mask = torch.stack(mask) + pixel_values = pixel_values.to( + memory_format=torch.contiguous_format).float() + mask = mask.to(memory_format=torch.contiguous_format).float() + + batch = { + 'input_ids': input_ids, + 'pixel_values': pixel_values, + 'mask': mask.unsqueeze(1) + } + return batch + + def generate_image(self): + """ Generate class images if prior preservation is enabled. 
+ """ + for i, concept in enumerate(self.concepts_list): + class_images_dir = Path(concept['class_data_dir']) + if not class_images_dir.exists(): + class_images_dir.mkdir(parents=True, exist_ok=True) + + cur_class_images = len(list(class_images_dir.iterdir())) + + if cur_class_images < self.num_class_images: + pipeline = DiffusionPipeline.from_pretrained( + self.model_dir, + torch_dtype=torch.float32, + safety_checker=None, + revision=None, + ) + pipeline.set_progress_bar_config(disable=True) + + num_new_images = self.num_class_images - cur_class_images + + sample_dataset = PromptDataset(concept['class_prompt'], + num_new_images) + sample_dataloader = torch.utils.data.DataLoader( + sample_dataset, batch_size=self.sample_batch_size) + + pipeline.to(self.device) + + for example in tqdm( + sample_dataloader, + desc='Generating class images', + # disable=not accelerator.is_local_main_process, + ): + images = pipeline(example['prompt']).images + + for i, image in enumerate(images): + hash_image = hashlib.sha1(image.tobytes()).hexdigest() + save_index = example['index'][i] + cur_class_images + image_filename = class_images_dir / f'{save_index}-{hash_image}.jpg' + image.save(image_filename) + + del pipeline + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + def build_optimizer(self, cfg: ConfigDict, default_args: dict = None): + try: + return build_optimizer( + itertools.chain( + self.model.text_encoder.get_input_embeddings().parameters( + ), self.custom_diffusion_layers.parameters()), + cfg=cfg, + default_args=default_args) + except KeyError as e: + self.logger.error( + f'Build optimizer error, the optimizer {cfg} is a torch native component, ' + f'please check if your torch with version: {torch.__version__} matches the config.' 
+ ) + raise e + + def train_loop(self, data_loader): + """ Training loop used by `EpochBasedTrainer.train()` + """ + self.invoke_hook(TrainerStages.before_run) + self.model.train() + for _ in range(self._epoch, self._max_epochs): + self.invoke_hook(TrainerStages.before_train_epoch) + for i, data_batch in enumerate(data_loader): + if i < self.inner_iter: + # inner_iter may be read out from the checkpoint file, so skip the trained iters in the epoch. + continue + data_batch = to_device(data_batch, self.device) + self.data_batch = data_batch + self._inner_iter = i + self.invoke_hook(TrainerStages.before_train_iter) + self.train_step(self.model, data_batch) + self.invoke_hook(TrainerStages.after_train_iter) + # Zero out the gradients for all token embeddings except the newly added + # embeddings for the concept to optimize the concept embeddings. + if self.modifier_token is not None: + grads_text_encoder = self.model.text_encoder.get_input_embeddings( + ).weight.grad + # Get the index for tokens that we want to zero the grads. + index_grads_to_zero = torch.arange( + len(self.model.tokenizer)) != self.modifier_token_id[0] + for i in range(len(self.modifier_token_id[1:])): + modifier_flag = torch.arange( + len(self.model.tokenizer) + ) != self.modifier_token_id[i] + index_grads_to_zero = index_grads_to_zero & modifier_flag + grads_data = grads_text_encoder.data[ + index_grads_to_zero, :].fill_(0) + grads_text_encoder.data[ + index_grads_to_zero, :] = grads_data + # Value changed after the hooks are invoked, do not move them above the invoke_hook code. + del self.data_batch + self._iter += 1 + self._mode = ModeKeys.TRAIN + + if i + 1 >= self.iters_per_epoch: + break + + self.invoke_hook(TrainerStages.after_train_epoch) + # Value changed after the hooks are invoked, do not move them above the invoke_hook code. 
+ self._inner_iter = 0 + self._epoch += 1 + if self._stop_training: + break + + self.invoke_hook(TrainerStages.after_run) + + def train_step(self, model, inputs): + """ Perform a training step on a batch of inputs. + + Subclass and override to inject custom behavior. + + Args: + model (`TorchModel`): The model to train. + inputs (`Dict[str, Union[torch.Tensor, Any]]`): + The inputs and targets of the model. + + The dictionary will be unpacked before being fed to the model. Most models expect the targets under the + argument `labels`. Check your model's documentation for all accepted arguments. + + Return: + `torch.Tensor`: The tensor with training loss on this batch. + """ + self.model.unet.train() + if self.modifier_token is not None: + self.model.text_encoder.train() + self._mode = ModeKeys.TRAIN + + batch = next(self.iter_train_dataloader) + # Convert images to latent space + latents = self.model.vae.encode(batch['pixel_values'].to( + dtype=torch.float32).to(self.device)).latent_dist.sample() + latents = latents * self.model.vae.config.scaling_factor + + # Sample noise that we'll add to the latents + noise = torch.randn_like(latents) + bsz = latents.shape[0] + # Sample a random timestep for each image + timesteps = torch.randint( + 0, + self.model.noise_scheduler.config.num_train_timesteps, (bsz, ), + device=latents.device) + timesteps = timesteps.long() + + # Add noise to the latents according to the noise magnitude at each timestep + # (this is the forward diffusion process) + noisy_latents = self.model.noise_scheduler.add_noise( + latents, noise, timesteps) + + # Get the text embedding for conditioning + encoder_hidden_states = self.model.text_encoder(batch['input_ids'].to( + self.device))[0] + + # Predict the noise residual + model_pred = self.model.unet(noisy_latents, timesteps, + encoder_hidden_states).sample + + # Get the target for loss depending on the prediction type + if self.model.noise_scheduler.config.prediction_type == 'epsilon': + target = noise 
+ elif self.model.noise_scheduler.config.prediction_type == 'v_prediction': + target = self.model.noise_scheduler.get_velocity( + latents, noise, timesteps) + else: + raise ValueError( + f'Unknown prediction type {self.model.noise_scheduler.config.prediction_type}' + ) + + if self.with_prior_preservation: + # Chunk the noise and model_pred into two parts and compute the loss on each part separately. + model_pred, model_pred_prior = torch.chunk(model_pred, 2, dim=0) + target, target_prior = torch.chunk(target, 2, dim=0) + mask = torch.chunk(batch['mask'].to(self.device), 2, dim=0)[0] + # Compute instance loss + loss = F.mse_loss( + model_pred.float(), target.float(), reduction='none') + loss = ((loss * mask).sum([1, 2, 3]) / mask.sum([1, 2, 3])).mean() + + # Compute prior loss + prior_loss = F.mse_loss( + model_pred_prior.float(), + target_prior.float(), + reduction='mean') + + # Add the prior loss to the instance loss. + loss = loss + self.prior_loss_weight * prior_loss + else: + mask = batch['mask'].to(self.device) + loss = F.mse_loss( + model_pred.float(), target.float(), reduction='none') + loss = ((loss * mask).sum([1, 2, 3]) / mask.sum([1, 2, 3])).mean() + + train_outputs = {} + train_outputs[OutputKeys.LOSS] = loss + + # add model output info to log + if 'log_vars' not in train_outputs: + default_keys_pattern = ['loss'] + match_keys = set([]) + for key_p in default_keys_pattern: + match_keys.update( + [key for key in train_outputs.keys() if key_p in key]) + + log_vars = {} + for key in match_keys: + value = train_outputs.get(key, None) + if value is not None: + if is_dist(): + value = value.data.clone().to('cuda') + dist.all_reduce(value.div_(dist.get_world_size())) + log_vars.update({key: value.item()}) + self.log_buffer.update(log_vars) + else: + self.log_buffer.update(train_outputs['log_vars']) + + self.train_outputs = train_outputs diff --git a/modelscope/trainers/multi_modal/dreambooth_diffusion/dreambooth_diffusion_trainer.py 
b/modelscope/trainers/multi_modal/dreambooth_diffusion/dreambooth_diffusion_trainer.py index 65623ed8..3b300ea4 100644 --- a/modelscope/trainers/multi_modal/dreambooth_diffusion/dreambooth_diffusion_trainer.py +++ b/modelscope/trainers/multi_modal/dreambooth_diffusion/dreambooth_diffusion_trainer.py @@ -10,8 +10,6 @@ from typing import Union import torch import torch.nn.functional as F from diffusers import DiffusionPipeline -from diffusers.loaders import AttnProcsLayers -from diffusers.models.attention_processor import LoRAAttnProcessor from PIL import Image from PIL.ImageOps import exif_transpose from torch.utils.data import Dataset @@ -41,7 +39,8 @@ class DreamboothCheckpointProcessor(CheckpointProcessor): trainer, checkpoint_path_prefix, output_dir, - meta=None): + meta=None, + save_optimizers=True): """Save the state dict for dreambooth model. """ pipeline_args = {} diff --git a/modelscope/trainers/multi_modal/lora_diffusion/lora_diffusion_trainer.py b/modelscope/trainers/multi_modal/lora_diffusion/lora_diffusion_trainer.py index 40da164e..7c6644bd 100644 --- a/modelscope/trainers/multi_modal/lora_diffusion/lora_diffusion_trainer.py +++ b/modelscope/trainers/multi_modal/lora_diffusion/lora_diffusion_trainer.py @@ -21,7 +21,8 @@ class LoraDiffusionCheckpointProcessor(CheckpointProcessor): trainer, checkpoint_path_prefix, output_dir, - meta=None): + meta=None, + save_optimizers=True): """Save the state dict for lora tune model. """ trainer.model.unet = trainer.model.unet.to(torch.float32) @@ -33,6 +34,14 @@ class LoraDiffusionTrainer(EpochBasedTrainer): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + """Lora trainers for fine-tuning stable diffusion + + Args: + lora_rank: The rank size of lora intermediate linear. 
+ + """ + lora_rank = kwargs.pop('lora_rank', 4) + # set lora save checkpoint processor ckpt_hook = list( filter(lambda hook: isinstance(hook, CheckpointHook), @@ -58,7 +67,8 @@ class LoraDiffusionTrainer(EpochBasedTrainer): lora_attn_procs[name] = LoRAAttnProcessor( hidden_size=hidden_size, - cross_attention_dim=cross_attention_dim) + cross_attention_dim=cross_attention_dim, + rank=lora_rank) self.model.unet.set_attn_processor(lora_attn_procs) diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 2e62be89..2dc300c5 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -43,7 +43,8 @@ from modelscope.utils.logger import get_logger from modelscope.utils.registry import build_from_cfg from modelscope.utils.torch_utils import (compile_model, get_dist_info, get_local_rank, init_dist, is_dist, - is_master, set_random_seed) + is_master, is_on_same_device, + set_random_seed) from ..swift import Swift from .base import BaseTrainer from .builder import TRAINERS @@ -257,7 +258,7 @@ class EpochBasedTrainer(BaseTrainer): # If not working in parallel scenario, put model to device as a default logic. 
device_name = self.device if self.device is not None else 'gpu' self.device = create_device(device_name) - if self.device.type == 'cuda': + if self.device.type == 'cuda' and is_on_same_device(self.model): self.model.to(self.device) self.print_cfg() diff --git a/modelscope/utils/checkpoint.py b/modelscope/utils/checkpoint.py index bbde6034..147b80e9 100644 --- a/modelscope/utils/checkpoint.py +++ b/modelscope/utils/checkpoint.py @@ -622,6 +622,7 @@ def save_pretrained(model, origin_file_to_be_ignored = [save_checkpoint_name] ignore_file_set = set(origin_file_to_be_ignored) ignore_file_set.add(ModelFile.CONFIGURATION) + ignore_file_set.add('*.safetensors') ignore_file_set.add('.*') if hasattr(model, 'model_dir') and model.model_dir is not None and is_master(): diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 4692d921..f68c83d9 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -154,6 +154,7 @@ class CVTasks(object): motion_generation = 'motion-generation' # 3d reconstruction nerf_recon_acc = 'nerf-recon-acc' + nerf_recon_vq_compression = 'nerf-recon-vq-compression' # vision efficient tuning vision_efficient_tuning = 'vision-efficient-tuning' diff --git a/modelscope/utils/data_collators.py b/modelscope/utils/data_collators.py index 044b1993..0981c836 100644 --- a/modelscope/utils/data_collators.py +++ b/modelscope/utils/data_collators.py @@ -7,7 +7,7 @@ from typing import Any, List, Optional, Tuple from .logger import get_logger -logger = get_logger(__name__) +logger = get_logger() class RemoveColumnsCollator: diff --git a/modelscope/utils/error.py b/modelscope/utils/error.py index 841662c0..8259c7ce 100644 --- a/modelscope/utils/error.py +++ b/modelscope/utils/error.py @@ -168,3 +168,9 @@ TAMING_IMPORT_ERROR = """ {0} requires the timm library but it was not found in your environment. 
You can install it with pip: `pip install taming-transformers-rom1504` """ + +# docstyle-ignore +XFORMERS_IMPORT_ERROR = """ +{0} requires the timm library but it was not found in your environment. You can install it with pip: +`pip install xformers>=0.0.17` +""" diff --git a/modelscope/utils/hf_util.py b/modelscope/utils/hf_util.py new file mode 100644 index 00000000..8d877ef9 --- /dev/null +++ b/modelscope/utils/hf_util.py @@ -0,0 +1,116 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import sys + +from transformers import AutoConfig as AutoConfigHF +from transformers import AutoModel as AutoModelHF +from transformers import AutoModelForCausalLM as AutoModelForCausalLMHF +from transformers import AutoModelForSeq2SeqLM as AutoModelForSeq2SeqLMHF +from transformers import AutoTokenizer as AutoTokenizerHF +from transformers import GenerationConfig as GenerationConfigHF +from transformers import PreTrainedModel, PreTrainedTokenizerBase + +from modelscope import snapshot_download +from modelscope.utils.constant import Invoke + + +def user_agent(invoked_by=None): + if invoked_by is None: + invoked_by = Invoke.PRETRAINED + uagent = '%s/%s' % (Invoke.KEY, invoked_by) + return uagent + + +def patch_tokenizer_base(): + """ Monkey patch PreTrainedTokenizerBase.from_pretrained to adapt to modelscope hub. 
+ """ + ori_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__ + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path, *model_args, + **kwargs): + ignore_file_pattern = [r'\w+\.bin', r'\w+\.safetensors'] + if not os.path.exists(pretrained_model_name_or_path): + revision = kwargs.pop('revision', None) + model_dir = snapshot_download( + pretrained_model_name_or_path, + revision=revision, + ignore_file_pattern=ignore_file_pattern) + else: + model_dir = pretrained_model_name_or_path + return ori_from_pretrained(cls, model_dir, *model_args, **kwargs) + + PreTrainedTokenizerBase.from_pretrained = from_pretrained + + +def patch_model_base(): + """ Monkey patch PreTrainedModel.from_pretrained to adapt to modelscope hub. + """ + ori_from_pretrained = PreTrainedModel.from_pretrained.__func__ + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path, *model_args, + **kwargs): + ignore_file_pattern = [r'\w+\.safetensors'] + if not os.path.exists(pretrained_model_name_or_path): + revision = kwargs.pop('revision', None) + model_dir = snapshot_download( + pretrained_model_name_or_path, + revision=revision, + ignore_file_pattern=ignore_file_pattern) + else: + model_dir = pretrained_model_name_or_path + return ori_from_pretrained(cls, model_dir, *model_args, **kwargs) + + PreTrainedModel.from_pretrained = from_pretrained + + +patch_tokenizer_base() +patch_model_base() + + +def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs): + """Get a custom wrapper class for auto classes to download the models from the ModelScope hub + Args: + module_class: The actual module class + ignore_file_pattern (`str` or `List`, *optional*, default to `None`): + Any file pattern to be ignored in downloading, like exact file names or file extensions. 
+ Returns: + The wrapper + """ + + class ClassWrapper(module_class): + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path, *model_args, + **kwargs): + if not os.path.exists(pretrained_model_name_or_path): + revision = kwargs.pop('revision', None) + model_dir = snapshot_download( + pretrained_model_name_or_path, + revision=revision, + ignore_file_pattern=ignore_file_pattern, + user_agent=user_agent()) + else: + model_dir = pretrained_model_name_or_path + + return module_class.from_pretrained(model_dir, *model_args, + **kwargs) + + return ClassWrapper + + +AutoModel = get_wrapped_class( + AutoModelHF, ignore_file_pattern=[r'\w+\.safetensors']) +AutoModelForCausalLM = get_wrapped_class( + AutoModelForCausalLMHF, ignore_file_pattern=[r'\w+\.safetensors']) +AutoModelForSeq2SeqLM = get_wrapped_class( + AutoModelForSeq2SeqLMHF, ignore_file_pattern=[r'\w+\.safetensors']) + +AutoTokenizer = get_wrapped_class( + AutoTokenizerHF, ignore_file_pattern=[r'\w+\.bin', r'\w+\.safetensors']) +AutoConfig = get_wrapped_class( + AutoConfigHF, ignore_file_pattern=[r'\w+\.bin', r'\w+\.safetensors']) +GenerationConfig = get_wrapped_class( + GenerationConfigHF, ignore_file_pattern=[r'\w+\.bin', r'\w+\.safetensors']) diff --git a/modelscope/utils/import_utils.py b/modelscope/utils/import_utils.py index 3e8be2e1..f2fc7e37 100644 --- a/modelscope/utils/import_utils.py +++ b/modelscope/utils/import_utils.py @@ -306,6 +306,7 @@ REQUIREMENTS_MAAPING = OrderedDict([ ('mpi4py', (is_package_available('mpi4py'), MPI4PY_IMPORT_ERROR)), ('open_clip', (is_package_available('open_clip'), OPENCLIP_IMPORT_ERROR)), ('taming', (is_package_available('taming'), TAMING_IMPORT_ERROR)), + ('xformers', (is_package_available('xformers'), XFORMERS_IMPORT_ERROR)), ]) SYSTEM_PACKAGE = set(['os', 'sys', 'typing']) diff --git a/modelscope/utils/logger.py b/modelscope/utils/logger.py index 17923a6d..58d007c5 100644 --- a/modelscope/utils/logger.py +++ b/modelscope/utils/logger.py @@ -25,7 +25,7 @@ 
def get_logger(log_file: Optional[str] = None, logger_name = __name__.split('.')[0] logger = logging.getLogger(logger_name) - + logger.propagate = False if logger_name in init_loggers: add_file_handler_if_needed(logger, log_file, file_mode, log_level) return logger diff --git a/modelscope/utils/plugins.py b/modelscope/utils/plugins.py index e997f676..2c510dd2 100644 --- a/modelscope/utils/plugins.py +++ b/modelscope/utils/plugins.py @@ -3,6 +3,7 @@ # Part of the implementation is borrowed from wimglenn/johnnydep import copy +import filecmp import importlib import os import pkgutil @@ -28,6 +29,9 @@ logger = get_logger() storage = LocalStorage() MODELSCOPE_FILE_DIR = get_default_cache_dir() +MODELSCOPE_DYNAMIC_MODULE = 'modelscope_modules' +BASE_MODULE_DIR = os.path.join(MODELSCOPE_FILE_DIR, MODELSCOPE_DYNAMIC_MODULE) + PLUGINS_FILENAME = '.modelscope_plugins' OFFICIAL_PLUGINS = [ { @@ -322,6 +326,41 @@ def import_module_from_file(module_name, file_path): return module +def create_module_from_files(file_list, file_prefix, module_name): + """ + Create a python module from a list of files by copying them to the destination directory. + + Args: + file_list (List[str]): List of relative file paths to be copied. + file_prefix (str): Path prefix for each file in file_list. + module_name (str): Name of the module. 
+ + Returns: + None + """ + + def create_empty_file(file_path): + with open(file_path, 'w') as _: + pass + + dest_dir = os.path.join(BASE_MODULE_DIR, module_name) + for file_path in file_list: + file_dir = os.path.dirname(file_path) + target_dir = os.path.join(dest_dir, file_dir) + os.makedirs(target_dir, exist_ok=True) + init_file = os.path.join(target_dir, '__init__.py') + if not os.path.exists(init_file): + create_empty_file(init_file) + + target_file = os.path.join(target_dir, file_path) + src_file = os.path.join(file_prefix, file_path) + if not os.path.exists(target_file) or not filecmp.cmp( + src_file, target_file): + shutil.copyfile(src_file, target_file) + + importlib.invalidate_caches() + + def import_module_from_model_dir(model_dir): """ import all the necessary module from a model dir @@ -340,12 +379,26 @@ def import_module_from_model_dir(model_dir): # install the requirements firstly install_requirements_by_files(requirements) - # then import the modules - import sys - sys.path.insert(0, model_dir) - for file in file_dirs: - module_name = Path(file).stem - import_module_from_file(module_name, file) + if BASE_MODULE_DIR not in sys.path: + sys.path.append(BASE_MODULE_DIR) + + module_name = Path(model_dir).stem + + # in order to keep forward compatibility, we add module path to + # sys.path so that submodule can be imported directly as before + MODULE_PATH = os.path.join(BASE_MODULE_DIR, module_name) + if MODULE_PATH not in sys.path: + sys.path.append(MODULE_PATH) + + relative_file_dirs = [ + file.replace(model_dir.rstrip(os.sep) + os.sep, '') + for file in file_dirs + ] + create_module_from_files(relative_file_dirs, model_dir, module_name) + for file in relative_file_dirs: + submodule = module_name + '.' 
+ file.replace(os.sep, '.').replace( + '.py', '') + importlib.import_module(submodule) def install_requirements_by_names(plugins: List[str]): diff --git a/modelscope/utils/torch_utils.py b/modelscope/utils/torch_utils.py index 1b2e1094..1a673458 100644 --- a/modelscope/utils/torch_utils.py +++ b/modelscope/utils/torch_utils.py @@ -354,3 +354,9 @@ def all_gather(data, group=None): data_list.append(pickle.loads(buffer)) return data_list + + +def is_on_same_device(model: torch.nn.Module) -> bool: + device_set = set(map(lambda p: p.device.type, + model.parameters())) - {'cpu'} + return len(device_set) <= 1 diff --git a/modelscope/version.py b/modelscope/version.py index 32823e7b..fbb09a54 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. -__version__ = '1.6.2' +__version__ = '1.7.1' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future __release_datetime__ = '2099-10-13 08:56:12' diff --git a/requirements/framework.txt b/requirements/framework.txt index d98765bf..e748026e 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -1,6 +1,6 @@ addict attrs -datasets +datasets>=2.8.0,<=2.13.0 einops filelock>=3.3.0 gast>=0.2.2 diff --git a/setup.cfg b/setup.cfg index f0b80b48..62f157e8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -21,7 +21,7 @@ ignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids [flake8] max-line-length = 120 select = B,C,E,F,P,T4,W,B9 -ignore = F401,F405,F821,W503,E251 +ignore = F401,F403,F405,F821,W503,E251 exclude = docs/src,*.pyi,.git [darglint] diff --git a/tests/models/test_llama2.py b/tests/models/test_llama2.py new file mode 100644 index 00000000..f31d2cad --- /dev/null +++ b/tests/models/test_llama2.py @@ -0,0 +1,59 @@ +import unittest + +import torch + +from modelscope import Model, snapshot_download +from 
modelscope.models.nlp.llama2 import Llama2Tokenizer +from modelscope.utils.test_utils import test_level + + +class Llama2Test(unittest.TestCase): + + def setUp(self) -> None: + self.model_name = 'modelscope/Llama-2-7b-chat-ms' + self.system = 'you are a helpful assistant!' + self.text_first_round = 'hello' + self.text_second_round = 'do you know peking university?' + self.text_third_round = 'where is it?' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_chat(self): + model_dir = snapshot_download( + self.model_name, ignore_file_pattern=[r'\w+\.safetensors']) + model = Model.from_pretrained( + model_dir, device_map='auto', torch_dtype=torch.float16) + tokenizer = Llama2Tokenizer.from_pretrained(model_dir) + + inputs = { + 'text': self.text_first_round, + 'history': [], + 'system': self.system + } + result = model.chat(input=inputs, tokenizer=tokenizer) + self.assertIsInstance(result['history'], list) + self.assertEqual(len(result['history']), 1) + self.assertEqual(result['history'][0][0], self.text_first_round) + + inputs = { + 'text': self.text_second_round, + 'history': result['history'], + 'system': self.system + } + result = model.chat(input=inputs, tokenizer=tokenizer) + self.assertIsInstance(result['history'], list) + self.assertEqual(len(result['history']), 2) + self.assertEqual(result['history'][1][0], self.text_second_round) + + inputs = { + 'text': self.text_third_round, + 'history': result['history'], + 'system': self.system + } + result = model.chat(input=inputs, tokenizer=tokenizer) + self.assertIsInstance(result['history'], list) + self.assertEqual(len(result['history']), 3) + self.assertEqual(result['history'][2][0], self.text_third_round) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/models/test_model_base.py b/tests/models/test_model_base.py new file mode 100644 index 00000000..9d353ec5 --- /dev/null +++ b/tests/models/test_model_base.py @@ -0,0 +1,41 @@ +# Copyright (c) Alibaba, Inc. 
and its affiliates. + +import os +import shutil +import tempfile +import unittest + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from modelscope.models.base import Model + + +class BaseTest(unittest.TestCase): + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + def test_from_pretrained(self): + model = Model.from_pretrained( + 'baichuan-inc/baichuan-7B', revision='v1.0.5') + self.assertIsNotNone(model) + + def test_from_pretrained_hf(self): + model = Model.from_pretrained( + 'damo/nlp_structbert_sentence-similarity_chinese-tiny', + use_hf=True) + self.assertIsNotNone(model) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_llama2_text_generation_pipeline.py b/tests/pipelines/test_llama2_text_generation_pipeline.py new file mode 100644 index 00000000..2a532257 --- /dev/null +++ b/tests/pipelines/test_llama2_text_generation_pipeline.py @@ -0,0 +1,47 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import unittest + +import torch + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class Llama2TextGenerationPipelineTest(unittest.TestCase): + + def setUp(self) -> None: + self.llama2_model_id_7B_chat_ms = 'modelscope/Llama-2-7b-chat-ms' + self.llama2_input_chat_ch = '天空为什么是蓝色的?' 
+ + def run_pipeline_with_model_id(self, + model_id, + input, + init_kwargs={}, + run_kwargs={}): + pipeline_ins = pipeline( + task=Tasks.text_generation, model=model_id, **init_kwargs) + pipeline_ins._model_prepare = True + result = pipeline_ins(input, **run_kwargs) + print(result['text']) + + # 7B_ms_chat + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_llama2_7B_chat_ms_with_model_name_with_chat_ch_with_args(self): + self.run_pipeline_with_model_id( + self.llama2_model_id_7B_chat_ms, + self.llama2_input_chat_ch, + init_kwargs={ + 'device_map': 'auto', + 'torch_dtype': torch.float16 + }, + run_kwargs={ + 'max_length': 200, + 'do_sample': True, + 'top_p': 0.85 + }) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_vqrf.py b/tests/pipelines/test_vqrf.py new file mode 100644 index 00000000..d8cb8c88 --- /dev/null +++ b/tests/pipelines/test_vqrf.py @@ -0,0 +1,90 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import unittest + +import torch + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.msdatasets import MsDataset +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import DownloadMode, Tasks +from modelscope.utils.test_utils import test_level + + +class NeRFReconVQCompressionBlender(unittest.TestCase): + + def setUp(self) -> None: + self.model_id = 'DAMOXR/cv_nerf_3d-reconstruction_vector-quantize-compression' + pretrained_model = 'ficus_demo.pt' + data_dir = MsDataset.load( + 'nerf_recon_dataset', namespace='damo', + split='train').config_kwargs['split_config']['train'] + nerf_synthetic_dataset = os.path.join(data_dir, 'nerf_synthetic') + self.blender_scene = 'ficus' + data_dir = os.path.join(nerf_synthetic_dataset, self.blender_scene) + + self.pipeline = pipeline( + Tasks.nerf_recon_vq_compression, + model=self.model_id, + dataset_name='blender', + data_dir=data_dir, + 
downsample=1, + ndc_ray=False, + ckpt_path=pretrained_model) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only') + def test_evalutaion(self): + render_dir = f'./exp/{self.blender_scene}' + self.pipeline( + dict(test_mode='evaluation_test', render_dir=render_dir, N_vis=5)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only') + def test_render_path(self): + render_dir = f'./exp/{self.blender_scene}' + self.pipeline( + dict(test_mode='render_path', render_dir=render_dir, N_vis=30)) + + +class NeRFReconVQCompressionLLFF(unittest.TestCase): + + def setUp(self) -> None: + self.model_id = 'DAMOXR/cv_nerf_3d-reconstruction_vector-quantize-compression' + pretrained_model = 'fern_demo.pt' + data_dir = MsDataset.load( + 'DAMOXR/nerf_llff_data', + subset_name='default', + split='test', + ).config_kwargs['split_config']['test'] + nerf_llff = os.path.join(data_dir, 'nerf_llff_data') + self.llff_scene = 'fern' + data_dir = os.path.join(nerf_llff, self.llff_scene) + + self.pipeline = pipeline( + Tasks.nerf_recon_vq_compression, + model=self.model_id, + dataset_name='llff', + data_dir=data_dir, + downsample=4, + ndc_ray=True, + ckpt_path=pretrained_model) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only') + def test_evalutaion(self): + render_dir = f'./exp/{self.llff_scene}' + self.pipeline( + dict(test_mode='evaluation_test', render_dir=render_dir, N_vis=5)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only') + def test_render_path(self): + render_dir = f'./exp/{self.llff_scene}' + self.pipeline( + dict(test_mode='render_path', render_dir=render_dir, N_vis=10)) + + +if __name__ == '__main__': 
+ unittest.main() diff --git a/tests/trainers/test_custom_diffusion_trainer.py b/tests/trainers/test_custom_diffusion_trainer.py new file mode 100644 index 00000000..6c647401 --- /dev/null +++ b/tests/trainers/test_custom_diffusion_trainer.py @@ -0,0 +1,98 @@ +# Copyright 2022-2023 The Alibaba Fundamental Vision Team Authors. All rights reserved. +import os +import shutil +import tempfile +import unittest + +import cv2 + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.pipelines import pipeline +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode +from modelscope.utils.test_utils import test_level + + +class TestCustomDiffusionTrainer(unittest.TestCase): + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + self.train_dataset = MsDataset.load( + 'buptwq/lora-stable-diffusion-finetune-dog', + split='train', + download_mode=DownloadMode.FORCE_REDOWNLOAD) + self.eval_dataset = MsDataset.load( + 'buptwq/lora-stable-diffusion-finetune-dog', + split='validation', + download_mode=DownloadMode.FORCE_REDOWNLOAD) + + self.max_epochs = 5 + + self.tmp_dir = tempfile.TemporaryDirectory().name + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + super().tearDown() + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_custom_diffusion_train(self): + model_id = 'AI-ModelScope/stable-diffusion-v1-5' + model_revision = 'v1.0.9' + prompt = 'a dog.' 
+ + def cfg_modify_fn(cfg): + cfg.train.max_epochs = self.max_epochs + cfg.train.lr_scheduler = { + 'type': 'LambdaLR', + 'lr_lambda': lambda _: 1, + 'last_epoch': -1 + } + cfg.train.optimizer.lr = 1e-5 + return cfg + + kwargs = dict( + model=model_id, + model_revision=model_revision, + work_dir=self.tmp_dir, + train_dataset=self.train_dataset, + eval_dataset=self.eval_dataset, + cfg_modify_fn=cfg_modify_fn) + + trainer = build_trainer( + name=Trainers.custom_diffusion, default_args=kwargs) + trainer.train() + result = trainer.evaluate() + print(f'Custom-diffusion train output: {result}.') + + results_files = os.listdir(self.tmp_dir) + self.assertIn(f'{trainer.timestamp}.log.json', results_files) + + pipe = pipeline( + task=Tasks.text_to_image_synthesis, model=f'{self.tmp_dir}/output') + output = pipe({'text': prompt}) + cv2.imwrite('./custom_result.png', output['output_imgs'][0]) + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_dreambooth_diffusion_eval(self): + model_id = 'AI-ModelScope/stable-diffusion-v1-5' + model_revision = 'v1.0.9' + + kwargs = dict( + model=model_id, + model_revision=model_revision, + work_dir=self.tmp_dir, + train_dataset=None, + eval_dataset=self.eval_dataset) + + trainer = build_trainer( + name=Trainers.dreambooth_diffusion, default_args=kwargs) + result = trainer.evaluate() + print(f'Custom-diffusion eval output: {result}.') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_lora_diffusion_trainer.py b/tests/trainers/test_lora_diffusion_trainer.py index a9b9e299..2ffef2db 100644 --- a/tests/trainers/test_lora_diffusion_trainer.py +++ b/tests/trainers/test_lora_diffusion_trainer.py @@ -35,7 +35,7 @@ class TestLoraDiffusionTrainer(unittest.TestCase): shutil.rmtree(self.tmp_dir) super().tearDown() - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def 
test_lora_diffusion_train(self): model_id = 'AI-ModelScope/stable-diffusion-v1-5' model_revision = 'v1.0.9' @@ -67,7 +67,7 @@ class TestLoraDiffusionTrainer(unittest.TestCase): results_files = os.listdir(self.tmp_dir) self.assertIn(f'{trainer.timestamp}.log.json', results_files) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_lora_diffusion_eval(self): model_id = 'AI-ModelScope/stable-diffusion-v1-5' model_revision = 'v1.0.9' diff --git a/tests/utils/test_hf_util.py b/tests/utils/test_hf_util.py new file mode 100644 index 00000000..7c10cca6 --- /dev/null +++ b/tests/utils/test_hf_util.py @@ -0,0 +1,55 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import unittest + +from transformers import LlamaForCausalLM, LlamaTokenizer + +from modelscope import (AutoConfig, AutoModel, AutoModelForCausalLM, + AutoTokenizer, GenerationConfig) + + +class HFUtilTest(unittest.TestCase): + + def setUp(self): + pass + + def tearDown(self): + pass + + def test_auto_tokenizer(self): + tokenizer = AutoTokenizer.from_pretrained( + 'baichuan-inc/Baichuan-13B-Chat', + trust_remote_code=True, + revision='v1.0.3') + self.assertEqual(tokenizer.vocab_size, 64000) + self.assertEqual(tokenizer.model_max_length, 4096) + self.assertFalse(tokenizer.is_fast) + + def test_auto_model(self): + model = AutoModelForCausalLM.from_pretrained( + 'baichuan-inc/baichuan-7B', trust_remote_code=True) + self.assertTrue(model is not None) + + def test_auto_config(self): + config = AutoConfig.from_pretrained( + 'baichuan-inc/Baichuan-13B-Chat', + trust_remote_code=True, + revision='v1.0.3') + self.assertEqual(config.model_type, 'baichuan') + gen_config = GenerationConfig.from_pretrained( + 'baichuan-inc/Baichuan-13B-Chat', + trust_remote_code=True, + revision='v1.0.3') + self.assertEqual(gen_config.assistant_token_id, 196) + + def test_transformer_patch(self): + tokenizer = 
LlamaTokenizer.from_pretrained( + 'skyline2006/llama-7b', revision='v1.0.1') + self.assertIsNotNone(tokenizer) + model = LlamaForCausalLM.from_pretrained( + 'skyline2006/llama-7b', revision='v1.0.1') + self.assertIsNotNone(model) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/utils/test_plugin.py b/tests/utils/test_plugin.py index 447ce1c9..6bbba197 100644 --- a/tests/utils/test_plugin.py +++ b/tests/utils/test_plugin.py @@ -124,3 +124,7 @@ class PluginTest(unittest.TestCase): result = self.plugins_manager.list_plugins(show_all=True) self.assertEqual(len(result.items()), len(OFFICIAL_PLUGINS)) + + +if __name__ == '__main__': + unittest.main()