# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import unittest
from os import path as osp

import cv2
from PIL import Image

from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import created_boxed_image
from modelscope.utils.test_utils import test_level


class OfaTasksTest(unittest.TestCase):
    """Smoke tests that build OFA pipelines and print their outputs.

    Each test constructs a pipeline either from a model id string
    (``*_with_name``) or from a ``Model.from_pretrained`` instance
    (``*_with_model``), runs it on a fixed sample and prints the result.
    Tests are gated on ``test_level()`` via ``unittest.skipUnless``.
    No assertions are made on the model outputs.
    """

    # Shared summarization input. The pieces are joined by implicit literal
    # concatenation; note the trailing space after "withdrew" so the words
    # "withdrew" and "from" are not glued together.
    SUMMARIZATION_TEXT = (
        'five-time world champion michelle kwan withdrew '
        'from the #### us figure skating championships on wednesday ,'
        ' but will petition us skating officials for the chance to '
        'compete at the #### turin olympics .')

    def setUp(self) -> None:
        """Create the directory that receives image outputs of the tests."""
        self.output_dir = 'unittest_output'
        os.makedirs(self.output_dir, exist_ok=True)

    def save_img(self, image_in, box, image_out):
        """Write ``created_boxed_image(image_in, box)`` into the output dir.

        Args:
            image_in: Image argument forwarded to ``created_boxed_image``.
            box: Box argument forwarded to ``created_boxed_image``.
            image_out: File name, relative to ``self.output_dir``.
        """
        cv2.imwrite(
            osp.join(self.output_dir, image_out),
            created_boxed_image(image_in, box))

    @staticmethod
    def _image_stem(image_path):
        """Return the file name of ``image_path`` without its extension."""
        return osp.splitext(osp.basename(image_path))[0]

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_image_captioning_with_model(self):
        """Caption one image using a pipeline built from a loaded model."""
        model = Model.from_pretrained('damo/ofa_image-caption_coco_large_en')
        img_captioning = pipeline(
            task=Tasks.image_captioning,
            model=model,
        )
        image = 'data/test/images/image_captioning.png'
        result = img_captioning(image)
        print(result[OutputKeys.CAPTION])

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_image_captioning_with_name(self):
        """Caption an image by model id: single, 2 sequences, then batch."""
        img_captioning = pipeline(
            Tasks.image_captioning,
            model='damo/ofa_image-caption_coco_large_en')
        result = img_captioning('data/test/images/image_captioning.png')
        print(result[OutputKeys.CAPTION])

        img_captioning.model.num_return_sequences = 2
        result = img_captioning('data/test/images/image_captioning.png')
        print(result[OutputKeys.CAPTION])

        # test batch infer
        img_captioning.model.num_return_sequences = 1
        results = img_captioning(
            [{
                'image': 'data/test/images/image_captioning.png'
            } for _ in range(3)],
            batch_size=2)
        for r in results:
            print(r[OutputKeys.CAPTION])

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_ocr_recognize_with_name(self):
        """Run OCR recognition by model id on one image, then on a batch."""
        ocr_recognize = pipeline(
            Tasks.ocr_recognition,
            model='damo/ofa_ocr-recognition_scene_base_zh')
        result = ocr_recognize('data/test/images/image_ocr_recognition.jpg')
        print(result[OutputKeys.TEXT])

        # test batch infer
        results = ocr_recognize(
            ['data/test/images/image_ocr_recognition.jpg' for _ in range(3)],
            batch_size=2)
        for r in results:
            print(r[OutputKeys.TEXT])

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_image_classification_with_model(self):
        """Classify one image using a pipeline built from a loaded model."""
        model = Model.from_pretrained(
            'damo/ofa_image-classification_imagenet_large_en')
        ofa_pipe = pipeline(Tasks.image_classification, model=model)
        image = 'data/test/images/image_classification.png'
        result = ofa_pipe(image)
        print(result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_image_classification_with_name(self):
        """Classify an image by model id, then a batch of three images."""
        ofa_pipe = pipeline(
            Tasks.image_classification,
            model='damo/ofa_image-classification_imagenet_large_en')
        image = 'data/test/images/image_classification.png'
        result = ofa_pipe(image)
        print(result)

        # test batch infer
        images = ['data/test/images/image_classification.png'
                  for _ in range(3)]
        results = ofa_pipe(images, batch_size=2)
        for r in results:
            print(r[OutputKeys.LABELS], r[OutputKeys.SCORES])

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_summarization_with_model(self):
        """Summarize the shared text using a pipeline from a loaded model."""
        model = Model.from_pretrained(
            'damo/ofa_summarization_gigaword_large_en')
        ofa_pipe = pipeline(Tasks.text_summarization, model=model)
        sample = {'text': self.SUMMARIZATION_TEXT}
        result = ofa_pipe(sample)
        print(result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_summarization_with_name(self):
        """Summarize by model id: single, 2 sequences, then batch."""
        ofa_pipe = pipeline(
            Tasks.text_summarization,
            model='damo/ofa_summarization_gigaword_large_en')
        text = self.SUMMARIZATION_TEXT
        sample = {'text': text}
        result = ofa_pipe(sample)
        print(result)

        # test for return multiple sequences
        ofa_pipe.model.num_return_sequences = 2
        result = ofa_pipe(sample)
        print(result)

        # test batch infer
        ofa_pipe.model.num_return_sequences = 1
        samples = [{'text': text} for _ in range(3)]
        results = ofa_pipe(samples, batch_size=2)
        for r in results:
            print(r[OutputKeys.TEXT])

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_text_classification_with_model(self):
        """Classify a sentence pair given as a tuple and as a dict."""
        model = Model.from_pretrained(
            'damo/ofa_text-classification_mnli_large_en')
        ofa_pipe = pipeline(Tasks.text_classification, model=model)
        text = 'One of our number will carry out your instructions minutely.'
        text2 = 'A member of my team will execute your orders with immense precision.'
        # Both input forms are exercised and both results are reported;
        # previously the tuple-form result was overwritten unseen.
        result = ofa_pipe((text, text2))
        print(result)
        result = ofa_pipe({'text': text, 'text2': text2})
        print(result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_text_classification_with_name(self):
        """Classify a sentence pair by model id, then a batch of pairs."""
        ofa_pipe = pipeline(
            Tasks.text_classification,
            model='damo/ofa_text-classification_mnli_large_en')
        text = 'One of our number will carry out your instructions minutely.'
        text2 = 'A member of my team will execute your orders with immense precision.'
        result = ofa_pipe((text, text2))
        print(result)

        # test batch infer
        inputs = [(text, text2) for _ in range(3)]
        results = ofa_pipe(inputs, batch_size=2)
        for r in results:
            print(r[OutputKeys.LABELS], r[OutputKeys.SCORES])

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_visual_entailment_with_model(self):
        """Run visual entailment using a pipeline from a loaded model."""
        model = Model.from_pretrained(
            'damo/ofa_visual-entailment_snli-ve_large_en')
        ofa_pipe = pipeline(Tasks.visual_entailment, model=model)
        image = 'data/test/images/dogs.jpg'
        text = 'there are two birds.'
        sample = {'image': image, 'text': text}
        result = ofa_pipe(sample)
        print(result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_visual_entailment_with_name(self):
        """Run visual entailment by model id, then on a batch."""
        ofa_pipe = pipeline(
            Tasks.visual_entailment,
            model='damo/ofa_visual-entailment_snli-ve_large_en')
        image = 'data/test/images/dogs.jpg'
        text = 'there are two birds.'
        sample = {'image': image, 'text': text}
        result = ofa_pipe(sample)
        print(result)

        # test batch infer
        samples = [{'image': image, 'text': text} for _ in range(3)]
        results = ofa_pipe(samples, batch_size=2)
        for r in results:
            print(r[OutputKeys.LABELS], r[OutputKeys.SCORES])

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_visual_grounding_with_model(self):
        """Ground a phrase with a loaded model and save the boxed image."""
        model = Model.from_pretrained(
            'damo/ofa_visual-grounding_refcoco_large_en')
        ofa_pipe = pipeline(Tasks.visual_grounding, model=model)
        image = 'data/test/images/visual_grounding.png'
        text = 'a blue turtle-like pokemon with round head'
        sample = {'image': image, 'text': text}
        result = ofa_pipe(sample)
        print(result)
        # Name the output after the image file itself; indexing the split
        # path with [-2] picked the parent directory ("images") instead.
        image_name = self._image_stem(image)
        self.save_img(
            image,
            result[OutputKeys.BOXES][0],  # just one box
            'large_en_model_' + image_name + '.png')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_visual_grounding_with_name(self):
        """Ground a phrase by model id, save the boxed image, then batch."""
        ofa_pipe = pipeline(
            Tasks.visual_grounding,
            model='damo/ofa_visual-grounding_refcoco_large_en')
        image = 'data/test/images/visual_grounding.png'
        text = 'a blue turtle-like pokemon with round head'
        sample = {'image': image, 'text': text}
        result = ofa_pipe(sample)
        print(result)
        # Name the output after the image file itself; indexing the split
        # path with [-2] picked the parent directory ("images") instead.
        image_name = self._image_stem(image)
        self.save_img(image, result[OutputKeys.BOXES][0],
                      'large_en_name_' + image_name + '.png')

        # test batch infer
        result = ofa_pipe([sample for _ in range(3)], batch_size=2)
        print(result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_visual_grounding_zh_with_name(self):
        """Ground a Chinese phrase by model id and save the boxed image."""
        model = 'damo/ofa_visual-grounding_refcoco_large_zh'
        ofa_pipe = pipeline(Tasks.visual_grounding, model=model)
        image = 'data/test/images/visual_grounding.png'
        text = '一个圆头的蓝色宝可梦'
        sample = {'image': image, 'text': text}
        result = ofa_pipe(sample)
        print(result)
        # The base name keeps its ".png" extension, so none is appended.
        image_name = osp.basename(image)
        self.save_img(image, result[OutputKeys.BOXES][0],
                      'large_zh_name_' + image_name)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_visual_question_answering_with_model(self):
        """Answer a question about an image using a loaded model."""
        model = Model.from_pretrained(
            'damo/ofa_visual-question-answering_pretrain_large_en')
        ofa_pipe = pipeline(Tasks.visual_question_answering, model=model)
        image = 'data/test/images/visual_question_answering.png'
        text = 'what is grown on the plant?'
        sample = {'image': image, 'text': text}
        result = ofa_pipe(sample)
        print(result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_visual_question_answering_with_name(self):
        """Answer a question about an image by model id, then batch."""
        model = 'damo/ofa_visual-question-answering_pretrain_large_en'
        ofa_pipe = pipeline(Tasks.visual_question_answering, model=model)
        image = 'data/test/images/visual_question_answering.png'
        text = 'what is grown on the plant?'
        sample = {'image': image, 'text': text}
        result = ofa_pipe(sample)
        print(result)

        # test batch infer
        result = ofa_pipe([sample for _ in range(3)], batch_size=2)
        print(result)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_image_captioning_distilled_with_model(self):
        """Caption a PIL image with the distilled model, then a batch."""
        model = Model.from_pretrained(
            'damo/ofa_image-caption_coco_distilled_en')
        img_captioning = pipeline(
            task=Tasks.image_captioning,
            model=model,
        )
        image_path = 'data/test/images/image_captioning.png'
        # Keep the file handle open only while the pipeline consumes it.
        with Image.open(image_path) as image:
            result = img_captioning(image)
            print(result[OutputKeys.CAPTION])

            # test batch infer
            print(img_captioning([image for _ in range(3)], batch_size=2))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_visual_entailment_distilled_model_with_name(self):
        """Run visual entailment with the distilled model id."""
        ofa_pipe = pipeline(
            Tasks.visual_entailment,
            model='damo/ofa_visual-entailment_snli-ve_distilled_v2_en')
        image = 'data/test/images/dogs.jpg'
        text = 'there are two birds.'
        sample = {'image': image, 'text': text}
        result = ofa_pipe(sample)
        print(result)

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_visual_grounding_distilled_model_with_model(self):
        """Ground a phrase with the distilled model loaded up front."""
        model = Model.from_pretrained(
            'damo/ofa_visual-grounding_refcoco_distilled_en')
        ofa_pipe = pipeline(Tasks.visual_grounding, model=model)
        image = 'data/test/images/visual_grounding.png'
        text = 'a blue turtle-like pokemon with round head'
        sample = {'image': image, 'text': text}
        result = ofa_pipe(sample)
        print(result)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_text_to_image_synthesis_with_name(self):
        """Synthesize an image from text by model id and save it."""
        model = 'damo/ofa_text-to-image-synthesis_coco_large_en'
        ofa_pipe = pipeline(Tasks.text_to_image_synthesis, model=model)
        ofa_pipe.model.generator.beam_size = 2
        example = {'text': 'a bear in the water.'}
        result = ofa_pipe(example)
        # Write next to the other test artifacts instead of the working dir.
        out_path = osp.join(self.output_dir, 'result.png')
        result[OutputKeys.OUTPUT_IMGS][0].save(out_path)
        print(f'Output written to {osp.abspath(out_path)}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_text_to_image_synthesis_with_model(self):
        """Synthesize an image from text with a loaded model and save it."""
        model = Model.from_pretrained(
            'damo/ofa_text-to-image-synthesis_coco_large_en')
        ofa_pipe = pipeline(Tasks.text_to_image_synthesis, model=model)
        ofa_pipe.model.generator.beam_size = 2
        example = {'text': 'a bear in the water.'}
        result = ofa_pipe(example)
        # Write next to the other test artifacts instead of the working dir.
        out_path = osp.join(self.output_dir, 'result.png')
        # NOTE(review): the *_with_name variant reads
        # OutputKeys.OUTPUT_IMGS[0] while this one reads
        # OutputKeys.OUTPUT_IMG - confirm which key the pipeline returns.
        result[OutputKeys.OUTPUT_IMG].save(out_path)
        print(f'Output written to {osp.abspath(out_path)}')

    @unittest.skipUnless(
        test_level() >= 1,
        'skip test in current test level, model has no text2phone_dict.txt')
    def test_run_with_asr_with_name(self):
        """Run speech recognition by model id on one wav, then a batch."""
        model = 'damo/ofa_mmspeech_pretrain_base_zh'
        ofa_pipe = pipeline(Tasks.auto_speech_recognition, model=model)
        example = {'wav': 'data/test/audios/asr_example_ofa.wav'}
        result = ofa_pipe(example)
        print(result[OutputKeys.TEXT])

        # test batch infer
        result = ofa_pipe([example for _ in range(3)], batch_size=2)
        for r in result:
            print(r[OutputKeys.TEXT])

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_sudoku_with_name(self):
        """Solve one sudoku puzzle by model id, then a batch of three."""
        model = 'damo/ofa_sudoku_kaggle_large'
        ofa_pipe = pipeline(Tasks.sudoku, model=model)
        # the valid num is 1-9,and use 0 represents the empty block
        # the separator of column is ` : `, and the separator of row is ` | `
        # NOTE(review): the backslash continuations sit inside the string
        # literal, so each continued line's leading indentation becomes part
        # of the string; presumably the preprocessor tolerates the extra
        # whitespace - verify.
        example = '5 : 3 : 0 : 0 : 7 : 0 : 0 : 0 : 0 | \
            6 : 0 : 0 : 1 : 9 : 5 : 0 : 0 : 0 | \
            0 : 9 : 8 : 0 : 0 : 0 : 0 : 6 : 0 | \
            8 : 0 : 0 : 0 : 6 : 0 : 0 : 0 : 3 | \
            4 : 0 : 0 : 8 : 0 : 3 : 0 : 0 : 1 | \
            7 : 0 : 0 : 0 : 2 : 0 : 0 : 0 : 6 | \
            0 : 6 : 0 : 0 : 0 : 0 : 2 : 8 : 0 | \
            0 : 0 : 0 : 4 : 1 : 9 : 0 : 0 : 5 | \
            0 : 0 : 0 : 0 : 8 : 0 : 0 : 7 : 9'

        result = ofa_pipe(example)
        print(result[OutputKeys.TEXT])

        # test batch infer
        result = ofa_pipe([example for _ in range(3)], batch_size=2)
        for r in result:
            print(r[OutputKeys.TEXT])

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_text2sql_with_name(self):
        """Translate a question to SQL by model id, then a batch."""
        model = 'damo/ofa_text2sql_spider_large_en'
        ofa_pipe = pipeline(Tasks.text2sql, model=model)
        text = 'Show all book categories and the number of books in each category.'
        database = 'culture_company'  # optional, default `culture_company`
        example = {'text': text, 'database': database}
        result = ofa_pipe(example)
        print(result[OutputKeys.TEXT])

        # test batch infer
        result = ofa_pipe([example for _ in range(3)], batch_size=2)
        for r in result:
            print(r[OutputKeys.TEXT])


if __name__ == '__main__':
    unittest.main()