From e223c1b00825cbdf66d60f0643224987e9e14232 Mon Sep 17 00:00:00 2001 From: "ashui.cbh" Date: Mon, 24 Oct 2022 18:47:01 +0800 Subject: [PATCH 1/3] [to #42322933]merge master after demo service support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit demo service 对接,修改输入接口为可调用的方式 Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10502169 --- modelscope/pipeline_inputs.py | 4 ++++ .../pipelines/cv/image_inpainting_pipeline.py | 17 +++++++++-------- tests/pipelines/test_image_inpainting.py | 18 ++++++++---------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/modelscope/pipeline_inputs.py b/modelscope/pipeline_inputs.py index 77940c3c..13560229 100644 --- a/modelscope/pipeline_inputs.py +++ b/modelscope/pipeline_inputs.py @@ -91,6 +91,10 @@ TASK_INPUTS = { InputType.IMAGE, Tasks.crowd_counting: InputType.IMAGE, + Tasks.image_inpainting: { + 'img': InputType.IMAGE, + 'mask': InputType.IMAGE, + }, # image generation task result for a single image Tasks.image_to_image_generation: diff --git a/modelscope/pipelines/cv/image_inpainting_pipeline.py b/modelscope/pipelines/cv/image_inpainting_pipeline.py index 6ae0d63e..aff9788d 100644 --- a/modelscope/pipelines/cv/image_inpainting_pipeline.py +++ b/modelscope/pipelines/cv/image_inpainting_pipeline.py @@ -77,21 +77,22 @@ class ImageInpaintingPipeline(Pipeline): img, ((0, 0), (0, out_height - height), (0, out_width - width)), mode='symmetric') - def preprocess(self, input: Input) -> Dict[str, Any]: - if isinstance(input, str): - image_name, mask_name = input.split('+') + def preprocess(self, input: Dict[str, Any]) -> Dict[str, Any]: + if isinstance(input['img'], str): + image_name, mask_name = input['img'], input['mask'] img = LoadImage.convert_to_ndarray(image_name) img = self.transforms(img) mask = np.array(LoadImage(mode='L')(mask_name)['img']) mask = self.transforms(mask) - elif isinstance(input, PIL.Image.Image): - img = input.crop((0, 0, int(input.width / 2), input.height)) + elif isinstance(input['img'], PIL.Image.Image): + img = input['img'] img = self.transforms(np.array(img)) - mask = input.crop((int(input.width / 2), 0, input.width, - input.height)).convert('L') + mask = input['mask'].convert('L') mask = self.transforms(np.array(mask)) else: - raise TypeError('input should be either str or PIL.Image') + raise TypeError( + 'input should be either str or PIL.Image, and both inputs should have the same type' + ) result = dict(image=img, mask=mask[None, ...]) if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1: diff --git a/tests/pipelines/test_image_inpainting.py b/tests/pipelines/test_image_inpainting.py index b89ce399..a8b704b7 100644 --- a/tests/pipelines/test_image_inpainting.py +++ b/tests/pipelines/test_image_inpainting.py @@ -20,6 +20,10 @@ class ImageInpaintingTest(unittest.TestCase): self.input_location = 'data/test/images/image_inpainting/image_inpainting.png' self.input_mask_location = 'data/test/images/image_inpainting/image_inpainting_mask.png' self.model_id = 'damo/cv_fft_inpainting_lama' + self.input = { + 'img': self.input_location, + 'mask': self.input_mask_location + } def save_result(self, result): vis_img = result[OutputKeys.OUTPUT_IMG] @@ -28,8 +32,7 @@ class ImageInpaintingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_inpainting(self): inpainting = pipeline(Tasks.image_inpainting, model=self.model_id) - result = inpainting(self.input_location + '+' - + self.input_mask_location) + result = inpainting(self.input) if result: self.save_result(result) else: @@ -41,8 +44,7 @@ class ImageInpaintingTest(unittest.TestCase): # if input image is HR, set refine=True is more better inpainting = pipeline( Tasks.image_inpainting, model=self.model_id, refine=True) - result = inpainting(self.input_location + '+' - + self.input_mask_location) + result = inpainting(self.input) if result: self.save_result(result) else: @@ -53,10 +55,7 @@ class ImageInpaintingTest(unittest.TestCase): inpainting = pipeline(Tasks.image_inpainting, model=self.model_id) img = Image.open(self.input_location).convert('RGB') mask = Image.open(self.input_mask_location).convert('RGB') - img_new = Image.new('RGB', (img.width + mask.width, img.height)) - img_new.paste(img, (0, 0)) - img_new.paste(mask, (img.width, 0)) - result = inpainting(img_new) + result = inpainting({'img': img, 'mask': mask}) if result: self.save_result(result) else: @@ -65,8 +64,7 @@ class ImageInpaintingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_inpainting_with_default_task(self): inpainting = pipeline(Tasks.image_inpainting) - result = inpainting(self.input_location + '+' - + self.input_mask_location) + result = inpainting(self.input) if result: self.save_result(result) else: From 35c612a64276c77fa50239ebe40bb6b977655250 Mon Sep 17 00:00:00 2001 From: "yichang.zyc" Date: Mon, 24 Oct 2022 23:40:38 +0800 Subject: [PATCH 2/3] =?UTF-8?q?[to=20#42322933]=E5=8E=BB=E9=99=A4clip=20ut?= =?UTF-8?q?=E4=B8=AD=E7=9A=84dev=20revision=20=20=20=20=20=20=20=20=20Link?= =?UTF-8?q?:=20https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/1?= =?UTF-8?q?0507748?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * remove clip ut dev revision --- tests/pipelines/test_multi_modal_embedding.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/pipelines/test_multi_modal_embedding.py b/tests/pipelines/test_multi_modal_embedding.py index 23954c27..ee9cdb1f 100644 --- a/tests/pipelines/test_multi_modal_embedding.py +++ b/tests/pipelines/test_multi_modal_embedding.py @@ -19,14 +19,11 @@ class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): self.model_id = 'damo/multi-modal_clip-vit-base-patch16_zh' test_input = {'text': '皮卡丘'} - model_version = 'dev' @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run(self): pipeline_multi_modal_embedding = pipeline( - Tasks.multi_modal_embedding, - model=self.model_id, - model_revision=self.model_version) + Tasks.multi_modal_embedding, model=self.model_id) text_embedding = pipeline_multi_modal_embedding( self.test_input)[OutputKeys.TEXT_EMBEDDING] print('l1-norm: {}'.format( @@ -36,8 +33,7 @@ class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_from_modelhub(self): - model = Model.from_pretrained( - self.model_id, revision=self.model_version) + model = Model.from_pretrained(self.model_id) pipeline_multi_modal_embedding = pipeline( task=Tasks.multi_modal_embedding, model=model) text_embedding = pipeline_multi_modal_embedding( @@ -50,8 +46,7 @@ class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_default_model(self): pipeline_multi_modal_embedding = pipeline( - task=Tasks.multi_modal_embedding, - model_revision=self.model_version) + task=Tasks.multi_modal_embedding) text_embedding = pipeline_multi_modal_embedding( self.test_input)[OutputKeys.TEXT_EMBEDDING] print('l1-norm: {}'.format( From c4dbb69d6538885352b6999f416ef30f55b34ae7 Mon Sep 17 00:00:00 2001 From: "zhangyanzhao.zyz" Date: Mon, 24 Oct 2022 23:41:20 +0800 Subject: [PATCH 3/3] =?UTF-8?q?[to=20#42322933]=E5=A2=9E=E5=8A=A0=E5=AF=B9?= =?UTF-8?q?text-ranking=E4=BB=BB=E5=8A=A1=E4=B8=AD=E6=96=87=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E7=9A=84=E5=8D=95=E5=85=83=E6=B5=8B=E8=AF=95=EF=BC=8C?= =?UTF-8?q?=E4=BB=A5=E6=96=B9=E4=BE=BF=E5=BE=97=E5=88=B0=E5=AE=98=E6=96=B9?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E6=89=93=E6=A0=87=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 增加对text-ranking任务中文模型的单元测试,以方便得到官方模型打标。 Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492754 --- tests/pipelines/test_text_ranking.py | 43 +++++++----- tests/trainers/test_finetune_text_ranking.py | 72 +++++++++++++++++++- 2 files changed, 94 insertions(+), 21 deletions(-) diff --git a/tests/pipelines/test_text_ranking.py b/tests/pipelines/test_text_ranking.py index ece3c617..57fa809c 100644 --- a/tests/pipelines/test_text_ranking.py +++ b/tests/pipelines/test_text_ranking.py @@ -13,7 +13,11 @@ from modelscope.utils.test_utils import test_level class TextRankingTest(unittest.TestCase): - model_id = 'damo/nlp_corom_passage-ranking_english-base' + models = [ + 'damo/nlp_corom_passage-ranking_english-base', + 'damo/nlp_rom_passage-ranking_chinese-base' + ] + inputs = { 'source_sentence': ["how long it take to get a master's degree"], 'sentences_to_compare': [ @@ -26,29 +30,32 @@ class TextRankingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_by_direct_model_download(self): - cache_path = snapshot_download(self.model_id) - tokenizer = TextRankingPreprocessor(cache_path) - model = TextRanking.from_pretrained(cache_path) - pipeline1 = TextRankingPipeline(model, preprocessor=tokenizer) - pipeline2 = pipeline( - Tasks.text_ranking, model=model, preprocessor=tokenizer) - print(f'sentence: {self.inputs}\n' - f'pipeline1:{pipeline1(input=self.inputs)}') - print() - print(f'pipeline2: {pipeline2(input=self.inputs)}') + for model_id in self.models: + cache_path = snapshot_download(model_id) + tokenizer = TextRankingPreprocessor(cache_path) + model = TextRanking.from_pretrained(cache_path) + pipeline1 = TextRankingPipeline(model, preprocessor=tokenizer) + pipeline2 = pipeline( + Tasks.text_ranking, model=model, preprocessor=tokenizer) + print(f'sentence: {self.inputs}\n' + f'pipeline1:{pipeline1(input=self.inputs)}') + print() + print(f'pipeline2: {pipeline2(input=self.inputs)}') @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_from_modelhub(self): - model = Model.from_pretrained(self.model_id) - tokenizer = TextRankingPreprocessor(model.model_dir) - pipeline_ins = pipeline( - task=Tasks.text_ranking, model=model, preprocessor=tokenizer) - print(pipeline_ins(input=self.inputs)) + for model_id in self.models: + model = Model.from_pretrained(model_id) + tokenizer = TextRankingPreprocessor(model.model_dir) + pipeline_ins = pipeline( + task=Tasks.text_ranking, model=model, preprocessor=tokenizer) + print(pipeline_ins(input=self.inputs)) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): - pipeline_ins = pipeline(task=Tasks.text_ranking, model=self.model_id) - print(pipeline_ins(input=self.inputs)) + for model_id in self.models: + pipeline_ins = pipeline(task=Tasks.text_ranking, model=model_id) + print(pipeline_ins(input=self.inputs)) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): diff --git a/tests/trainers/test_finetune_text_ranking.py b/tests/trainers/test_finetune_text_ranking.py index e603bff2..3561cb46 100644 --- a/tests/trainers/test_finetune_text_ranking.py +++ b/tests/trainers/test_finetune_text_ranking.py @@ -14,6 +14,7 @@ from modelscope.msdatasets import MsDataset from modelscope.pipelines import pipeline from modelscope.trainers import build_trainer from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.test_utils import test_level class TestFinetuneSequenceClassification(unittest.TestCase): @@ -58,6 +59,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): results_files = os.listdir(self.tmp_dir) self.assertIn(f'{trainer.timestamp}.log.json', results_files) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_finetune_msmarco(self): def cfg_modify_fn(cfg): @@ -70,7 +72,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): 'query_sequence': 'query', 'pos_sequence': 'positive_passages', 'neg_sequence': 'negative_passages', - 'passage_text_fileds': ['title', 'text'], + 'text_fileds': ['title', 'text'], 'qid_field': 'query_id' }, 'val': { @@ -78,7 +80,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): 'query_sequence': 'query', 'pos_sequence': 'positive_passages', 'neg_sequence': 'negative_passages', - 'passage_text_fileds': ['title', 'text'], + 'text_fileds': ['title', 'text'], 'qid_field': 'query_id' }, } @@ -112,7 +114,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): # load dataset ds = MsDataset.load('passage-ranking-demo', 'zyznull') train_ds = ds['train'].to_hf_dataset() - dev_ds = ds['train'].to_hf_dataset() + dev_ds = ds['dev'].to_hf_dataset() model_id = 'damo/nlp_corom_passage-ranking_english-base' self.finetune( @@ -124,6 +126,70 @@ class TestFinetuneSequenceClassification(unittest.TestCase): output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) self.pipeline_text_ranking(output_dir) + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_finetune_dureader(self): + + def cfg_modify_fn(cfg): + cfg.task = 'text-ranking' + cfg['preprocessor'] = {'type': 'text-ranking'} + cfg.train.optimizer.lr = 2e-5 + cfg['dataset'] = { + 'train': { + 'type': 'bert', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['text'], + 'qid_field': 'query_id' + }, + 'val': { + 'type': 'bert', + 'query_sequence': 'query', + 'pos_sequence': 'positive_passages', + 'neg_sequence': 'negative_passages', + 'text_fileds': ['text'], + 'qid_field': 'query_id' + }, + } + cfg['train']['neg_samples'] = 4 + cfg['evaluation']['dataloader']['batch_size_per_gpu'] = 30 + cfg.train.max_epochs = 1 + cfg.train.train_batch_size = 4 + cfg.train.lr_scheduler = { + 'type': 'LinearLR', + 'start_factor': 1.0, + 'end_factor': 0.0, + 'options': { + 'by_epoch': False + } + } + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 1 + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'by_epoch': False, + 'interval': 5000 + }] + return cfg + + # load dataset + ds = MsDataset.load('dureader-retrieval-ranking', 'zyznull') + train_ds = ds['train'].to_hf_dataset() + dev_ds = ds['dev'].to_hf_dataset() + + model_id = 'damo/nlp_rom_passage-ranking_chinese-base' + self.finetune( + model_id=model_id, + train_dataset=train_ds, + eval_dataset=dev_ds, + cfg_modify_fn=cfg_modify_fn) + def pipeline_text_ranking(self, model_dir): model = Model.from_pretrained(model_dir) pipeline_ins = pipeline(task=Tasks.text_ranking, model=model)