mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-24 03:59:23 +01:00
Merge remote-tracking branch 'origin/master' into ofa/finetune
This commit is contained in:
@@ -91,6 +91,10 @@ TASK_INPUTS = {
|
||||
InputType.IMAGE,
|
||||
Tasks.crowd_counting:
|
||||
InputType.IMAGE,
|
||||
Tasks.image_inpainting: {
|
||||
'img': InputType.IMAGE,
|
||||
'mask': InputType.IMAGE,
|
||||
},
|
||||
|
||||
# image generation task result for a single image
|
||||
Tasks.image_to_image_generation:
|
||||
|
||||
@@ -77,21 +77,22 @@ class ImageInpaintingPipeline(Pipeline):
|
||||
img, ((0, 0), (0, out_height - height), (0, out_width - width)),
|
||||
mode='symmetric')
|
||||
|
||||
def preprocess(self, input: Input) -> Dict[str, Any]:
|
||||
if isinstance(input, str):
|
||||
image_name, mask_name = input.split('+')
|
||||
def preprocess(self, input: Dict[str, Any]) -> Dict[str, Any]:
|
||||
if isinstance(input['img'], str):
|
||||
image_name, mask_name = input['img'], input['mask']
|
||||
img = LoadImage.convert_to_ndarray(image_name)
|
||||
img = self.transforms(img)
|
||||
mask = np.array(LoadImage(mode='L')(mask_name)['img'])
|
||||
mask = self.transforms(mask)
|
||||
elif isinstance(input, PIL.Image.Image):
|
||||
img = input.crop((0, 0, int(input.width / 2), input.height))
|
||||
elif isinstance(input['img'], PIL.Image.Image):
|
||||
img = input['img']
|
||||
img = self.transforms(np.array(img))
|
||||
mask = input.crop((int(input.width / 2), 0, input.width,
|
||||
input.height)).convert('L')
|
||||
mask = input['mask'].convert('L')
|
||||
mask = self.transforms(np.array(mask))
|
||||
else:
|
||||
raise TypeError('input should be either str or PIL.Image')
|
||||
raise TypeError(
|
||||
'input should be either str or PIL.Image, and both inputs should have the same type'
|
||||
)
|
||||
result = dict(image=img, mask=mask[None, ...])
|
||||
|
||||
if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
|
||||
|
||||
@@ -20,6 +20,10 @@ class ImageInpaintingTest(unittest.TestCase):
|
||||
self.input_location = 'data/test/images/image_inpainting/image_inpainting.png'
|
||||
self.input_mask_location = 'data/test/images/image_inpainting/image_inpainting_mask.png'
|
||||
self.model_id = 'damo/cv_fft_inpainting_lama'
|
||||
self.input = {
|
||||
'img': self.input_location,
|
||||
'mask': self.input_mask_location
|
||||
}
|
||||
|
||||
def save_result(self, result):
|
||||
vis_img = result[OutputKeys.OUTPUT_IMG]
|
||||
@@ -28,8 +32,7 @@ class ImageInpaintingTest(unittest.TestCase):
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_inpainting(self):
|
||||
inpainting = pipeline(Tasks.image_inpainting, model=self.model_id)
|
||||
result = inpainting(self.input_location + '+'
|
||||
+ self.input_mask_location)
|
||||
result = inpainting(self.input)
|
||||
if result:
|
||||
self.save_result(result)
|
||||
else:
|
||||
@@ -41,8 +44,7 @@ class ImageInpaintingTest(unittest.TestCase):
|
||||
# if the input image is high-resolution (HR), setting refine=True gives better results
|
||||
inpainting = pipeline(
|
||||
Tasks.image_inpainting, model=self.model_id, refine=True)
|
||||
result = inpainting(self.input_location + '+'
|
||||
+ self.input_mask_location)
|
||||
result = inpainting(self.input)
|
||||
if result:
|
||||
self.save_result(result)
|
||||
else:
|
||||
@@ -53,10 +55,7 @@ class ImageInpaintingTest(unittest.TestCase):
|
||||
inpainting = pipeline(Tasks.image_inpainting, model=self.model_id)
|
||||
img = Image.open(self.input_location).convert('RGB')
|
||||
mask = Image.open(self.input_mask_location).convert('RGB')
|
||||
img_new = Image.new('RGB', (img.width + mask.width, img.height))
|
||||
img_new.paste(img, (0, 0))
|
||||
img_new.paste(mask, (img.width, 0))
|
||||
result = inpainting(img_new)
|
||||
result = inpainting({'img': img, 'mask': mask})
|
||||
if result:
|
||||
self.save_result(result)
|
||||
else:
|
||||
@@ -65,8 +64,7 @@ class ImageInpaintingTest(unittest.TestCase):
|
||||
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
|
||||
def test_inpainting_with_default_task(self):
|
||||
inpainting = pipeline(Tasks.image_inpainting)
|
||||
result = inpainting(self.input_location + '+'
|
||||
+ self.input_mask_location)
|
||||
result = inpainting(self.input)
|
||||
if result:
|
||||
self.save_result(result)
|
||||
else:
|
||||
|
||||
@@ -19,14 +19,11 @@ class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
|
||||
self.model_id = 'damo/multi-modal_clip-vit-base-patch16_zh'
|
||||
|
||||
test_input = {'text': '皮卡丘'}
|
||||
model_version = 'dev'
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
|
||||
def test_run(self):
|
||||
pipeline_multi_modal_embedding = pipeline(
|
||||
Tasks.multi_modal_embedding,
|
||||
model=self.model_id,
|
||||
model_revision=self.model_version)
|
||||
Tasks.multi_modal_embedding, model=self.model_id)
|
||||
text_embedding = pipeline_multi_modal_embedding(
|
||||
self.test_input)[OutputKeys.TEXT_EMBEDDING]
|
||||
print('l1-norm: {}'.format(
|
||||
@@ -36,8 +33,7 @@ class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_run_with_model_from_modelhub(self):
|
||||
model = Model.from_pretrained(
|
||||
self.model_id, revision=self.model_version)
|
||||
model = Model.from_pretrained(self.model_id)
|
||||
pipeline_multi_modal_embedding = pipeline(
|
||||
task=Tasks.multi_modal_embedding, model=model)
|
||||
text_embedding = pipeline_multi_modal_embedding(
|
||||
@@ -50,8 +46,7 @@ class MultiModalEmbeddingTest(unittest.TestCase, DemoCompatibilityCheck):
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_run_with_default_model(self):
|
||||
pipeline_multi_modal_embedding = pipeline(
|
||||
task=Tasks.multi_modal_embedding,
|
||||
model_revision=self.model_version)
|
||||
task=Tasks.multi_modal_embedding)
|
||||
text_embedding = pipeline_multi_modal_embedding(
|
||||
self.test_input)[OutputKeys.TEXT_EMBEDDING]
|
||||
print('l1-norm: {}'.format(
|
||||
|
||||
@@ -13,7 +13,11 @@ from modelscope.utils.test_utils import test_level
|
||||
|
||||
|
||||
class TextRankingTest(unittest.TestCase):
|
||||
model_id = 'damo/nlp_corom_passage-ranking_english-base'
|
||||
models = [
|
||||
'damo/nlp_corom_passage-ranking_english-base',
|
||||
'damo/nlp_rom_passage-ranking_chinese-base'
|
||||
]
|
||||
|
||||
inputs = {
|
||||
'source_sentence': ["how long it take to get a master's degree"],
|
||||
'sentences_to_compare': [
|
||||
@@ -26,29 +30,32 @@ class TextRankingTest(unittest.TestCase):
|
||||
|
||||
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_by_direct_model_download(self):
    """Run text ranking from locally downloaded snapshots of each model.

    Every id in ``self.models`` is fetched with ``snapshot_download`` and
    then exercised twice: once through a directly constructed
    ``TextRankingPipeline`` and once through the generic ``pipeline``
    factory, printing both results for comparison.
    """
    # Iterating self.models already covers the English base model, so no
    # separate single-model run is needed (it would only repeat the first
    # iteration's download and inference).
    for model_id in self.models:
        cache_path = snapshot_download(model_id)
        tokenizer = TextRankingPreprocessor(cache_path)
        model = TextRanking.from_pretrained(cache_path)
        pipeline1 = TextRankingPipeline(model, preprocessor=tokenizer)
        pipeline2 = pipeline(
            Tasks.text_ranking, model=model, preprocessor=tokenizer)
        print(f'sentence: {self.inputs}\n'
              f'pipeline1:{pipeline1(input=self.inputs)}')
        print()
        print(f'pipeline2: {pipeline2(input=self.inputs)}')
|
||||
|
||||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_from_modelhub(self):
    """Run text ranking with models loaded via ``Model.from_pretrained``.

    For each id in ``self.models`` the model is loaded, a
    ``TextRankingPreprocessor`` is built from ``model.model_dir``, and the
    resulting pipeline is applied to ``self.inputs``; the output is printed.
    """
    # The loop covers every configured model, including the English base
    # one, so a standalone run before it would just duplicate work.
    for model_id in self.models:
        model = Model.from_pretrained(model_id)
        tokenizer = TextRankingPreprocessor(model.model_dir)
        pipeline_ins = pipeline(
            task=Tasks.text_ranking, model=model, preprocessor=tokenizer)
        print(pipeline_ins(input=self.inputs))
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_run_with_model_name(self):
    """Run text ranking by passing each model id straight to ``pipeline``.

    Prints the pipeline output on ``self.inputs`` for every id in
    ``self.models``.
    """
    # One pass over self.models is sufficient; the first entry is the
    # English base model, so running it separately would repeat it.
    for model_id in self.models:
        pipeline_ins = pipeline(task=Tasks.text_ranking, model=model_id)
        print(pipeline_ins(input=self.inputs))
|
||||
|
||||
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
|
||||
def test_run_with_default_model(self):
|
||||
|
||||
@@ -14,6 +14,7 @@ from modelscope.msdatasets import MsDataset
|
||||
from modelscope.pipelines import pipeline
|
||||
from modelscope.trainers import build_trainer
|
||||
from modelscope.utils.constant import ModelFile, Tasks
|
||||
from modelscope.utils.test_utils import test_level
|
||||
|
||||
|
||||
class TestFinetuneSequenceClassification(unittest.TestCase):
|
||||
@@ -58,6 +59,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase):
|
||||
results_files = os.listdir(self.tmp_dir)
|
||||
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
|
||||
def test_finetune_msmarco(self):
|
||||
|
||||
def cfg_modify_fn(cfg):
|
||||
@@ -70,7 +72,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase):
|
||||
'query_sequence': 'query',
|
||||
'pos_sequence': 'positive_passages',
|
||||
'neg_sequence': 'negative_passages',
|
||||
'passage_text_fileds': ['title', 'text'],
|
||||
'text_fileds': ['title', 'text'],
|
||||
'qid_field': 'query_id'
|
||||
},
|
||||
'val': {
|
||||
@@ -78,7 +80,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase):
|
||||
'query_sequence': 'query',
|
||||
'pos_sequence': 'positive_passages',
|
||||
'neg_sequence': 'negative_passages',
|
||||
'passage_text_fileds': ['title', 'text'],
|
||||
'text_fileds': ['title', 'text'],
|
||||
'qid_field': 'query_id'
|
||||
},
|
||||
}
|
||||
@@ -112,7 +114,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase):
|
||||
# load dataset
|
||||
ds = MsDataset.load('passage-ranking-demo', 'zyznull')
|
||||
train_ds = ds['train'].to_hf_dataset()
|
||||
dev_ds = ds['train'].to_hf_dataset()
|
||||
dev_ds = ds['dev'].to_hf_dataset()
|
||||
|
||||
model_id = 'damo/nlp_corom_passage-ranking_english-base'
|
||||
self.finetune(
|
||||
@@ -124,6 +126,70 @@ class TestFinetuneSequenceClassification(unittest.TestCase):
|
||||
output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR)
|
||||
self.pipeline_text_ranking(output_dir)
|
||||
|
||||
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_finetune_dureader(self):
    """Fine-tune a passage-ranking model on the DuReader ranking data.

    Loads the ``dureader-retrieval-ranking`` dataset, converts its
    ``train`` and ``dev`` splits to HF datasets, and hands them to
    ``self.finetune`` together with a config-rewriting callback for
    ``damo/nlp_rom_passage-ranking_chinese-base``.
    """

    def cfg_modify_fn(cfg):
        """Adjust the loaded config for a one-epoch text-ranking run."""

        def split_settings():
            # Built fresh on every call so the train and val entries do
            # not share the same dict or list objects.
            # NOTE: 'text_fileds' is a runtime key; spelling kept as-is.
            return {
                'type': 'bert',
                'query_sequence': 'query',
                'pos_sequence': 'positive_passages',
                'neg_sequence': 'negative_passages',
                'text_fileds': ['text'],
                'qid_field': 'query_id'
            }

        cfg.task = 'text-ranking'
        cfg['preprocessor'] = {'type': 'text-ranking'}
        cfg.train.optimizer.lr = 2e-5
        cfg['dataset'] = {
            'train': split_settings(),
            'val': split_settings(),
        }
        cfg['train']['neg_samples'] = 4
        cfg['evaluation']['dataloader']['batch_size_per_gpu'] = 30
        cfg.train.max_epochs = 1
        cfg.train.train_batch_size = 4

        # Scheduler settings: factor goes from 1.0 to 0.0, by_epoch off.
        cfg.train.lr_scheduler = {
            'type': 'LinearLR',
            'start_factor': 1.0,
            'end_factor': 0.0,
            'options': {
                'by_epoch': False
            }
        }

        checkpoint_hook = {'type': 'CheckpointHook', 'interval': 1}
        text_logger_hook = {'type': 'TextLoggerHook', 'interval': 1}
        iter_timer_hook = {'type': 'IterTimerHook'}
        evaluation_hook = {
            'type': 'EvaluationHook',
            'by_epoch': False,
            'interval': 5000
        }
        cfg.train.hooks = [
            checkpoint_hook,
            text_logger_hook,
            iter_timer_hook,
            evaluation_hook,
        ]
        return cfg

    # load dataset
    dataset = MsDataset.load('dureader-retrieval-ranking', 'zyznull')
    train_split = dataset['train'].to_hf_dataset()
    dev_split = dataset['dev'].to_hf_dataset()

    self.finetune(
        model_id='damo/nlp_rom_passage-ranking_chinese-base',
        train_dataset=train_split,
        eval_dataset=dev_split,
        cfg_modify_fn=cfg_modify_fn)
|
||||
|
||||
def pipeline_text_ranking(self, model_dir):
|
||||
model = Model.from_pretrained(model_dir)
|
||||
pipeline_ins = pipeline(task=Tasks.text_ranking, model=model)
|
||||
|
||||
Reference in New Issue
Block a user