[to #42322933]兼容新增clip huge模型

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10585552

    * compatible with ViT-H (huge), and set CLIP base as the default multi-modal embedding pipeline
This commit is contained in:
yichang.zyc
2022-11-01 09:57:31 +08:00
committed by yingda.chen
parent 06abae4dc6
commit 9187103e3a
2 changed files with 6 additions and 5 deletions

View File

@@ -349,11 +349,13 @@ class CLIP(nn.Module):
text_num_hidden_layers: int,
text_type_vocab_size: int,
tokenizer: FullTokenizer,
# vision_head_width, added this param for ViT-H
vision_head_width: int = 64,
):
super().__init__()
if isinstance(vision_layers, (tuple, list)):
vision_heads = vision_width * 32 // 64
vision_heads = vision_width * 32 // vision_head_width
self.visual = ModifiedResNet(
layers=vision_layers,
output_dim=embed_dim,
@@ -361,7 +363,7 @@ class CLIP(nn.Module):
input_resolution=image_resolution,
width=vision_width)
else:
vision_heads = vision_width // 64
vision_heads = vision_width // vision_head_width
self.visual = VisualTransformer(
input_resolution=image_resolution,
patch_size=vision_patch_size,

View File

@@ -93,9 +93,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
'damo/cv_resnet50_live-category'),
Tasks.video_category: (Pipelines.video_category,
'damo/cv_resnet50_video-category'),
Tasks.multi_modal_embedding:
(Pipelines.multi_modal_embedding,
'damo/multi-modal_clip-vit-large-patch14_zh'),
Tasks.multi_modal_embedding: (Pipelines.multi_modal_embedding,
'damo/multi-modal_clip-vit-base-patch16_zh'),
Tasks.generative_multi_modal_embedding:
(Pipelines.generative_multi_modal_embedding,
'damo/multi-modal_gemm-vit-large-patch14_generative-multi-modal-embedding'