mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-25 12:39:25 +01:00
[to #42322933]兼容新增clip huge模型
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10585552 * compatible with vit huge, and set clip base as default mm-embed pipeline
This commit is contained in:
@@ -349,11 +349,13 @@ class CLIP(nn.Module):
|
||||
text_num_hidden_layers: int,
|
||||
text_type_vocab_size: int,
|
||||
tokenizer: FullTokenizer,
|
||||
# vision_head_width, added this param for ViT-H
|
||||
vision_head_width: int = 64,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
if isinstance(vision_layers, (tuple, list)):
|
||||
vision_heads = vision_width * 32 // 64
|
||||
vision_heads = vision_width * 32 // vision_head_width
|
||||
self.visual = ModifiedResNet(
|
||||
layers=vision_layers,
|
||||
output_dim=embed_dim,
|
||||
@@ -361,7 +363,7 @@ class CLIP(nn.Module):
|
||||
input_resolution=image_resolution,
|
||||
width=vision_width)
|
||||
else:
|
||||
vision_heads = vision_width // 64
|
||||
vision_heads = vision_width // vision_head_width
|
||||
self.visual = VisualTransformer(
|
||||
input_resolution=image_resolution,
|
||||
patch_size=vision_patch_size,
|
||||
|
||||
@@ -93,9 +93,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
|
||||
'damo/cv_resnet50_live-category'),
|
||||
Tasks.video_category: (Pipelines.video_category,
|
||||
'damo/cv_resnet50_video-category'),
|
||||
Tasks.multi_modal_embedding:
|
||||
(Pipelines.multi_modal_embedding,
|
||||
'damo/multi-modal_clip-vit-large-patch14_zh'),
|
||||
Tasks.multi_modal_embedding: (Pipelines.multi_modal_embedding,
|
||||
'damo/multi-modal_clip-vit-base-patch16_zh'),
|
||||
Tasks.generative_multi_modal_embedding:
|
||||
(Pipelines.generative_multi_modal_embedding,
|
||||
'damo/multi-modal_gemm-vit-large-patch14_generative-multi-modal-embedding'
|
||||
|
||||
Reference in New Issue
Block a user