From ba3db0f552b408e4196bb033a632070518b88078 Mon Sep 17 00:00:00 2001 From: "siyang.ssy" Date: Tue, 25 Oct 2022 22:56:14 +0800 Subject: [PATCH] [to #42322933] fix video embedding output Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10525516 --- .../multi_modal/mmr/models/clip_for_mm_video_embedding.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py index f1b1a6c7..0cc040c6 100644 --- a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py +++ b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py @@ -236,8 +236,10 @@ class VideoCLIPForMultiModalEmbedding(TorchModel): logger.info('text feature: {}'.format(sequence_output[0][0][0])) logger.info('video feature: {}'.format(visual_output[0][0][0])) - output[OutputKeys.VIDEO_EMBEDDING] = visual_output - output[OutputKeys.TEXT_EMBEDDING] = sequence_output + output[ + OutputKeys.VIDEO_EMBEDDING] = visual_output.cpu().detach().numpy() + output[OutputKeys.TEXT_EMBEDDING] = sequence_output.cpu().detach( + ).numpy() return output def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: