mirror of https://github.com/modelscope/modelscope.git (synced 2025-12-25 12:39:25 +01:00)
adjust video_human_matting output of video to support demo service
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11486472
@@ -534,8 +534,9 @@ TASK_OUTPUTS = {
     # video human matting result for a single video
     # {
     #   "masks": [np.array # 2D array with shape [height, width]]
+    #   "output_video": "path_to_matting_video"
     # }
-    Tasks.video_human_matting: [OutputKeys.MASKS],
+    Tasks.video_human_matting: [OutputKeys.MASKS, OutputKeys.OUTPUT_VIDEO],
 
     # ============ nlp tasks ===================
 
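Note: with this change a video_human_matting result exposes both keys from the comment block above. A minimal sketch of the shape a consumer can expect (the array size and file path below are placeholders, not values from this commit):

import numpy as np

# Hypothetical result following the updated TASK_OUTPUTS contract.
result = {
    'masks': [np.zeros((720, 1280), dtype=np.uint8)],  # one 2D matte per frame
    'output_video': '/tmp/matting_output.mp4',  # path to the written matting video
}
assert set(result) == {'masks', 'output_video'}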
@@ -37,9 +37,11 @@ class VideoHumanMattingPipeline(Pipeline):
     def preprocess(self, input) -> Input:
         return input
 
-    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
+    def forward(self, input: Dict[str, Any],
+                **forward_params) -> Dict[str, Any]:
         video_path = input['video_input_path']
         out_path = input['output_path']
+        render = forward_params.get('render', False)
         video_input = cv2.VideoCapture(video_path)
         fps = video_input.get(cv2.CAP_PROP_FPS)
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
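The new **forward_params path lets a caller toggle the demo-friendly behaviour at call time. A rough usage sketch, assuming the base Pipeline routes extra keyword arguments from __call__ into forward(); the model id and file paths are illustrative placeholders, not taken from this commit:

from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Placeholder model id and paths, for illustration only.
matting = pipeline(Tasks.video_human_matting, model='<video-human-matting model id>')
result = matting(
    {'video_input_path': 'input.mp4', 'output_path': 'matting_out.mp4'},
    render=True)

# With render=True the heavy per-frame masks are dropped (MASKS is None) and
# only the path of the written matting video is returned, which is what the
# demo service needs.
print(result[OutputKeys.OUTPUT_VIDEO])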
@@ -58,19 +60,19 @@ class VideoHumanMattingPipeline(Pipeline):
             frame_tensor = preprocess(frame)
             pha, *rec = self.model.model(
                 frame_tensor.to(self.device), *rec, downsample_ratio=scale)
-            com = pha * 255
-            com = com.repeat(1, 3, 1, 1)
-            com = com[0].data.cpu().numpy().transpose(1, 2,
-                                                      0).astype(np.uint8)
+            mask = pha * 255
+            mask = mask[0].data.cpu().numpy().transpose(1, 2, 0)
+            com = mask.repeat(3, 2).astype(np.uint8)
             video_save.write(com)
-            masks.append(com / 255)
+            masks.append((mask / 255).astype(np.uint8))
             success, frame = video_input.read()
         logger.info('matting process done')
         video_input.release()
         video_save.release()
 
         return {
-            OutputKeys.MASKS: masks,
+            OutputKeys.MASKS: None if render else masks,
+            OutputKeys.OUTPUT_VIDEO: out_path
         }
 
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
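The rewritten loop computes the matte once on the CPU and derives the 3-channel frame from it, instead of repeating channels on the GPU tensor and reusing the composited frame as the mask. A small numpy sketch of the two shapes involved (toy sizes, not from the commit):

import numpy as np

# Stand-in for pha * 255 after the [1, 2, 0] transpose: shape [H, W, 1].
mask = np.random.rand(4, 6, 1) * 255

# np.ndarray.repeat(3, axis=2) expands the single-channel matte into the
# 3-channel frame handed to cv2.VideoWriter, mirroring `com = mask.repeat(3, 2)`.
com = mask.repeat(3, 2).astype(np.uint8)
assert com.shape == (4, 6, 3)

# The entry appended to MASKS stays single channel and is rescaled first,
# mirroring `(mask / 255).astype(np.uint8)`.
frame_mask = (mask / 255).astype(np.uint8)
assert frame_mask.shape == (4, 6, 1)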