From a272d00c540f1c37c55fb5f97a576bc46f33ad24 Mon Sep 17 00:00:00 2001
From: "jinmao.yk"
Date: Tue, 31 Jan 2023 14:34:05 +0000
Subject: [PATCH] adjust video_human_matting output of video to support demo
 service

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11486472
---
 modelscope/outputs/outputs.py                    |  3 ++-
 .../pipelines/cv/video_human_matting_pipeline.py | 16 +++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py
index dec5084f..76a5c779 100644
--- a/modelscope/outputs/outputs.py
+++ b/modelscope/outputs/outputs.py
@@ -534,8 +534,9 @@ TASK_OUTPUTS = {
     # video human matting result for a single video
     # {
     #   "masks": [np.array # 2D array with shape [height, width]]
+    #   "output_video": "path_to_matting_video"
     # }
-    Tasks.video_human_matting: [OutputKeys.MASKS],
+    Tasks.video_human_matting: [OutputKeys.MASKS, OutputKeys.OUTPUT_VIDEO],
 
     # ============ nlp tasks ===================
 
diff --git a/modelscope/pipelines/cv/video_human_matting_pipeline.py b/modelscope/pipelines/cv/video_human_matting_pipeline.py
index e9a05d84..a9035dd3 100644
--- a/modelscope/pipelines/cv/video_human_matting_pipeline.py
+++ b/modelscope/pipelines/cv/video_human_matting_pipeline.py
@@ -37,9 +37,11 @@ class VideoHumanMattingPipeline(Pipeline):
     def preprocess(self, input) -> Input:
         return input
 
-    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
+    def forward(self, input: Dict[str, Any],
+                **forward_params) -> Dict[str, Any]:
         video_path = input['video_input_path']
         out_path = input['output_path']
+        render = forward_params.get('render', False)
         video_input = cv2.VideoCapture(video_path)
         fps = video_input.get(cv2.CAP_PROP_FPS)
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
@@ -58,19 +60,19 @@
             frame_tensor = preprocess(frame)
             pha, *rec = self.model.model(
                 frame_tensor.to(self.device), *rec, downsample_ratio=scale)
-            com = pha * 255
-            com = com.repeat(1, 3, 1, 1)
-            com = com[0].data.cpu().numpy().transpose(1, 2,
-                                                      0).astype(np.uint8)
+            mask = pha * 255
+            mask = mask[0].data.cpu().numpy().transpose(1, 2, 0)
+            com = mask.repeat(3, 2).astype(np.uint8)
             video_save.write(com)
-            masks.append(com / 255)
+            masks.append((mask / 255).astype(np.uint8))
             success, frame = video_input.read()
         logger.info('matting process done')
         video_input.release()
         video_save.release()
         return {
-            OutputKeys.MASKS: masks,
+            OutputKeys.MASKS: None if render else masks,
+            OutputKeys.OUTPUT_VIDEO: out_path
         }
 
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         return inputs
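
A minimal usage sketch of the new output contract, for reference. Two assumptions
are made here that the patch itself does not confirm: the model id
'damo/cv_effnetv2_video-human-matting', and that keyword arguments on the
pipeline call are routed into forward() as forward_params (how kwargs are split
across preprocess/forward/postprocess can differ between modelscope versions).
The file paths are placeholders.

    # Hedged sketch: assumes the model id below and that pipeline-call
    # kwargs reach forward() as forward_params; paths are placeholders.
    from modelscope.outputs import OutputKeys
    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    matting = pipeline(
        Tasks.video_human_matting,
        model='damo/cv_effnetv2_video-human-matting')  # assumed model id

    result = matting(
        {
            'video_input_path': 'input.mp4',   # placeholder input video
            'output_path': 'matting_out.mp4',  # placeholder output video
        },
        render=True)  # demo service mode: skip returning the mask arrays

    print(result[OutputKeys.OUTPUT_VIDEO])  # path of the rendered video
    print(result[OutputKeys.MASKS])         # None when render=True

With render left at its default of False, OutputKeys.MASKS still carries the
per-frame masks, so existing callers are unaffected; the demo service passes
render=True and reads only OUTPUT_VIDEO, keeping the response small.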