From eb4bd99a7e69b50ec04d1cfeb779eb1f3013af87 Mon Sep 17 00:00:00 2001 From: Kang <66989698+kangzhao2@users.noreply.github.com> Date: Tue, 22 Aug 2023 10:57:23 +0800 Subject: [PATCH] fix video output of image2video (#488) * fix video output * fix logger.error * fix log error --- .../image_to_video/modules/autoencoder.py | 9 +++++--- .../image_to_video/modules/embedder.py | 1 - .../multi_modal/image_to_video_pipeline.py | 23 ++++++++++++------- .../multi_modal/video_to_video_pipeline.py | 5 ++-- tests/pipelines/test_image2video.py | 2 +- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/modelscope/models/multi_modal/image_to_video/modules/autoencoder.py b/modelscope/models/multi_modal/image_to_video/modules/autoencoder.py index 935134bc..34e8b7b0 100755 --- a/modelscope/models/multi_modal/image_to_video/modules/autoencoder.py +++ b/modelscope/models/multi_modal/image_to_video/modules/autoencoder.py @@ -1,13 +1,16 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import collections -import logging import numpy as np import torch import torch.nn as nn import torch.nn.functional as F +from modelscope.utils.logger import get_logger + +logger = get_logger() + def nonlinearity(x): # swish @@ -348,7 +351,7 @@ class Decoder(nn.Module): block_in = ch * ch_mult[self.num_resolutions - 1] curr_res = resolution // 2**(self.num_resolutions - 1) self.z_shape = (1, z_channels, curr_res, curr_res) - logging.info('Working with z of shape {} = {} dimensions.'.format( + logger.info('Working with z of shape {} = {} dimensions.'.format( self.z_shape, np.prod(self.z_shape))) # z to block_in @@ -478,7 +481,7 @@ class AutoencoderKL(nn.Module): k_new = k.split('first_stage_model.')[-1] sd_new[k_new] = sd[k] self.load_state_dict(sd_new, strict=True) - logging.info(f'Restored from {path}') + logger.info(f'Restored from {path}') def on_train_batch_end(self, *args, **kwargs): if self.use_ema: diff --git a/modelscope/models/multi_modal/image_to_video/modules/embedder.py 
b/modelscope/models/multi_modal/image_to_video/modules/embedder.py index 39063a57..69445667 100755 --- a/modelscope/models/multi_modal/image_to_video/modules/embedder.py +++ b/modelscope/models/multi_modal/image_to_video/modules/embedder.py @@ -1,6 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import logging import os import numpy as np diff --git a/modelscope/pipelines/multi_modal/image_to_video_pipeline.py b/modelscope/pipelines/multi_modal/image_to_video_pipeline.py index 7ac71d16..b649d189 100644 --- a/modelscope/pipelines/multi_modal/image_to_video_pipeline.py +++ b/modelscope/pipelines/multi_modal/image_to_video_pipeline.py @@ -1,6 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import os +import subprocess import tempfile from typing import Any, Dict, Optional @@ -72,14 +73,20 @@ class ImageToVideoPipeline(Pipeline): output_video_path = tempfile.NamedTemporaryFile(suffix='.mp4').name temp_video_file = True - fourcc = cv2.VideoWriter_fourcc(*'mp4v') - h, w, c = video[0].shape - video_writer = cv2.VideoWriter( - output_video_path, fourcc, fps=8, frameSize=(w, h)) - for i in range(len(video)): - img = cv2.cvtColor(video[i], cv2.COLOR_RGB2BGR) - video_writer.write(img) - video_writer.release() + temp_dir = tempfile.mkdtemp() + for fid, frame in enumerate(video): + tpth = os.path.join(temp_dir, '%06d.png' % (fid + 1)) + cv2.imwrite(tpth, frame[:, :, ::-1], + [int(cv2.IMWRITE_PNG_COMPRESSION), 0]) + + cmd = f'ffmpeg -y -f image2 -loglevel quiet -framerate 8.0 -i {temp_dir}/%06d.png \ -vcodec libx264 -crf 17 -pix_fmt yuv420p {output_video_path}' + + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + logger.error('Save Video Error with {}'.format(output)) + os.system(f'rm -rf {temp_dir}') + if temp_video_file: video_file_content = b'' with open(output_video_path, 'rb') as f: diff --git a/modelscope/pipelines/multi_modal/video_to_video_pipeline.py b/modelscope/pipelines/multi_modal/video_to_video_pipeline.py index 
36e6544d..a3b89ca6 100644 --- a/modelscope/pipelines/multi_modal/video_to_video_pipeline.py +++ b/modelscope/pipelines/multi_modal/video_to_video_pipeline.py @@ -1,6 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import os +import subprocess import tempfile from typing import Any, Dict, Optional @@ -111,9 +112,9 @@ class VideoToVideoPipeline(Pipeline): cmd = f'ffmpeg -y -f image2 -loglevel quiet -framerate 8.0 -i {temp_dir}/%06d.png \ -vcodec libx264 -crf 17 -pix_fmt yuv420p {output_video_path}' - status = os.system(cmd) + status, output = subprocess.getstatusoutput(cmd) if status != 0: - logger.info('Save Video Error with {}'.format(status)) + logger.error('Save Video Error with {}'.format(output)) os.system(f'rm -rf {temp_dir}') if temp_video_file: diff --git a/tests/pipelines/test_image2video.py b/tests/pipelines/test_image2video.py index b6daf73c..e7a1c01c 100644 --- a/tests/pipelines/test_image2video.py +++ b/tests/pipelines/test_image2video.py @@ -4,7 +4,7 @@ import unittest from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines import pipeline -from modelscope.utils.constant import DownloadMode, Tasks +from modelscope.utils.constant import Tasks from modelscope.utils.test_utils import test_level