From cba07bb2fd3b970a7223c8827a10fe5255c28113 Mon Sep 17 00:00:00 2001 From: Wang Qiang <37444407+XDUWQ@users.noreply.github.com> Date: Wed, 16 Aug 2023 21:03:11 +0800 Subject: [PATCH] Fix bugs of configs file path and duration (#476) * fix bugs of configs file path and duration * pre commit * delete configs * test videocomposer model version --- .../videocomposer/configs/base.yaml | 2 - .../configs/exp01_vidcomposer_full.yaml | 20 --------- .../configs/exp02_motion_transfer.yaml | 23 ----------- .../exp02_motion_transfer_vs_style.yaml | 24 ----------- .../configs/exp03_sketch2video_style.yaml | 26 ------------ .../configs/exp04_sketch2video_wo_style.yaml | 26 ------------ .../configs/exp05_text_depths_wo_style.yaml | 26 ------------ .../configs/exp06_text_depths_vs_style.yaml | 26 ------------ .../exp10_vidcomposer_no_watermark_full.yaml | 21 ---------- .../multi_modal/videocomposer/ops/utils.py | 14 ++++--- .../multi_modal/videocomposer/utils/config.py | 41 ++++++++++--------- .../videocomposer/videocomposer_model.py | 14 ++++++- tests/pipelines/test_videocomposer.py | 2 +- 13 files changed, 42 insertions(+), 223 deletions(-) delete mode 100644 modelscope/models/multi_modal/videocomposer/configs/base.yaml delete mode 100644 modelscope/models/multi_modal/videocomposer/configs/exp01_vidcomposer_full.yaml delete mode 100644 modelscope/models/multi_modal/videocomposer/configs/exp02_motion_transfer.yaml delete mode 100644 modelscope/models/multi_modal/videocomposer/configs/exp02_motion_transfer_vs_style.yaml delete mode 100644 modelscope/models/multi_modal/videocomposer/configs/exp03_sketch2video_style.yaml delete mode 100644 modelscope/models/multi_modal/videocomposer/configs/exp04_sketch2video_wo_style.yaml delete mode 100644 modelscope/models/multi_modal/videocomposer/configs/exp05_text_depths_wo_style.yaml delete mode 100644 modelscope/models/multi_modal/videocomposer/configs/exp06_text_depths_vs_style.yaml delete mode 100644 modelscope/models/multi_modal/videocomposer/configs/exp10_vidcomposer_no_watermark_full.yaml diff --git a/modelscope/models/multi_modal/videocomposer/configs/base.yaml b/modelscope/models/multi_modal/videocomposer/configs/base.yaml deleted file mode 100644 index 42f756f8..00000000 --- a/modelscope/models/multi_modal/videocomposer/configs/base.yaml +++ /dev/null @@ -1,2 +0,0 @@ -ENABLE: true -DATASET: webvid10m diff --git a/modelscope/models/multi_modal/videocomposer/configs/exp01_vidcomposer_full.yaml b/modelscope/models/multi_modal/videocomposer/configs/exp01_vidcomposer_full.yaml deleted file mode 100644 index ec312138..00000000 --- a/modelscope/models/multi_modal/videocomposer/configs/exp01_vidcomposer_full.yaml +++ /dev/null @@ -1,20 +0,0 @@ -TASK_TYPE: MULTI_TASK -ENABLE: true -DATASET: webvid10m -video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch'] -batch_sizes: { - "1": 1, - "4": 1, - "8": 1, - "16": 1, -} -vit_image_size: 224 -network_name: UNetSD_temporal -resume: true -resume_step: 228000 -num_workers: 1 -mvs_visual: False -chunk_size: 1 -resume_checkpoint: "model_weights/non_ema_228000.pth" -log_dir: 'outputs' -num_steps: 1 diff --git a/modelscope/models/multi_modal/videocomposer/configs/exp02_motion_transfer.yaml b/modelscope/models/multi_modal/videocomposer/configs/exp02_motion_transfer.yaml deleted file mode 100644 index 4b756d32..00000000 --- a/modelscope/models/multi_modal/videocomposer/configs/exp02_motion_transfer.yaml +++ /dev/null @@ -1,23 +0,0 @@ -TASK_TYPE: SINGLE_TASK -read_image: True # You NEED Open It -ENABLE: true -DATASET: webvid10m -video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch'] -guidances: ['y', 'local_image', 'motion'] # You NEED Open It -batch_sizes: { - "1": 1, - "4": 1, - "8": 1, - "16": 1, -} -vit_image_size: 224 -network_name: UNetSD_temporal -resume: true -resume_step: 228000 -seed: 182 -num_workers: 0 -mvs_visual: False -chunk_size: 1 -resume_checkpoint: "model_weights/non_ema_228000.pth" -log_dir: 'outputs' -num_steps: 1 diff --git a/modelscope/models/multi_modal/videocomposer/configs/exp02_motion_transfer_vs_style.yaml b/modelscope/models/multi_modal/videocomposer/configs/exp02_motion_transfer_vs_style.yaml deleted file mode 100644 index 7928e7ba..00000000 --- a/modelscope/models/multi_modal/videocomposer/configs/exp02_motion_transfer_vs_style.yaml +++ /dev/null @@ -1,24 +0,0 @@ -TASK_TYPE: SINGLE_TASK -read_image: True # You NEED Open It -read_style: True -ENABLE: true -DATASET: webvid10m -video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch'] -guidances: ['y', 'local_image', 'image', 'motion'] # You NEED Open It -batch_sizes: { - "1": 1, - "4": 1, - "8": 1, - "16": 1, -} -vit_image_size: 224 -network_name: UNetSD_temporal -resume: true -resume_step: 228000 -seed: 182 -num_workers: 0 -mvs_visual: False -chunk_size: 1 -resume_checkpoint: "model_weights/non_ema_228000.pth" -log_dir: 'outputs' -num_steps: 1 diff --git a/modelscope/models/multi_modal/videocomposer/configs/exp03_sketch2video_style.yaml b/modelscope/models/multi_modal/videocomposer/configs/exp03_sketch2video_style.yaml deleted file mode 100644 index fd710ee5..00000000 --- a/modelscope/models/multi_modal/videocomposer/configs/exp03_sketch2video_style.yaml +++ /dev/null @@ -1,26 +0,0 @@ -TASK_TYPE: SINGLE_TASK -read_image: False # You NEED Open It -read_style: True -read_sketch: True -save_origin_video: False -ENABLE: true -DATASET: webvid10m -video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch'] -guidances: ['y', 'image', 'single_sketch'] # You NEED Open It -batch_sizes: { - "1": 1, - "4": 1, - "8": 1, - "16": 1, -} -vit_image_size: 224 -network_name: UNetSD_temporal -resume: true -resume_step: 228000 -seed: 182 -num_workers: 0 -mvs_visual: False -chunk_size: 1 -resume_checkpoint: "model_weights/non_ema_228000.pth" -log_dir: 'outputs' -num_steps: 1 diff --git a/modelscope/models/multi_modal/videocomposer/configs/exp04_sketch2video_wo_style.yaml b/modelscope/models/multi_modal/videocomposer/configs/exp04_sketch2video_wo_style.yaml deleted file mode 100644 index a5cc54bf..00000000 --- a/modelscope/models/multi_modal/videocomposer/configs/exp04_sketch2video_wo_style.yaml +++ /dev/null @@ -1,26 +0,0 @@ -TASK_TYPE: SINGLE_TASK -read_image: False # You NEED Open It -read_style: False -read_sketch: True -save_origin_video: False -ENABLE: true -DATASET: webvid10m -video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch'] -guidances: ['y', 'single_sketch'] # You NEED Open It -batch_sizes: { - "1": 1, - "4": 1, - "8": 1, - "16": 1, -} -vit_image_size: 224 -network_name: UNetSD_temporal -resume: true -resume_step: 228000 -seed: 182 -num_workers: 0 -mvs_visual: False -chunk_size: 1 -resume_checkpoint: "model_weights/non_ema_228000.pth" -log_dir: 'outputs' -num_steps: 1 diff --git a/modelscope/models/multi_modal/videocomposer/configs/exp05_text_depths_wo_style.yaml b/modelscope/models/multi_modal/videocomposer/configs/exp05_text_depths_wo_style.yaml deleted file mode 100644 index 29c053b1..00000000 --- a/modelscope/models/multi_modal/videocomposer/configs/exp05_text_depths_wo_style.yaml +++ /dev/null @@ -1,26 +0,0 @@ -TASK_TYPE: SINGLE_TASK -read_image: False # You NEED Open It -read_style: False -read_sketch: False -save_origin_video: True -ENABLE: true -DATASET: webvid10m -video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch'] -guidances: ['y', 'depth'] # You NEED Open It -batch_sizes: { - "1": 1, - "4": 1, - "8": 1, - "16": 1, -} -vit_image_size: 224 -network_name: UNetSD_temporal -resume: true -resume_step: 228000 -seed: 182 -num_workers: 0 -mvs_visual: False -chunk_size: 1 -resume_checkpoint: "model_weights/non_ema_228000.pth" -log_dir: 'outputs' -num_steps: 1 diff --git a/modelscope/models/multi_modal/videocomposer/configs/exp06_text_depths_vs_style.yaml b/modelscope/models/multi_modal/videocomposer/configs/exp06_text_depths_vs_style.yaml deleted file mode 100644 index 2732dc5c..00000000 --- a/modelscope/models/multi_modal/videocomposer/configs/exp06_text_depths_vs_style.yaml +++ /dev/null @@ -1,26 +0,0 @@ -TASK_TYPE: SINGLE_TASK -read_image: False -read_style: True -read_sketch: False -save_origin_video: True -ENABLE: true -DATASET: webvid10m -video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch'] -guidances: ['y', 'image', 'depth'] # You NEED Open It -batch_sizes: { - "1": 1, - "4": 1, - "8": 1, - "16": 1, -} -vit_image_size: 224 -network_name: UNetSD_temporal -resume: true -resume_step: 228000 -seed: 182 -num_workers: 0 -mvs_visual: False -chunk_size: 1 -resume_checkpoint: "model_weights/non_ema_141000_no_watermark.pth" -log_dir: 'outputs' -num_steps: 1 diff --git a/modelscope/models/multi_modal/videocomposer/configs/exp10_vidcomposer_no_watermark_full.yaml b/modelscope/models/multi_modal/videocomposer/configs/exp10_vidcomposer_no_watermark_full.yaml deleted file mode 100644 index 1be311d8..00000000 --- a/modelscope/models/multi_modal/videocomposer/configs/exp10_vidcomposer_no_watermark_full.yaml +++ /dev/null @@ -1,21 +0,0 @@ -TASK_TYPE: VideoComposer_Inference -ENABLE: true -DATASET: webvid10m -video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch'] -batch_sizes: { - "1": 1, - "4": 1, - "8": 1, - "16": 1, -} -vit_image_size: 224 -network_name: UNetSD_temporal -resume: true -resume_step: 141000 -seed: 14 -num_workers: 1 -mvs_visual: True -chunk_size: 1 -resume_checkpoint: "model_weights/non_ema_141000_no_watermark.pth" -log_dir: 'outputs' -num_steps: 1 diff --git a/modelscope/models/multi_modal/videocomposer/ops/utils.py b/modelscope/models/multi_modal/videocomposer/ops/utils.py index f9aadc15..f47ad8e5 100644 --- a/modelscope/models/multi_modal/videocomposer/ops/utils.py +++ b/modelscope/models/multi_modal/videocomposer/ops/utils.py @@ -62,7 +62,7 @@ def rand_name(length=8, suffix=''): def save_with_model_kwargs(model_kwargs, video_data, autoencoder, ori_video, - viz_num, step, caps, palette, cfg): + viz_num, step, caps, palette, cfg, duration): scale_factor = 0.18215 video_data = 1. / scale_factor * video_data @@ -99,7 +99,8 @@ def save_with_model_kwargs(model_kwargs, video_data, autoencoder, ori_video, cfg.mean, cfg.std, nrow=1, - save_origin_video=cfg.save_origin_video) + save_origin_video=cfg.save_origin_video, + duration=duration) texts = '\n'.join(caps[:viz_num]) open(text_key, 'w').writelines(texts) @@ -395,11 +396,11 @@ def save_image(bucket, @torch.no_grad() -def video_tensor_to_gif(tensor, path, duration=120, loop=0, optimize=True): +def video_tensor_to_gif(tensor, path, duration=200, loop=0, optimize=True): tensor = tensor.permute(1, 2, 3, 0) images = tensor.unbind(dim=0) images = [(image.numpy() * 255).astype('uint8') for image in images] - imageio.mimwrite(path, images, duration=125) + imageio.mimwrite(path, images, duration=duration) return images @@ -449,7 +450,8 @@ def save_video_multiple_conditions(oss_key, nrow=8, retry=5, save_origin_video=True, - bucket=None): + bucket=None, + duration=200): mean = torch.tensor(mean, device=video_tensor.device).view(1, -1, 1, 1, 1) std = torch.tensor(std, device=video_tensor.device).view(1, -1, 1, 1, 1) video_tensor = video_tensor.mul_(std).add_(mean) @@ -525,7 +527,7 @@ def save_video_multiple_conditions(oss_key, vid_gif, ], dim=3) - video_tensor_to_gif(vid_gif, filename) + video_tensor_to_gif(vid_gif, filename, duration=duration) exception = None break except Exception as e: diff --git a/modelscope/models/multi_modal/videocomposer/utils/config.py b/modelscope/models/multi_modal/videocomposer/utils/config.py index 18424257..059e3463 100644 --- a/modelscope/models/multi_modal/videocomposer/utils/config.py +++ b/modelscope/models/multi_modal/videocomposer/utils/config.py @@ -22,7 +22,17 @@ def setup_seed(seed): class Config(object): - def __init__(self, load=True, cfg_dict=None, cfg_level=None): + def __init__(self, + load=True, + cfg_dict=None, + cfg_level=None, + model_dir=None, + cfg_file_name='exp06_text_depths_vs_style.yaml'): + if model_dir is not None and os.path.isdir(model_dir): + self.model_dir = model_dir + '/configs' + else: + raise Exception(f'model_dir {model_dir} is not exist!') + self.cfg_file_name = cfg_file_name self._level = 'cfg' + ('.' + cfg_level if cfg_level is not None else '') if load: @@ -44,9 +54,7 @@ class Config(object): '--cfg', dest='cfg_file', help='Path to the configuration file', - default= - './modelscope/models/multi_modal/videocomposer/configs/exp06_text_depths_vs_style.yaml' - ) + default=os.path.join(self.model_dir, self.cfg_file_name)) parser.add_argument( '--init_method', help='Initialization method, includes TCP or shared file-system', @@ -104,17 +112,11 @@ class Config(object): def _initialize_cfg(self): if self.need_initialization: self.need_initialization = False - if os.path.exists( - './modelscope/models/multi_modal/videocomposer/configs/base.yaml' - ): - with open( - './modelscope/models/multi_modal/videocomposer/configs/base.yaml', - 'r') as f: + if os.path.exists(os.path.join(self.model_dir, 'base.yaml')): + with open(os.path.join(self.model_dir, 'base.yaml'), 'r') as f: cfg = yaml.load(f.read(), Loader=yaml.SafeLoader) else: - with open( - './modelscope/models/multi_modal/videocomposer/configs/base.yaml', - 'r') as f: + with open(os.path.join(self.model_dir, 'base.yaml'), 'r') as f: cfg = yaml.load(f.read(), Loader=yaml.SafeLoader) return cfg @@ -245,7 +247,12 @@ class Config(object): def recur(key, elem): if type(elem) is dict: - return key, Config(load=False, cfg_dict=elem, cfg_level=key) + return key, Config( + load=False, + cfg_dict=elem, + cfg_level=key, + model_dir=self.model_dir, + cfg_file_name=self.cfg_file_name) else: if type(elem) is str and elem[1:3] == 'e-': elem = float(elem) @@ -265,9 +272,3 @@ class Config(object): def deep_copy(self): return copy.deepcopy(self) - - -if __name__ == '__main__': - # debug - cfg = Config(load=True) - print(cfg.DATA) diff --git a/modelscope/models/multi_modal/videocomposer/videocomposer_model.py b/modelscope/models/multi_modal/videocomposer/videocomposer_model.py index 3e2a910c..c085e16f 100644 --- a/modelscope/models/multi_modal/videocomposer/videocomposer_model.py +++ b/modelscope/models/multi_modal/videocomposer/videocomposer_model.py @@ -73,11 +73,20 @@ class VideoComposer(TorchModel): super().__init__(model_dir=model_dir, *args, **kwargs) self.device = torch.device('cuda') if torch.cuda.is_available() \ else torch.device('cpu') + self.duration = kwargs.pop('duration', 200) clip_checkpoint = kwargs.pop('clip_checkpoint', 'open_clip_pytorch_model.bin') sd_checkpoint = kwargs.pop('sd_checkpoint', 'v2-1_512-ema-pruned.ckpt') - _cfg = Config(load=True) + cfg_file_name = kwargs.pop('cfg_file_name', + 'exp06_text_depths_vs_style.yaml') + _cfg = Config( + load=True, + cfg_dict=None, + cfg_level=None, + model_dir=model_dir, + cfg_file_name=cfg_file_name) cfg.update(_cfg.cfg_dict) + # rank-wise params l1 = len(cfg.frame_lens) l2 = len(cfg.feature_framerates) @@ -472,7 +481,8 @@ class VideoComposer(TorchModel): step=0, caps=caps, palette=palette, - cfg=self.cfg) + cfg=self.cfg, + duration=self.duration) return { 'video': video_output.type(torch.float32).cpu(), diff --git a/tests/pipelines/test_videocomposer.py b/tests/pipelines/test_videocomposer.py index 4cbca237..06441313 100644 --- a/tests/pipelines/test_videocomposer.py +++ b/tests/pipelines/test_videocomposer.py @@ -14,7 +14,7 @@ class VideoDeinterlaceTest(unittest.TestCase): def setUp(self) -> None: self.task = Tasks.text_to_video_synthesis self.model_id = 'buptwq/videocomposer' - self.model_revision = 'v1.0.1' + self.model_revision = 'v1.0.4' self.dataset_id = 'buptwq/videocomposer-depths-style' self.text = 'A glittering and translucent fish swimming in a \ small glass bowl with multicolored piece of stone, like a glass fish'