Fix bugs in config file path and duration (#476)

* fix bugs in config file path and duration

* pre commit

* delete configs

* test videocomposer model version
This commit is contained in:
Wang Qiang
2023-08-16 21:03:11 +08:00
committed by wenmeng.zwm
parent d7a2d64a0c
commit cba07bb2fd
13 changed files with 42 additions and 223 deletions

View File

@@ -1,2 +0,0 @@
ENABLE: true
DATASET: webvid10m

View File

@@ -1,20 +0,0 @@
TASK_TYPE: MULTI_TASK
ENABLE: true
DATASET: webvid10m
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
batch_sizes: {
"1": 1,
"4": 1,
"8": 1,
"16": 1,
}
vit_image_size: 224
network_name: UNetSD_temporal
resume: true
resume_step: 228000
num_workers: 1
mvs_visual: False
chunk_size: 1
resume_checkpoint: "model_weights/non_ema_228000.pth"
log_dir: 'outputs'
num_steps: 1

View File

@@ -1,23 +0,0 @@
TASK_TYPE: SINGLE_TASK
read_image: True # You NEED to enable this
ENABLE: true
DATASET: webvid10m
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
guidances: ['y', 'local_image', 'motion'] # You NEED to enable these
batch_sizes: {
"1": 1,
"4": 1,
"8": 1,
"16": 1,
}
vit_image_size: 224
network_name: UNetSD_temporal
resume: true
resume_step: 228000
seed: 182
num_workers: 0
mvs_visual: False
chunk_size: 1
resume_checkpoint: "model_weights/non_ema_228000.pth"
log_dir: 'outputs'
num_steps: 1

View File

@@ -1,24 +0,0 @@
TASK_TYPE: SINGLE_TASK
read_image: True # You NEED to enable this
read_style: True
ENABLE: true
DATASET: webvid10m
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
guidances: ['y', 'local_image', 'image', 'motion'] # You NEED to enable these
batch_sizes: {
"1": 1,
"4": 1,
"8": 1,
"16": 1,
}
vit_image_size: 224
network_name: UNetSD_temporal
resume: true
resume_step: 228000
seed: 182
num_workers: 0
mvs_visual: False
chunk_size: 1
resume_checkpoint: "model_weights/non_ema_228000.pth"
log_dir: 'outputs'
num_steps: 1

View File

@@ -1,26 +0,0 @@
TASK_TYPE: SINGLE_TASK
read_image: False # You NEED to enable this
read_style: True
read_sketch: True
save_origin_video: False
ENABLE: true
DATASET: webvid10m
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
guidances: ['y', 'image', 'single_sketch'] # You NEED to enable these
batch_sizes: {
"1": 1,
"4": 1,
"8": 1,
"16": 1,
}
vit_image_size: 224
network_name: UNetSD_temporal
resume: true
resume_step: 228000
seed: 182
num_workers: 0
mvs_visual: False
chunk_size: 1
resume_checkpoint: "model_weights/non_ema_228000.pth"
log_dir: 'outputs'
num_steps: 1

View File

@@ -1,26 +0,0 @@
TASK_TYPE: SINGLE_TASK
read_image: False # You NEED to enable this
read_style: False
read_sketch: True
save_origin_video: False
ENABLE: true
DATASET: webvid10m
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
guidances: ['y', 'single_sketch'] # You NEED to enable these
batch_sizes: {
"1": 1,
"4": 1,
"8": 1,
"16": 1,
}
vit_image_size: 224
network_name: UNetSD_temporal
resume: true
resume_step: 228000
seed: 182
num_workers: 0
mvs_visual: False
chunk_size: 1
resume_checkpoint: "model_weights/non_ema_228000.pth"
log_dir: 'outputs'
num_steps: 1

View File

@@ -1,26 +0,0 @@
TASK_TYPE: SINGLE_TASK
read_image: False # You NEED to enable this
read_style: False
read_sketch: False
save_origin_video: True
ENABLE: true
DATASET: webvid10m
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
guidances: ['y', 'depth'] # You NEED to enable these
batch_sizes: {
"1": 1,
"4": 1,
"8": 1,
"16": 1,
}
vit_image_size: 224
network_name: UNetSD_temporal
resume: true
resume_step: 228000
seed: 182
num_workers: 0
mvs_visual: False
chunk_size: 1
resume_checkpoint: "model_weights/non_ema_228000.pth"
log_dir: 'outputs'
num_steps: 1

View File

@@ -1,26 +0,0 @@
TASK_TYPE: SINGLE_TASK
read_image: False
read_style: True
read_sketch: False
save_origin_video: True
ENABLE: true
DATASET: webvid10m
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
guidances: ['y', 'image', 'depth'] # You NEED to enable these
batch_sizes: {
"1": 1,
"4": 1,
"8": 1,
"16": 1,
}
vit_image_size: 224
network_name: UNetSD_temporal
resume: true
resume_step: 228000
seed: 182
num_workers: 0
mvs_visual: False
chunk_size: 1
resume_checkpoint: "model_weights/non_ema_141000_no_watermark.pth"
log_dir: 'outputs'
num_steps: 1

View File

@@ -1,21 +0,0 @@
TASK_TYPE: VideoComposer_Inference
ENABLE: true
DATASET: webvid10m
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
batch_sizes: {
"1": 1,
"4": 1,
"8": 1,
"16": 1,
}
vit_image_size: 224
network_name: UNetSD_temporal
resume: true
resume_step: 141000
seed: 14
num_workers: 1
mvs_visual: True
chunk_size: 1
resume_checkpoint: "model_weights/non_ema_141000_no_watermark.pth"
log_dir: 'outputs'
num_steps: 1

View File

@@ -62,7 +62,7 @@ def rand_name(length=8, suffix=''):
def save_with_model_kwargs(model_kwargs, video_data, autoencoder, ori_video,
viz_num, step, caps, palette, cfg):
viz_num, step, caps, palette, cfg, duration):
scale_factor = 0.18215
video_data = 1. / scale_factor * video_data
@@ -99,7 +99,8 @@ def save_with_model_kwargs(model_kwargs, video_data, autoencoder, ori_video,
cfg.mean,
cfg.std,
nrow=1,
save_origin_video=cfg.save_origin_video)
save_origin_video=cfg.save_origin_video,
duration=duration)
texts = '\n'.join(caps[:viz_num])
open(text_key, 'w').writelines(texts)
@@ -395,11 +396,11 @@ def save_image(bucket,
@torch.no_grad()
def video_tensor_to_gif(tensor, path, duration=120, loop=0, optimize=True):
def video_tensor_to_gif(tensor, path, duration=200, loop=0, optimize=True):
tensor = tensor.permute(1, 2, 3, 0)
images = tensor.unbind(dim=0)
images = [(image.numpy() * 255).astype('uint8') for image in images]
imageio.mimwrite(path, images, duration=125)
imageio.mimwrite(path, images, duration=duration)
return images
@@ -449,7 +450,8 @@ def save_video_multiple_conditions(oss_key,
nrow=8,
retry=5,
save_origin_video=True,
bucket=None):
bucket=None,
duration=200):
mean = torch.tensor(mean, device=video_tensor.device).view(1, -1, 1, 1, 1)
std = torch.tensor(std, device=video_tensor.device).view(1, -1, 1, 1, 1)
video_tensor = video_tensor.mul_(std).add_(mean)
@@ -525,7 +527,7 @@ def save_video_multiple_conditions(oss_key,
vid_gif,
], dim=3)
video_tensor_to_gif(vid_gif, filename)
video_tensor_to_gif(vid_gif, filename, duration=duration)
exception = None
break
except Exception as e:

View File

@@ -22,7 +22,17 @@ def setup_seed(seed):
class Config(object):
def __init__(self, load=True, cfg_dict=None, cfg_level=None):
def __init__(self,
load=True,
cfg_dict=None,
cfg_level=None,
model_dir=None,
cfg_file_name='exp06_text_depths_vs_style.yaml'):
if model_dir is not None and os.path.isdir(model_dir):
self.model_dir = model_dir + '/configs'
else:
raise Exception(f'model_dir {model_dir} is not exist!')
self.cfg_file_name = cfg_file_name
self._level = 'cfg' + ('.'
+ cfg_level if cfg_level is not None else '')
if load:
@@ -44,9 +54,7 @@ class Config(object):
'--cfg',
dest='cfg_file',
help='Path to the configuration file',
default=
'./modelscope/models/multi_modal/videocomposer/configs/exp06_text_depths_vs_style.yaml'
)
default=os.path.join(self.model_dir, self.cfg_file_name))
parser.add_argument(
'--init_method',
help='Initialization method, includes TCP or shared file-system',
@@ -104,17 +112,11 @@ class Config(object):
def _initialize_cfg(self):
if self.need_initialization:
self.need_initialization = False
if os.path.exists(
'./modelscope/models/multi_modal/videocomposer/configs/base.yaml'
):
with open(
'./modelscope/models/multi_modal/videocomposer/configs/base.yaml',
'r') as f:
if os.path.exists(os.path.join(self.model_dir, 'base.yaml')):
with open(os.path.join(self.model_dir, 'base.yaml'), 'r') as f:
cfg = yaml.load(f.read(), Loader=yaml.SafeLoader)
else:
with open(
'./modelscope/models/multi_modal/videocomposer/configs/base.yaml',
'r') as f:
with open(os.path.join(self.model_dir, 'base.yaml'), 'r') as f:
cfg = yaml.load(f.read(), Loader=yaml.SafeLoader)
return cfg
@@ -245,7 +247,12 @@ class Config(object):
def recur(key, elem):
if type(elem) is dict:
return key, Config(load=False, cfg_dict=elem, cfg_level=key)
return key, Config(
load=False,
cfg_dict=elem,
cfg_level=key,
model_dir=self.model_dir,
cfg_file_name=self.cfg_file_name)
else:
if type(elem) is str and elem[1:3] == 'e-':
elem = float(elem)
@@ -265,9 +272,3 @@ class Config(object):
def deep_copy(self):
return copy.deepcopy(self)
if __name__ == '__main__':
# debug
cfg = Config(load=True)
print(cfg.DATA)

View File

@@ -73,11 +73,20 @@ class VideoComposer(TorchModel):
super().__init__(model_dir=model_dir, *args, **kwargs)
self.device = torch.device('cuda') if torch.cuda.is_available() \
else torch.device('cpu')
self.duration = kwargs.pop('duration', 200)
clip_checkpoint = kwargs.pop('clip_checkpoint',
'open_clip_pytorch_model.bin')
sd_checkpoint = kwargs.pop('sd_checkpoint', 'v2-1_512-ema-pruned.ckpt')
_cfg = Config(load=True)
cfg_file_name = kwargs.pop('cfg_file_name',
'exp06_text_depths_vs_style.yaml')
_cfg = Config(
load=True,
cfg_dict=None,
cfg_level=None,
model_dir=model_dir,
cfg_file_name=cfg_file_name)
cfg.update(_cfg.cfg_dict)
# rank-wise params
l1 = len(cfg.frame_lens)
l2 = len(cfg.feature_framerates)
@@ -472,7 +481,8 @@ class VideoComposer(TorchModel):
step=0,
caps=caps,
palette=palette,
cfg=self.cfg)
cfg=self.cfg,
duration=self.duration)
return {
'video': video_output.type(torch.float32).cpu(),

View File

@@ -14,7 +14,7 @@ class VideoDeinterlaceTest(unittest.TestCase):
def setUp(self) -> None:
self.task = Tasks.text_to_video_synthesis
self.model_id = 'buptwq/videocomposer'
self.model_revision = 'v1.0.1'
self.model_revision = 'v1.0.4'
self.dataset_id = 'buptwq/videocomposer-depths-style'
self.text = 'A glittering and translucent fish swimming in a \
small glass bowl with multicolored piece of stone, like a glass fish'