mirror of
https://github.com/modelscope/modelscope.git
synced 2026-02-24 20:19:51 +01:00
Fix bugs in the config file path and duration handling (#476)
* Fix bugs in the config file path and duration handling
* Run pre-commit
* Delete configs
* Test videocomposer model version
This commit is contained in:
@@ -1,2 +0,0 @@
|
||||
ENABLE: true
|
||||
DATASET: webvid10m
|
||||
@@ -1,20 +0,0 @@
|
||||
TASK_TYPE: MULTI_TASK
|
||||
ENABLE: true
|
||||
DATASET: webvid10m
|
||||
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
|
||||
batch_sizes: {
|
||||
"1": 1,
|
||||
"4": 1,
|
||||
"8": 1,
|
||||
"16": 1,
|
||||
}
|
||||
vit_image_size: 224
|
||||
network_name: UNetSD_temporal
|
||||
resume: true
|
||||
resume_step: 228000
|
||||
num_workers: 1
|
||||
mvs_visual: False
|
||||
chunk_size: 1
|
||||
resume_checkpoint: "model_weights/non_ema_228000.pth"
|
||||
log_dir: 'outputs'
|
||||
num_steps: 1
|
||||
@@ -1,23 +0,0 @@
|
||||
TASK_TYPE: SINGLE_TASK
|
||||
read_image: True # You NEED Open It
|
||||
ENABLE: true
|
||||
DATASET: webvid10m
|
||||
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
|
||||
guidances: ['y', 'local_image', 'motion'] # You NEED Open It
|
||||
batch_sizes: {
|
||||
"1": 1,
|
||||
"4": 1,
|
||||
"8": 1,
|
||||
"16": 1,
|
||||
}
|
||||
vit_image_size: 224
|
||||
network_name: UNetSD_temporal
|
||||
resume: true
|
||||
resume_step: 228000
|
||||
seed: 182
|
||||
num_workers: 0
|
||||
mvs_visual: False
|
||||
chunk_size: 1
|
||||
resume_checkpoint: "model_weights/non_ema_228000.pth"
|
||||
log_dir: 'outputs'
|
||||
num_steps: 1
|
||||
@@ -1,24 +0,0 @@
|
||||
TASK_TYPE: SINGLE_TASK
|
||||
read_image: True # You NEED Open It
|
||||
read_style: True
|
||||
ENABLE: true
|
||||
DATASET: webvid10m
|
||||
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
|
||||
guidances: ['y', 'local_image', 'image', 'motion'] # You NEED Open It
|
||||
batch_sizes: {
|
||||
"1": 1,
|
||||
"4": 1,
|
||||
"8": 1,
|
||||
"16": 1,
|
||||
}
|
||||
vit_image_size: 224
|
||||
network_name: UNetSD_temporal
|
||||
resume: true
|
||||
resume_step: 228000
|
||||
seed: 182
|
||||
num_workers: 0
|
||||
mvs_visual: False
|
||||
chunk_size: 1
|
||||
resume_checkpoint: "model_weights/non_ema_228000.pth"
|
||||
log_dir: 'outputs'
|
||||
num_steps: 1
|
||||
@@ -1,26 +0,0 @@
|
||||
TASK_TYPE: SINGLE_TASK
|
||||
read_image: False # You NEED Open It
|
||||
read_style: True
|
||||
read_sketch: True
|
||||
save_origin_video: False
|
||||
ENABLE: true
|
||||
DATASET: webvid10m
|
||||
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
|
||||
guidances: ['y', 'image', 'single_sketch'] # You NEED Open It
|
||||
batch_sizes: {
|
||||
"1": 1,
|
||||
"4": 1,
|
||||
"8": 1,
|
||||
"16": 1,
|
||||
}
|
||||
vit_image_size: 224
|
||||
network_name: UNetSD_temporal
|
||||
resume: true
|
||||
resume_step: 228000
|
||||
seed: 182
|
||||
num_workers: 0
|
||||
mvs_visual: False
|
||||
chunk_size: 1
|
||||
resume_checkpoint: "model_weights/non_ema_228000.pth"
|
||||
log_dir: 'outputs'
|
||||
num_steps: 1
|
||||
@@ -1,26 +0,0 @@
|
||||
TASK_TYPE: SINGLE_TASK
|
||||
read_image: False # You NEED Open It
|
||||
read_style: False
|
||||
read_sketch: True
|
||||
save_origin_video: False
|
||||
ENABLE: true
|
||||
DATASET: webvid10m
|
||||
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
|
||||
guidances: ['y', 'single_sketch'] # You NEED Open It
|
||||
batch_sizes: {
|
||||
"1": 1,
|
||||
"4": 1,
|
||||
"8": 1,
|
||||
"16": 1,
|
||||
}
|
||||
vit_image_size: 224
|
||||
network_name: UNetSD_temporal
|
||||
resume: true
|
||||
resume_step: 228000
|
||||
seed: 182
|
||||
num_workers: 0
|
||||
mvs_visual: False
|
||||
chunk_size: 1
|
||||
resume_checkpoint: "model_weights/non_ema_228000.pth"
|
||||
log_dir: 'outputs'
|
||||
num_steps: 1
|
||||
@@ -1,26 +0,0 @@
|
||||
TASK_TYPE: SINGLE_TASK
|
||||
read_image: False # You NEED Open It
|
||||
read_style: False
|
||||
read_sketch: False
|
||||
save_origin_video: True
|
||||
ENABLE: true
|
||||
DATASET: webvid10m
|
||||
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
|
||||
guidances: ['y', 'depth'] # You NEED Open It
|
||||
batch_sizes: {
|
||||
"1": 1,
|
||||
"4": 1,
|
||||
"8": 1,
|
||||
"16": 1,
|
||||
}
|
||||
vit_image_size: 224
|
||||
network_name: UNetSD_temporal
|
||||
resume: true
|
||||
resume_step: 228000
|
||||
seed: 182
|
||||
num_workers: 0
|
||||
mvs_visual: False
|
||||
chunk_size: 1
|
||||
resume_checkpoint: "model_weights/non_ema_228000.pth"
|
||||
log_dir: 'outputs'
|
||||
num_steps: 1
|
||||
@@ -1,26 +0,0 @@
|
||||
TASK_TYPE: SINGLE_TASK
|
||||
read_image: False
|
||||
read_style: True
|
||||
read_sketch: False
|
||||
save_origin_video: True
|
||||
ENABLE: true
|
||||
DATASET: webvid10m
|
||||
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
|
||||
guidances: ['y', 'image', 'depth'] # You NEED Open It
|
||||
batch_sizes: {
|
||||
"1": 1,
|
||||
"4": 1,
|
||||
"8": 1,
|
||||
"16": 1,
|
||||
}
|
||||
vit_image_size: 224
|
||||
network_name: UNetSD_temporal
|
||||
resume: true
|
||||
resume_step: 228000
|
||||
seed: 182
|
||||
num_workers: 0
|
||||
mvs_visual: False
|
||||
chunk_size: 1
|
||||
resume_checkpoint: "model_weights/non_ema_141000_no_watermark.pth"
|
||||
log_dir: 'outputs'
|
||||
num_steps: 1
|
||||
@@ -1,21 +0,0 @@
|
||||
TASK_TYPE: VideoComposer_Inference
|
||||
ENABLE: true
|
||||
DATASET: webvid10m
|
||||
video_compositions: ['text', 'mask', 'depthmap', 'sketch', 'motion', 'image', 'local_image', 'single_sketch']
|
||||
batch_sizes: {
|
||||
"1": 1,
|
||||
"4": 1,
|
||||
"8": 1,
|
||||
"16": 1,
|
||||
}
|
||||
vit_image_size: 224
|
||||
network_name: UNetSD_temporal
|
||||
resume: true
|
||||
resume_step: 141000
|
||||
seed: 14
|
||||
num_workers: 1
|
||||
mvs_visual: True
|
||||
chunk_size: 1
|
||||
resume_checkpoint: "model_weights/non_ema_141000_no_watermark.pth"
|
||||
log_dir: 'outputs'
|
||||
num_steps: 1
|
||||
@@ -62,7 +62,7 @@ def rand_name(length=8, suffix=''):
|
||||
|
||||
|
||||
def save_with_model_kwargs(model_kwargs, video_data, autoencoder, ori_video,
|
||||
viz_num, step, caps, palette, cfg):
|
||||
viz_num, step, caps, palette, cfg, duration):
|
||||
scale_factor = 0.18215
|
||||
video_data = 1. / scale_factor * video_data
|
||||
|
||||
@@ -99,7 +99,8 @@ def save_with_model_kwargs(model_kwargs, video_data, autoencoder, ori_video,
|
||||
cfg.mean,
|
||||
cfg.std,
|
||||
nrow=1,
|
||||
save_origin_video=cfg.save_origin_video)
|
||||
save_origin_video=cfg.save_origin_video,
|
||||
duration=duration)
|
||||
|
||||
texts = '\n'.join(caps[:viz_num])
|
||||
open(text_key, 'w').writelines(texts)
|
||||
@@ -395,11 +396,11 @@ def save_image(bucket,
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
def video_tensor_to_gif(tensor, path, duration=120, loop=0, optimize=True):
|
||||
def video_tensor_to_gif(tensor, path, duration=200, loop=0, optimize=True):
|
||||
tensor = tensor.permute(1, 2, 3, 0)
|
||||
images = tensor.unbind(dim=0)
|
||||
images = [(image.numpy() * 255).astype('uint8') for image in images]
|
||||
imageio.mimwrite(path, images, duration=125)
|
||||
imageio.mimwrite(path, images, duration=duration)
|
||||
return images
|
||||
|
||||
|
||||
@@ -449,7 +450,8 @@ def save_video_multiple_conditions(oss_key,
|
||||
nrow=8,
|
||||
retry=5,
|
||||
save_origin_video=True,
|
||||
bucket=None):
|
||||
bucket=None,
|
||||
duration=200):
|
||||
mean = torch.tensor(mean, device=video_tensor.device).view(1, -1, 1, 1, 1)
|
||||
std = torch.tensor(std, device=video_tensor.device).view(1, -1, 1, 1, 1)
|
||||
video_tensor = video_tensor.mul_(std).add_(mean)
|
||||
@@ -525,7 +527,7 @@ def save_video_multiple_conditions(oss_key,
|
||||
vid_gif,
|
||||
], dim=3)
|
||||
|
||||
video_tensor_to_gif(vid_gif, filename)
|
||||
video_tensor_to_gif(vid_gif, filename, duration=duration)
|
||||
exception = None
|
||||
break
|
||||
except Exception as e:
|
||||
|
||||
@@ -22,7 +22,17 @@ def setup_seed(seed):
|
||||
|
||||
class Config(object):
|
||||
|
||||
def __init__(self, load=True, cfg_dict=None, cfg_level=None):
|
||||
def __init__(self,
|
||||
load=True,
|
||||
cfg_dict=None,
|
||||
cfg_level=None,
|
||||
model_dir=None,
|
||||
cfg_file_name='exp06_text_depths_vs_style.yaml'):
|
||||
if model_dir is not None and os.path.isdir(model_dir):
|
||||
self.model_dir = model_dir + '/configs'
|
||||
else:
|
||||
raise Exception(f'model_dir {model_dir} is not exist!')
|
||||
self.cfg_file_name = cfg_file_name
|
||||
self._level = 'cfg' + ('.'
|
||||
+ cfg_level if cfg_level is not None else '')
|
||||
if load:
|
||||
@@ -44,9 +54,7 @@ class Config(object):
|
||||
'--cfg',
|
||||
dest='cfg_file',
|
||||
help='Path to the configuration file',
|
||||
default=
|
||||
'./modelscope/models/multi_modal/videocomposer/configs/exp06_text_depths_vs_style.yaml'
|
||||
)
|
||||
default=os.path.join(self.model_dir, self.cfg_file_name))
|
||||
parser.add_argument(
|
||||
'--init_method',
|
||||
help='Initialization method, includes TCP or shared file-system',
|
||||
@@ -104,17 +112,11 @@ class Config(object):
|
||||
def _initialize_cfg(self):
|
||||
if self.need_initialization:
|
||||
self.need_initialization = False
|
||||
if os.path.exists(
|
||||
'./modelscope/models/multi_modal/videocomposer/configs/base.yaml'
|
||||
):
|
||||
with open(
|
||||
'./modelscope/models/multi_modal/videocomposer/configs/base.yaml',
|
||||
'r') as f:
|
||||
if os.path.exists(os.path.join(self.model_dir, 'base.yaml')):
|
||||
with open(os.path.join(self.model_dir, 'base.yaml'), 'r') as f:
|
||||
cfg = yaml.load(f.read(), Loader=yaml.SafeLoader)
|
||||
else:
|
||||
with open(
|
||||
'./modelscope/models/multi_modal/videocomposer/configs/base.yaml',
|
||||
'r') as f:
|
||||
with open(os.path.join(self.model_dir, 'base.yaml'), 'r') as f:
|
||||
cfg = yaml.load(f.read(), Loader=yaml.SafeLoader)
|
||||
return cfg
|
||||
|
||||
@@ -245,7 +247,12 @@ class Config(object):
|
||||
|
||||
def recur(key, elem):
|
||||
if type(elem) is dict:
|
||||
return key, Config(load=False, cfg_dict=elem, cfg_level=key)
|
||||
return key, Config(
|
||||
load=False,
|
||||
cfg_dict=elem,
|
||||
cfg_level=key,
|
||||
model_dir=self.model_dir,
|
||||
cfg_file_name=self.cfg_file_name)
|
||||
else:
|
||||
if type(elem) is str and elem[1:3] == 'e-':
|
||||
elem = float(elem)
|
||||
@@ -265,9 +272,3 @@ class Config(object):
|
||||
|
||||
def deep_copy(self):
|
||||
return copy.deepcopy(self)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# debug
|
||||
cfg = Config(load=True)
|
||||
print(cfg.DATA)
|
||||
|
||||
@@ -73,11 +73,20 @@ class VideoComposer(TorchModel):
|
||||
super().__init__(model_dir=model_dir, *args, **kwargs)
|
||||
self.device = torch.device('cuda') if torch.cuda.is_available() \
|
||||
else torch.device('cpu')
|
||||
self.duration = kwargs.pop('duration', 200)
|
||||
clip_checkpoint = kwargs.pop('clip_checkpoint',
|
||||
'open_clip_pytorch_model.bin')
|
||||
sd_checkpoint = kwargs.pop('sd_checkpoint', 'v2-1_512-ema-pruned.ckpt')
|
||||
_cfg = Config(load=True)
|
||||
cfg_file_name = kwargs.pop('cfg_file_name',
|
||||
'exp06_text_depths_vs_style.yaml')
|
||||
_cfg = Config(
|
||||
load=True,
|
||||
cfg_dict=None,
|
||||
cfg_level=None,
|
||||
model_dir=model_dir,
|
||||
cfg_file_name=cfg_file_name)
|
||||
cfg.update(_cfg.cfg_dict)
|
||||
|
||||
# rank-wise params
|
||||
l1 = len(cfg.frame_lens)
|
||||
l2 = len(cfg.feature_framerates)
|
||||
@@ -472,7 +481,8 @@ class VideoComposer(TorchModel):
|
||||
step=0,
|
||||
caps=caps,
|
||||
palette=palette,
|
||||
cfg=self.cfg)
|
||||
cfg=self.cfg,
|
||||
duration=self.duration)
|
||||
|
||||
return {
|
||||
'video': video_output.type(torch.float32).cpu(),
|
||||
|
||||
@@ -14,7 +14,7 @@ class VideoDeinterlaceTest(unittest.TestCase):
|
||||
def setUp(self) -> None:
|
||||
self.task = Tasks.text_to_video_synthesis
|
||||
self.model_id = 'buptwq/videocomposer'
|
||||
self.model_revision = 'v1.0.1'
|
||||
self.model_revision = 'v1.0.4'
|
||||
self.dataset_id = 'buptwq/videocomposer-depths-style'
|
||||
self.text = 'A glittering and translucent fish swimming in a \
|
||||
small glass bowl with multicolored piece of stone, like a glass fish'
|
||||
|
||||
Reference in New Issue
Block a user