From 6924c1583aac01ce6404cd57ba2132a2e5c10ef9 Mon Sep 17 00:00:00 2001 From: "ryan.yy" Date: Wed, 1 Mar 2023 12:05:57 +0800 Subject: [PATCH] =?UTF-8?q?nerf=E9=87=8D=E5=BB=BA=E5=8A=A0=E9=80=9F?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=20=E5=8A=A0=E5=85=A5trainer=E8=AE=AD?= =?UTF-8?q?=E7=BB=83=E6=A8=A1=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11794296 --- modelscope/metainfo.py | 1 + .../cv/nerf_recon_acc/nerf_preprocess.py | 8 +- .../cv/nerf_recon_acc/nerf_recon_acc.py | 186 ++----- modelscope/outputs/outputs.py | 2 +- .../pipelines/cv/nerf_recon_acc_pipeline.py | 55 +- modelscope/trainers/cv/__init__.py | 4 +- .../trainers/cv/nerf_recon_acc_trainer.py | 484 ++++++++++++++++++ tests/pipelines/test_nerf_recon_acc.py | 36 +- tests/trainers/test_nerf_recon_acc_trainer.py | 34 ++ 9 files changed, 620 insertions(+), 190 deletions(-) create mode 100644 modelscope/trainers/cv/nerf_recon_acc_trainer.py create mode 100644 tests/trainers/test_nerf_recon_acc_trainer.py diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index f5c7f23c..52721f72 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -799,6 +799,7 @@ class CVTrainers(object): image_classification_team = 'image-classification-team' image_classification = 'image-classification' image_fewshot_detection = 'image-fewshot-detection' + nerf_recon_acc = 'nerf-recon-acc' class NLPTrainers(object): diff --git a/modelscope/models/cv/nerf_recon_acc/nerf_preprocess.py b/modelscope/models/cv/nerf_recon_acc/nerf_preprocess.py index 1fd90d49..8be3f42b 100644 --- a/modelscope/models/cv/nerf_recon_acc/nerf_preprocess.py +++ b/modelscope/models/cv/nerf_recon_acc/nerf_preprocess.py @@ -50,9 +50,6 @@ class NeRFReconPreprocessor(Preprocessor): raise Exception('data type {} is not support currently'.format( self.data_type)) - if 'data_dir' not in data.keys(): - raise Exception('Do not specify the data 
dir') - data_dir = data['data_dir'] os.makedirs(data_dir, exist_ok=True) if self.data_type == 'blender': @@ -61,10 +58,9 @@ class NeRFReconPreprocessor(Preprocessor): raise Exception('Blender dataset is not found') if self.data_type == 'colmap': - if 'video_input_path' not in data.keys(): - raise Exception('Do not specify the video path') video_path = data['video_input_path'] - self.split_frames(video_path, data_dir, self.frame_count) + if video_path != '': + self.split_frames(video_path, data_dir, self.frame_count) self.gen_poses(data_dir, self.match_type, self.use_distortion) files_needed = [ '{}.bin'.format(f) for f in ['cameras', 'images', 'points3D'] diff --git a/modelscope/models/cv/nerf_recon_acc/nerf_recon_acc.py b/modelscope/models/cv/nerf_recon_acc/nerf_recon_acc.py index d16aa4fe..2d2fbd58 100644 --- a/modelscope/models/cv/nerf_recon_acc/nerf_recon_acc.py +++ b/modelscope/models/cv/nerf_recon_acc/nerf_recon_acc.py @@ -1,16 +1,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import glob import os -import random -import re import time -from collections import OrderedDict -from typing import Any, Dict import cv2 import numpy as np import torch -import torch.nn.functional as F import tqdm from modelscope.metainfo import Models @@ -20,7 +15,6 @@ from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.logger import get_logger from .dataloader.nerf_dataset import BlenderDataset, ColmapDataset from .network.nerf import NeRFModel -from .network.segmenter import ObjectSegmenter from .network.utils import PSNR logger = get_logger() @@ -32,47 +26,38 @@ __all__ = ['NeRFReconAcc'] Tasks.nerf_recon_acc, module_name=Models.nerf_recon_acc) class NeRFReconAcc(TorchModel): - def __init__(self, model_dir, data_type, use_mask, network_cfg, **kwargs): + def __init__(self, model_dir, network_cfg, **kwargs): """initialize the acceleration version of nerf reconstruction model for object. 
+ NeRFReconAcc accelerate single object reconstruction time from ~10hours to ~10min. Args: model_dir (str): the model path. - data_type (str): default is 'colmap' + data_type (str): only support 'blender' or 'colmap' use_mask (bool): whether use mask of objects, default True - max_step (int): max train steps, default 30000 - train_num_rays (int): init number of rays in training, default 256 num_samples_per_ray (int): sampling numbers for each ray, default 1024 - max_train_num_rays (int): max number of rays in training, default 8192 test_ray_chunk (int): chunk size for rendering, default 1024 - dynamic_ray_sampling (bool): whether use dynamic ray sampling when training, default True max_size (int): max size of (width, height) when training, default 800 n_test_traj_steps (int): number of testing images, default 120 log_every_n_steps (int): print log info every n steps, default 1000 save_mesh (bool): whether to save the reconstructed mesh of object, default False - save_ckpt (bool): whether to save the checkpoints in data_dir, default False network_cfg (dict): args of network config """ super().__init__(model_dir, **kwargs) if not torch.cuda.is_available(): raise Exception('GPU is required') - - self.data_type = data_type - self.use_mask = use_mask - self.max_step = kwargs['max_step'] - self.train_num_rays = kwargs['train_num_rays'] + logger.info('model params:{}'.format(kwargs)) + self.data_type = kwargs['data_type'] + self.use_mask = kwargs['use_mask'] self.num_samples_per_ray = kwargs['num_samples_per_ray'] - self.train_num_samples = self.train_num_rays * self.num_samples_per_ray - self.max_train_num_rays = kwargs['max_train_num_rays'] - self.dynamic_ray_sampling = kwargs['dynamic_ray_sampling'] - - self.log_every_n_steps = kwargs['log_every_n_steps'] + self.test_ray_chunk = kwargs['test_ray_chunk'] self.save_mesh = kwargs['save_mesh'] - self.save_ckpt = kwargs['save_ckpt'] + self.ckpt_path = kwargs['ckpt_path'] - if self.use_mask: - segment_path = 
os.path.join(model_dir, 'matting.pb') - self.segmenter = ObjectSegmenter(segment_path) + if self.ckpt_path == '': + self.ckpt_path = os.path.join(model_dir, 'model.ckpt') + if not os.path.exists(self.ckpt_path): + raise Exception('ckpt path not found') if self.data_type == 'blender': self.img_wh = (800, 800) @@ -94,149 +79,44 @@ class NeRFReconAcc(TorchModel): logger.info('run nerf without mask data') logger.info(network_cfg) - self.model = NeRFModel(network_cfg, **kwargs).cuda() - self.optimizer = torch.optim.Adam( - self.model.parameters(), lr=0.01, eps=1e-15) - self.grad_scaler = torch.cuda.amp.GradScaler(2**10) + self.model = NeRFModel( + network_cfg, + num_samples_per_ray=self.num_samples_per_ray, + test_ray_chunk=self.test_ray_chunk).cuda() + + checkpoints = torch.load(self.ckpt_path) + self.model.load_state_dict(checkpoints['network_state_dict']) + self.model = self.model.cuda() + self.model.eval() - self.scheduler = torch.optim.lr_scheduler.MultiStepLR( - self.optimizer, - milestones=[ - self.max_step // 2, self.max_step * 3 // 4, - self.max_step * 9 // 10 - ], - gamma=0.33, - ) self.criterions = PSNR() - self.set_random_seed(42) - - def set_random_seed(self, seed): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - @torch.enable_grad() - def nerf_reconstruction(self, data_dir): - if os.path.exists(os.path.join(data_dir, 'preprocess')): - use_distortion = True - else: - use_distortion = False - if self.use_mask: - if use_distortion: - image_dir = os.path.join(data_dir, 'preprocess/images') - save_mask_dir = os.path.join(data_dir, 'preprocess/masks') - else: - image_dir = os.path.join(data_dir, 'images') - save_mask_dir = os.path.join(data_dir, 'masks') - os.makedirs(save_mask_dir, exist_ok=True) - img_list = glob.glob('{}/*.*g'.format(image_dir)) + glob.glob( - '{}/*.*G'.format(image_dir)) - for img_path in img_list: - img = cv2.imread(img_path) - mask = self.segmenter.run_mask(img) - outpath = os.path.join(save_mask_dir, - 
os.path.basename(img_path)) - cv2.imwrite(outpath, mask) - logger.info('segment images done!') + def nerf_reconstruction(self, data_dir, render_dir): if self.data_type == 'blender': - self.train_dataset = BlenderDataset( - root_fp=data_dir, - split='train', - img_wh=self.img_wh, - num_rays=self.train_num_rays, - color_bkgd_aug=self.background, - ) - self.test_dataset = BlenderDataset( root_fp=data_dir, split='test', img_wh=self.img_wh, - num_rays=self.train_num_rays, ) elif self.data_type == 'colmap': - self.train_dataset = ColmapDataset( - root_fp=data_dir, - split='train', - img_wh=self.img_wh, - max_size=self.max_size, - num_rays=self.train_num_rays, - color_bkgd_aug=self.background, - ) - self.test_dataset = ColmapDataset( root_fp=data_dir, split='test', img_wh=self.img_wh, max_size=self.max_size, - num_rays=self.train_num_rays, n_test_traj_steps=self.n_test_traj_steps, ) - step = 0 - tic = time.time() - while step < self.max_step: - for i in range(len(self.train_dataset)): - self.model.train() - data = self.train_dataset[i] - self.model.update_step(step) - rays = data['rays'].cuda() - pixels = data['pixels'].cuda() + tic_start = time.time() + os.makedirs(render_dir, exist_ok=True) + logger.info('save render path: {}.'.format(render_dir)) + self.render_video(render_dir) + tic_end = time.time() + duration = tic_end - tic_start + logger.info('reconstruction done, cost time: {}'.format(duration)) - out = self.model(rays) - - if out['num_samples'] == 0: - continue - - loss = 0. 
- - if self.dynamic_ray_sampling: - temp = self.train_num_samples / sum(out['num_samples']) - train_num_rays = int(self.train_num_rays * temp) - self.train_num_rays = min( - int(self.train_num_rays * 0.9 + train_num_rays * 0.1), - self.max_train_num_rays) - - self.train_dataset.update_num_rays(self.train_num_rays) - loss_rgb = F.smooth_l1_loss(out['comp_rgb'][out['rays_valid']], - pixels[out['rays_valid']]) - loss += loss_rgb - psnr = self.criterions(out['comp_rgb'], pixels) - self.optimizer.zero_grad() - self.grad_scaler.scale(loss).backward() - self.optimizer.step() - self.scheduler.step() - - if step % self.log_every_n_steps == 0: - - elapsed_time = time.time() - tic - logger.info( - f'elapsed_time={elapsed_time:.2f}s | step={step} | ' - f'loss={loss:.4f} | ' - f'train/num_rays={self.train_num_rays:d} |' - f'PSNR={psnr:.4f} ') - - step += 1 - - save_video_path = os.path.join(data_dir, 'render.mp4') - self.render_video(data_dir, save_video_path) - if self.save_ckpt: - save_ckpt_dir = os.path.join(data_dir, 'ckpt') - os.makedirs(save_ckpt_dir, exist_ok=True) - torch.save( - { - 'global_step': self.max_step, - 'network_state_dict': self.model.state_dict(), - 'optimizer_state_dict': self.optimizer.state_dict(), - }, os.path.join(save_ckpt_dir, '{}.ckpt'.format(step))) - logger.info('save checkpoints done') - - logger.info('reconstruction finish') - return save_video_path - - def render_video(self, data_dir, save_video_path): - self.model.eval() + def render_video(self, render_dir): with torch.no_grad(): psnr = 0 for i in tqdm.tqdm(range(len(self.test_dataset))): @@ -251,19 +131,21 @@ class NeRFReconAcc(TorchModel): W, H = image_wh img = out['comp_rgb'].view(H, W, 3) - save_img_dir = os.path.join(data_dir, 'render') + save_img_dir = os.path.join(render_dir, 'render') os.makedirs(save_img_dir, exist_ok=True) save_img_path = os.path.join(save_img_dir, f'{i:d}.png') self.save_image(save_img_path, img) + save_video_path = os.path.join(render_dir, 'render.mp4') 
self.save_video(save_video_path, save_img_dir) logger.info('test psnr: {}'.format(psnr / len(self.test_dataset))) logger.info('save render video done.') if self.save_mesh: mesh = self.model.isosurface() - save_mesh_path = os.path.join(data_dir, 'out.obj') + save_mesh_path = os.path.join(render_dir, 'render.obj') self.save_obj(save_mesh_path, mesh['v_pos'], mesh['t_pos_idx']) + logger.info('save render mesh done.') def save_image(self, filename, img): img = img.clip(0, 1).cpu().numpy() diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py index 78fe0911..bc3c64d7 100644 --- a/modelscope/outputs/outputs.py +++ b/modelscope/outputs/outputs.py @@ -376,7 +376,7 @@ TASK_OUTPUTS = { Tasks.video_frame_interpolation: [OutputKeys.OUTPUT_VIDEO], Tasks.video_super_resolution: [OutputKeys.OUTPUT_VIDEO], Tasks.video_deinterlace: [OutputKeys.OUTPUT_VIDEO], - Tasks.nerf_recon_acc: [OutputKeys.OUTPUT_VIDEO], + Tasks.nerf_recon_acc: [OutputKeys.OUTPUT], Tasks.video_colorization: [OutputKeys.OUTPUT_VIDEO], # image quality assessment degradation result for single image diff --git a/modelscope/pipelines/cv/nerf_recon_acc_pipeline.py b/modelscope/pipelines/cv/nerf_recon_acc_pipeline.py index 27bbb2e3..a940ecfe 100644 --- a/modelscope/pipelines/cv/nerf_recon_acc_pipeline.py +++ b/modelscope/pipelines/cv/nerf_recon_acc_pipeline.py @@ -2,11 +2,11 @@ from typing import Any, Dict from modelscope.metainfo import Pipelines -from modelscope.models.cv.nerf_recon_acc import NeRFReconPreprocessor from modelscope.outputs import OutputKeys from modelscope.pipelines.base import Input, Model, Pipeline from modelscope.pipelines.builder import PIPELINES -from modelscope.utils.constant import Tasks +from modelscope.pipelines.util import is_model, is_official_hub_path +from modelscope.utils.constant import Invoke, Tasks from modelscope.utils.logger import get_logger logger = get_logger() @@ -23,23 +23,47 @@ class NeRFReconAccPipeline(Pipeline): >>> nerf_recon_acc = 
pipeline(Tasks.nerf_recon_acc, 'damo/cv_nerf-3d-reconstruction-accelerate_damo') >>> nerf_recon_acc({ - 'video_input_path': 'input.mp4', # input video path (str) 'data_dir': '/data/lego', # data dir path (str) + 'render_dir': 'save_dir', # save dir path (str) }) - { - "output": 'render.mp4' # saved path of render video (str) - } >>> # ``` """ - def __init__(self, model, data_type='colmap', use_mask=True, **kwargs): + def __init__(self, + model, + data_type='blender', + use_mask=True, + ckpt_path='', + save_mesh=False, + n_test_traj_steps=120, + test_ray_chunk=1024, + device='gpu', + **kwargs): """ - use `model` to create a image sky change pipeline for image editing + use model to create a image sky change pipeline for image editing Args: - model (`str` or `Model`): model_id on modelscope hub - preprocessor(`Preprocessor`, *optional*, defaults to None): `NeRFReconPreprocessor`. + model (str or Model): model_id on modelscope hub + data_type (str): currently only support 'blender' and 'colmap' + use_mask (bool): segment the object or not + ckpt_path (str): the checkpoint ckpt_path + save_mesh (bool): render mesh or not + n_test_traj_steps (int): number of random sampled images for test view, only for colmap data. 
+ test_ray_chunk (int): ray chunk size for test, avoid GPU OOM + device (str): only support gpu """ + model = Model.from_pretrained( + model, + device=device, + model_prefetched=True, + invoked_by=Invoke.PIPELINE, + data_type=data_type, + use_mask=use_mask, + ckpt_path=ckpt_path, + save_mesh=save_mesh, + n_test_traj_steps=n_test_traj_steps, + test_ray_chunk=test_ray_chunk) if is_model(model) else model + super().__init__(model=model, **kwargs) if not isinstance(self.model, Model): logger.error('model object is not initialized.') @@ -49,15 +73,16 @@ class NeRFReconAccPipeline(Pipeline): raise Exception('data type {} is not support currently'.format( self.data_type)) self.use_mask = use_mask - - self.preprocessor = NeRFReconPreprocessor( - data_type=self.data_type, use_mask=self.use_mask) logger.info('load model done') + def preprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: data_dir = input['data_dir'] - result = self.model.nerf_reconstruction(data_dir) - return {OutputKeys.OUTPUT_VIDEO: result} + render_dir = input['render_dir'] + self.model.nerf_reconstruction(data_dir, render_dir) + return {OutputKeys.OUTPUT: 'Done'} def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: return inputs diff --git a/modelscope/trainers/cv/__init__.py b/modelscope/trainers/cv/__init__.py index 78023c2c..c31342ae 100644 --- a/modelscope/trainers/cv/__init__.py +++ b/modelscope/trainers/cv/__init__.py @@ -12,6 +12,7 @@ if TYPE_CHECKING: from .referring_video_object_segmentation_trainer import ReferringVideoObjectSegmentationTrainer from .image_defrcn_fewshot_detection_trainer import ImageDefrcnFewshotTrainer from .cartoon_translation_trainer import CartoonTranslationTrainer + from .nerf_recon_acc_trainer import NeRFReconAccTrainer else: _import_structure = { @@ -25,7 +26,8 @@ else: ['ReferringVideoObjectSegmentationTrainer'], 'image_defrcn_fewshot_detection_trainer': 
['ImageDefrcnFewshotTrainer'], - 'cartoon_translation_trainer': ['CartoonTranslationTrainer'] + 'cartoon_translation_trainer': ['CartoonTranslationTrainer'], + 'nerf_recon_acc_trainer': ['NeRFReconAccTrainer'], } import sys diff --git a/modelscope/trainers/cv/nerf_recon_acc_trainer.py b/modelscope/trainers/cv/nerf_recon_acc_trainer.py new file mode 100644 index 00000000..51d70080 --- /dev/null +++ b/modelscope/trainers/cv/nerf_recon_acc_trainer.py @@ -0,0 +1,484 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import glob +import os +import os.path as osp +import random +import time +from datetime import datetime +from typing import Dict, Optional + +import cv2 +import numpy as np +import torch +import torch.nn.functional as F +import tqdm + +from modelscope.metainfo import Trainers +from modelscope.models.cv.nerf_recon_acc import NeRFReconPreprocessor +from modelscope.models.cv.nerf_recon_acc.dataloader.nerf_dataset import ( + BlenderDataset, ColmapDataset) +from modelscope.models.cv.nerf_recon_acc.network.nerf import NeRFModel +from modelscope.models.cv.nerf_recon_acc.network.segmenter import \ + ObjectSegmenter +from modelscope.models.cv.nerf_recon_acc.network.utils import PSNR +from modelscope.trainers.base import BaseTrainer +from modelscope.trainers.builder import TRAINERS +from modelscope.utils.constant import ModelFile +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@TRAINERS.register_module(module_name=Trainers.nerf_recon_acc) +class NeRFReconAccTrainer(BaseTrainer): + """initialize the acceleration version of nerf reconstruction model for object. + + Args: + model (str): the model path. 
+ cfg_file (str): cfg json file + data_type (str): only support 'blender' or 'colmap' + use_mask (bool): whether use mask of objects, default True + max_step (int): max train steps, default 30000 + train_num_rays (int): init number of rays in training, default 256 + num_samples_per_ray (int): sampling numbers for each ray, default 1024 + max_train_num_rays (int): max number of rays in training, default 8192 + test_ray_chunk (int): chunk size for rendering, default 1024 + dynamic_ray_sampling (bool): whether use dynamic ray sampling when training, default True + max_size (int): max size of (width, height) when training, default 800 + n_test_traj_steps (int): number of testing images, default 120 + log_every_n_steps (int): print log info every n steps, default 1000 + work_dir (str): dir to save ckpt and other results + render_images (bool): whether to render test image after training + save_mesh (bool): whether to save the reconstructed mesh of object, default False + save_ckpt (bool): whether to save the checkpoints in data_dir, default False + network_cfg (dict): args of network config + match_type (str): colmap feature matching type, only for colmap data + frame_count (str): extract number of frames, only for video input + use_distortion (bool): whether run colmap undistortion + """ + + def __init__(self, + model: str, + cfg_file: str = None, + data_type=None, + use_mask=None, + max_step=None, + train_num_rays=None, + max_train_num_rays=None, + log_every_n_steps=None, + work_dir=None, + render_images=None, + save_ckpt=None, + frame_count=None, + use_distortion=None, + *args, + **kwargs): + + model = self.get_or_download_model_dir(model) + self.model_dir = model + if cfg_file is None: + cfg_file = osp.join(model, ModelFile.CONFIGURATION) + super().__init__(cfg_file) + + if not torch.cuda.is_available(): + raise Exception('GPU is required') + + self.params = {} + self._override_params_from_file() + for key, value in kwargs.items(): + self.params[key] = value + if 
data_type is not None: + self.params['data_type'] = data_type + if use_mask is not None: + self.params['use_mask'] = use_mask + if max_step is not None: + self.params['max_step'] = max_step + if train_num_rays is not None: + self.params['train_num_rays'] = train_num_rays + if max_train_num_rays is not None: + self.params['max_train_num_rays'] = max_train_num_rays + if log_every_n_steps is not None: + self.params['log_every_n_steps'] = log_every_n_steps + if work_dir is not None: + self.params['work_dir'] = work_dir + if render_images is not None: + self.params['render_images'] = render_images + if save_ckpt is not None: + self.params['save_ckpt'] = save_ckpt + if frame_count is not None: + self.params['frame_count'] = frame_count + if use_distortion is not None: + self.params['use_distortion'] = use_distortion + + self.data_type = self.params['data_type'] + if self.data_type != 'blender' and self.data_type != 'colmap': + raise Exception('data type {} is not support currently'.format( + self.data_type)) + + self.use_mask = self.params['use_mask'] + self.max_step = self.params['max_step'] + self.train_num_rays = self.params['train_num_rays'] + self.num_samples_per_ray = self.params['num_samples_per_ray'] + self.train_num_samples = self.train_num_rays * self.num_samples_per_ray + self.max_train_num_rays = self.params['max_train_num_rays'] + self.test_ray_chunk = self.params['test_ray_chunk'] + self.dynamic_ray_sampling = self.params['dynamic_ray_sampling'] + self.max_size = self.params['max_size'] + self.n_test_traj_steps = self.params['n_test_traj_steps'] + self.log_every_n_steps = self.params['log_every_n_steps'] + self.render_images = self.params['render_images'] + self.save_mesh = self.params['save_mesh'] + self.save_ckpt = self.params['save_ckpt'] + self.work_dir = self.params['work_dir'] + self.network_cfg = self.params['network_cfg'] + self.match_type = self.params['match_type'] + self.frame_count = self.params['frame_count'] + self.use_distortion = 
self.params['use_distortion'] + logger.info('params:{}'.format(self.params)) + + if not os.path.exists(self.work_dir): + os.makedirs(self.work_dir) + + self.preprocessor = NeRFReconPreprocessor( + data_type=self.data_type, + use_mask=self.use_mask, + match_type=self.match_type, + frame_count=self.frame_count, + use_distortion=self.use_distortion) + + if self.use_mask and self.data_type == 'colmap': + segment_path = os.path.join(self.model_dir, 'matting.pb') + self.segmenter = ObjectSegmenter(segment_path) + + if self.data_type == 'blender': + self.img_wh = (800, 800) + self.network_cfg['radius'] = 1.5 + self.background = 'white' + self.network_cfg['background'] = 'white' + elif self.data_type == 'colmap': + self.img_wh = None + self.max_size = self.max_size + self.n_test_traj_steps = self.n_test_traj_steps + self.network_cfg['radius'] = 0.5 + if self.use_mask: + self.background = 'white' + self.network_cfg['background'] = 'white' + logger.info('run nerf with mask data') + else: + self.background = 'random' + self.network_cfg['background'] = 'random' + logger.info('run nerf without mask data') + + logger.info(self.network_cfg) + + self.model = NeRFModel( + self.network_cfg, + num_samples_per_ray=self.num_samples_per_ray, + test_ray_chunk=self.test_ray_chunk).cuda() + self.optimizer = torch.optim.Adam( + self.model.parameters(), lr=0.01, eps=1e-15) + self.grad_scaler = torch.cuda.amp.GradScaler(2**10) + + self.scheduler = torch.optim.lr_scheduler.MultiStepLR( + self.optimizer, + milestones=[ + self.max_step // 2, self.max_step * 3 // 4, + self.max_step * 9 // 10 + ], + gamma=0.33, + ) + self.criterions = PSNR() + self.set_random_seed(42) + + def set_random_seed(self, seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + def _override_params_from_file(self): + + self.params['data_type'] = self.cfg['train']['data_type'] + self.params['use_mask'] = self.cfg['train']['use_mask'] + self.params['max_step'] = self.cfg['train']['max_step'] + 
self.params['train_num_rays'] = self.cfg['train']['train_num_rays'] + self.params['max_train_num_rays'] = self.cfg['train'][ + 'max_train_num_rays'] + self.params['dynamic_ray_sampling'] = self.cfg['train'][ + 'dynamic_ray_sampling'] + self.params['log_every_n_steps'] = self.cfg['train'][ + 'log_every_n_steps'] + self.params['render_images'] = self.cfg['train']['render_images'] + self.params['save_ckpt'] = self.cfg['train']['save_ckpt'] + self.params['work_dir'] = self.cfg['train']['work_dir'] + + self.params['num_samples_per_ray'] = self.cfg['model'][ + 'num_samples_per_ray'] + self.params['test_ray_chunk'] = self.cfg['model']['test_ray_chunk'] + self.params['max_size'] = self.cfg['model']['max_size'] + self.params['n_test_traj_steps'] = self.cfg['model'][ + 'n_test_traj_steps'] + self.params['save_mesh'] = self.cfg['model']['save_mesh'] + self.params['network_cfg'] = self.cfg['model']['network_cfg'] + + self.params['match_type'] = self.cfg['preprocessor']['match_type'] + self.params['frame_count'] = self.cfg['preprocessor']['frame_count'] + self.params['use_distortion'] = self.cfg['preprocessor'][ + 'use_distortion'] + + def train(self, *args, **kwargs): + logger.info('Begin nerf reconstruction training') + processor_input = {} + if self.data_type == 'blender': + if 'data_dir' not in kwargs: + raise Exception( + 'Please specify data_dir of nerf_synthetic data') + data_dir = kwargs['data_dir'] + processor_input['data_dir'] = data_dir + processor_input['video_input_path'] = '' + + if self.data_type == 'colmap': + if 'video_input_path' in kwargs: + video_input_path = kwargs['video_input_path'] + processor_input['data_dir'] = self.work_dir + processor_input['video_input_path'] = video_input_path + elif 'data_dir' in kwargs: + data_dir = kwargs['data_dir'] + images_dir = os.path.join(data_dir, 'images') + if os.path.exists(images_dir): + image_list = glob.glob('{}/*.*g'.format(images_dir)) + if len(image_list) == 0: + raise Exception('no images found in images dir') + 
else: + processor_input['data_dir'] = data_dir + processor_input['video_input_path'] = '' + else: + raise Exception('images dir not found in data_dir') + else: + raise Exception( + 'Please specify video_path or images path for colmap process' + ) + + processor_output = self.preprocessor(processor_input) + data_dir = processor_output['data_dir'] + logger.info( + 'nerf reconstruction preprocess done, data_dir is {}'.format( + data_dir)) + + if self.data_type == 'colmap' and self.use_mask: + if os.path.exists(os.path.join(data_dir, 'preprocess')): + image_dir = os.path.join(data_dir, 'preprocess/images') + save_mask_dir = os.path.join(data_dir, 'preprocess/masks') + else: + image_dir = os.path.join(data_dir, 'images') + save_mask_dir = os.path.join(data_dir, 'masks') + os.makedirs(save_mask_dir, exist_ok=True) + img_list = glob.glob('{}/*.*g'.format(image_dir)) + glob.glob( + '{}/*.*G'.format(image_dir)) + for img_path in img_list: + img = cv2.imread(img_path) + mask = self.segmenter.run_mask(img) + outpath = os.path.join(save_mask_dir, + os.path.basename(img_path)) + cv2.imwrite(outpath, mask) + logger.info('segment images done!') + + if self.data_type == 'blender': + self.train_dataset = BlenderDataset( + root_fp=data_dir, + split='train', + img_wh=self.img_wh, + num_rays=self.train_num_rays, + color_bkgd_aug=self.background, + ) + + self.test_dataset = BlenderDataset( + root_fp=data_dir, + split='test', + img_wh=self.img_wh, + num_rays=self.train_num_rays, + ) + + elif self.data_type == 'colmap': + self.train_dataset = ColmapDataset( + root_fp=data_dir, + split='train', + img_wh=self.img_wh, + max_size=self.max_size, + num_rays=self.train_num_rays, + color_bkgd_aug=self.background, + ) + + self.test_dataset = ColmapDataset( + root_fp=data_dir, + split='test', + img_wh=self.img_wh, + max_size=self.max_size, + num_rays=self.train_num_rays, + n_test_traj_steps=self.n_test_traj_steps, + ) + + step = 0 + tic = time.time() + while step < self.max_step: + for i in 
range(len(self.train_dataset)): + self.model.train() + data = self.train_dataset[i] + self.model.update_step(step) + rays = data['rays'].cuda() + pixels = data['pixels'].cuda() + + out = self.model(rays) + + if out['num_samples'] == 0: + continue + + loss = 0. + + if self.dynamic_ray_sampling: + temp = self.train_num_samples / sum(out['num_samples']) + train_num_rays = int(self.train_num_rays * temp) + self.train_num_rays = min( + int(self.train_num_rays * 0.9 + train_num_rays * 0.1), + self.max_train_num_rays) + + self.train_dataset.update_num_rays(self.train_num_rays) + loss_rgb = F.smooth_l1_loss(out['comp_rgb'][out['rays_valid']], + pixels[out['rays_valid']]) + loss += loss_rgb + psnr = self.criterions(out['comp_rgb'], pixels) + self.optimizer.zero_grad() + self.grad_scaler.scale(loss).backward() + self.optimizer.step() + self.scheduler.step() + + if step % self.log_every_n_steps == 0: + elapsed_time = time.time() - tic + logger.info( + f'elapsed_time={elapsed_time:.2f}s | step={step} | ' + f'loss={loss:.4f} | ' + f'train/num_rays={self.train_num_rays:d} |' + f'PSNR={psnr:.4f} ') + + step += 1 + + if self.save_ckpt: + save_ckpt_name = os.path.join(self.work_dir, 'model.ckpt') + torch.save( + { + 'global_step': self.max_step, + 'network_state_dict': self.model.state_dict(), + 'optimizer_state_dict': self.optimizer.state_dict(), + }, save_ckpt_name) + logger.info( + 'save checkpoints done, saved as {}'.format(save_ckpt_name)) + if self.render_images: + save_video_path = os.path.join(self.work_dir, 'render.mp4') + self.render_video(self.work_dir, save_video_path) + + logger.info('NeRF reconstruction finish') + + def evaluate(self, + checkpoint_path: Optional[str] = None, + *args, + **kwargs) -> Dict[str, float]: + """evaluate a dataset + + evaluate a dataset via a specific model from the `checkpoint_path` path, if the `checkpoint_path` + does not exist, read from the config file. + + Args: + checkpoint_path (Optional[str], optional): the model path. 
Defaults to None. + + Returns: + Dict[str, float]: the results about the evaluation + Example: + {"accuracy": 0.5091743119266054, "f1": 0.673780487804878} + """ + raise NotImplementedError('evaluate is not supported currently') + + def render_video(self, save_dir, save_video_path): + self.model.eval() + with torch.no_grad(): + psnr = 0 + for i in tqdm.tqdm(range(len(self.test_dataset))): + + data = self.test_dataset[i] + rays = data['rays'].cuda() + pixels = data['pixels'].cuda() + image_wh = data['image_wh'] + out = self.model.inference(rays) + + psnr += self.criterions(out['comp_rgb'], pixels) + + W, H = image_wh + img = out['comp_rgb'].view(H, W, 3) + save_img_dir = os.path.join(save_dir, 'render') + os.makedirs(save_img_dir, exist_ok=True) + save_img_path = os.path.join(save_img_dir, f'{i:d}.png') + self.save_image(save_img_path, img) + + self.save_video(save_video_path, save_img_dir) + logger.info('test psnr: {}'.format(psnr / len(self.test_dataset))) + logger.info( + 'save render video done. saved as {}'.format(save_video_path)) + + if self.save_mesh: + mesh = self.model.isosurface() + save_mesh_path = os.path.join(save_dir, 'render.obj') + self.save_obj(save_mesh_path, mesh['v_pos'], mesh['t_pos_idx']) + logger.info('save render mesh done. 
saved as {}'.format( + save_mesh_path)) + + def save_image(self, filename, img): + img = img.clip(0, 1).cpu().numpy() + img = (img * 255.).astype(np.uint8) + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + save_dir = os.path.dirname(filename) + os.makedirs(save_dir, exist_ok=True) + cv2.imwrite(filename, img) + + def save_video(self, filename, img_dir, fps=20): + img_paths = glob.glob('{}/*.png'.format(img_dir)) + img_paths = sorted( + img_paths, key=lambda f: int(os.path.basename(f)[:-4])) + imgs = [cv2.imread(f) for f in img_paths] + + H, W, _ = imgs[0].shape + writer = cv2.VideoWriter(filename, cv2.VideoWriter_fourcc(*'mp4v'), + fps, (W, H), True) + for img in imgs: + writer.write(img) + writer.release() + + def write_obj(self, filename, v_pos, t_pos_idx, v_tex, t_tex_idx): + with open(filename, 'w') as f: + for v in v_pos: + f.write('v {} {} {} \n'.format(v[0], v[1], v[2])) + + if v_tex is not None: + assert (len(t_pos_idx) == len(t_tex_idx)) + for v in v_tex: + f.write('vt {} {} \n'.format(v[0], 1.0 - v[1])) + + for i in range(len(t_pos_idx)): + f.write('f ') + for j in range(3): + f.write( + ' %s/%s' % + (str(t_pos_idx[i][j] + 1), + '' if v_tex is None else str(t_tex_idx[i][j] + 1))) + f.write('\n') + + def save_obj(self, filename, v_pos, t_pos_idx, v_tex=None, t_tex_idx=None): + + v_pos = v_pos.cpu().numpy() + t_pos_idx = t_pos_idx.cpu().numpy() + save_dir = os.path.dirname(filename) + os.makedirs(save_dir, exist_ok=True) + if v_tex is not None and t_tex_idx is not None: + v_tex = v_tex.cpu().numpy() + t_tex_idx = t_tex_idx.cpu().numpy() + self.write_obj(filename, v_pos, t_pos_idx, v_tex, t_tex_idx) + else: + self.write_obj(filename, v_pos, t_pos_idx, v_tex, t_tex_idx) diff --git a/tests/pipelines/test_nerf_recon_acc.py b/tests/pipelines/test_nerf_recon_acc.py index 502d153e..bc5ad1b2 100644 --- a/tests/pipelines/test_nerf_recon_acc.py +++ b/tests/pipelines/test_nerf_recon_acc.py @@ -1,9 +1,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import os
import unittest

import torch

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.msdatasets import MsDataset
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level


class NeRFReconAccTest(unittest.TestCase, DemoCompatibilityCheck):
    """Pipeline tests for accelerated NeRF reconstruction on the blender
    'lego' scene downloaded from the damo/nerf_recon_dataset."""

    def setUp(self) -> None:
        # Fix: keep self.task populated -- DemoCompatibilityCheck reads it;
        # use the NeRF task instead of the copy-pasted face-fusion one.
        self.task = Tasks.nerf_recon_acc
        self.model_id = 'damo/cv_nerf-3d-reconstruction-accelerate_damo'
        data_dir = MsDataset.load(
            'nerf_recon_dataset', namespace='damo',
            split='train').config_kwargs['split_config']['train']
        nerf_synthetic_dataset = os.path.join(data_dir, 'nerf_synthetic')
        blender_scene = 'lego'
        self.data_dir = os.path.join(nerf_synthetic_dataset, blender_scene)
        self.render_dir = 'exp'

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only')
    def test_run_by_direct_model_download(self):
        snapshot_path = snapshot_download(self.model_id)
        nerf_recon_acc = pipeline(
            Tasks.nerf_recon_acc,
            model=snapshot_path,
            data_type='blender',
        )
        nerf_recon_acc(
            dict(data_dir=self.data_dir, render_dir=self.render_dir))
        print('nerf_recon_acc.test_run_by_direct_model_download done')

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only')
    def test_run_modelhub(self):
        nerf_recon_acc = pipeline(
            Tasks.nerf_recon_acc,
            model=self.model_id,
            data_type='blender',
        )
        nerf_recon_acc(
            dict(data_dir=self.data_dir, render_dir=self.render_dir))
        # Fix: message named the wrong test suite ('facefusion').
        print('nerf_recon_acc.test_run_modelhub done')

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only')
    def test_run_modelhub_default_model(self):
        nerf_recon_acc = pipeline(Tasks.nerf_recon_acc)
        nerf_recon_acc(
            dict(data_dir=self.data_dir, render_dir=self.render_dir))
        # Fix: message named the wrong test suite ('facefusion').
        print('nerf_recon_acc.test_run_modelhub_default_model done')

    @unittest.skip('demo compatibility test is only enabled on a needed-basis')
    def test_demo_compatibility(self):
        self.compatibility_check()
import os
import unittest

from modelscope.msdatasets import MsDataset
from modelscope.trainers.cv import NeRFReconAccTrainer
from modelscope.utils.test_utils import test_level


class TestNeRFReconAccTrainer(unittest.TestCase):
    """Trainer smoke test for accelerated NeRF reconstruction."""

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_model_name(self):
        # Train on the blender 'lego' scene from the damo NeRF dataset,
        # with per-epoch image rendering disabled to keep the run short.
        model_id = 'damo/cv_nerf-3d-reconstruction-accelerate_damo'

        dataset = MsDataset.load(
            'nerf_recon_dataset', namespace='damo', split='train')
        train_root = dataset.config_kwargs['split_config']['train']
        scene_dir = os.path.join(train_root, 'nerf_synthetic', 'lego')

        trainer = NeRFReconAccTrainer(
            model=model_id,
            data_type='blender',
            work_dir='exp_nerf',
            render_images=False)
        trainer.train(data_dir=scene_dir)


if __name__ == '__main__':
    unittest.main()