Merge branch 'master-github' into master-merge-github-230728
@@ -1,5 +1,5 @@
 PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/dreambooth/finetune_stable_diffusion_dreambooth.py \
-    --model 'AI-ModelScope/stable-diffusion-v1-5' \
+    --model 'AI-ModelScope/stable-diffusion-v2-1' \
     --model_revision 'v1.0.8' \
     --work_dir './tmp/dreambooth_diffusion' \
     --train_dataset_name 'buptwq/lora-stable-diffusion-finetune' \
@@ -1,5 +1,5 @@
 PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/lora/finetune_stable_diffusion_lora.py \
-    --model 'AI-ModelScope/stable-diffusion-v1-5' \
+    --model 'AI-ModelScope/stable-diffusion-v2-1' \
     --model_revision 'v1.0.9' \
     --prompt "a dog" \
     --work_dir './tmp/lora_diffusion' \
29598  modelscope.models.nlp.llama.backbone  Normal file
File diff suppressed because it is too large
@@ -585,8 +585,6 @@ class HubApi:
         datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={revision}'
         cookies = ModelScopeConfig.get_cookies()
-        r = self.session.get(datahub_url, cookies=cookies, headers=self.headers)
+        r = self.session.get(
+            datahub_url, cookies=cookies, headers=self.headers)
         resp = r.json()
         datahub_raise_on_error(datahub_url, resp)
         file_list = resp['Data']
@@ -21,20 +21,20 @@ def check_local_model_is_latest(
     """Check local model repo is latest.
     Check local model repo is same as hub latest version.
     """
-    model_cache = None
-    # download with git
-    if os.path.exists(os.path.join(model_root_path, '.git')):
-        git_cmd_wrapper = GitCommandWrapper()
-        git_url = git_cmd_wrapper.get_repo_remote_url(model_root_path)
-        if git_url.endswith('.git'):
-            git_url = git_url[:-4]
-        u_parse = urlparse(git_url)
-        model_id = u_parse.path[1:]
-    else:  # snapshot_download
-        model_cache = ModelFileSystemCache(model_root_path)
-        model_id = model_cache.get_model_id()
-
     try:
+        model_cache = None
+        # download with git
+        if os.path.exists(os.path.join(model_root_path, '.git')):
+            git_cmd_wrapper = GitCommandWrapper()
+            git_url = git_cmd_wrapper.get_repo_remote_url(model_root_path)
+            if git_url.endswith('.git'):
+                git_url = git_url[:-4]
+            u_parse = urlparse(git_url)
+            model_id = u_parse.path[1:]
+        else:  # snapshot_download
+            model_cache = ModelFileSystemCache(model_root_path)
+            model_id = model_cache.get_model_id()
+
         # make headers
         headers = {
             'user-agent':
@@ -75,7 +75,8 @@ def check_local_model_is_latest(
                     continue
                 else:
                     logger.info(
-                        'Model is updated from modelscope hub, you can verify from https://www.modelscope.cn.'
+                        f'Model file {model_file["Name"]} is different from the latest version `{latest_revision}`,'
+                        f'This is because you are using an older version or the file is updated manually.'
                     )
                     break
            else:
@@ -86,7 +87,8 @@ def check_local_model_is_latest(
                     continue
                 else:
                     logger.info(
-                        'Model is updated from modelscope hub, you can verify from https://www.modelscope.cn.'
+                        f'Model file {model_file["Name"]} is different from the latest version `{latest_revision}`,'
+                        f'This is because you are using an older version or the file is updated manually.'
                     )
                     break
     except:  # noqa: E722
@@ -112,6 +112,7 @@ class Models(object):
     image_quality_assessment_degradation = 'image-quality-assessment-degradation'
     m2fp = 'm2fp'
     nerf_recon_acc = 'nerf-recon-acc'
+    nerf_recon_4k = 'nerf-recon-4k'
     nerf_recon_vq_compression = 'nerf-recon-vq-compression'
     bts_depth_estimation = 'bts-depth-estimation'
     vision_efficient_tuning = 'vision-efficient-tuning'
@@ -411,6 +412,7 @@ class Pipelines(object):
     image_human_parsing = 'm2fp-image-human-parsing'
     object_detection_3d_depe = 'object-detection-3d-depe'
     nerf_recon_acc = 'nerf-recon-acc'
+    nerf_recon_4k = 'nerf-recon-4k'
     nerf_recon_vq_compression = 'nerf-recon-vq-compression'
     bad_image_detecting = 'bad-image-detecting'
     controllable_image_generation = 'controllable-image-generation'
@@ -858,6 +860,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
                                'damo/cv_mobilenet-v2_bad-image-detecting'),
     Tasks.nerf_recon_acc: (Pipelines.nerf_recon_acc,
                            'damo/cv_nerf-3d-reconstruction-accelerate_damo'),
+    Tasks.nerf_recon_4k: (Pipelines.nerf_recon_4k,
+                          'damo/cv_nerf-3d-reconstruction-4k-nerf_damo'),
     Tasks.nerf_recon_vq_compression: (
         Pipelines.nerf_recon_vq_compression,
         'damo/cv_nerf-3d-reconstruction-vq-compression_damo'),
@@ -890,6 +894,7 @@ class CVTrainers(object):
     ocr_recognition = 'ocr-recognition'
     ocr_detection_db = 'ocr-detection-db'
     nerf_recon_acc = 'nerf-recon-acc'
+    nerf_recon_4k = 'nerf-recon-4k'
     action_detection = 'action-detection'
     vision_efficient_tuning = 'vision-efficient-tuning'

@@ -1006,6 +1011,7 @@ class Preprocessors(object):
     ocr_detection = 'ocr-detection'
     bad_image_detecting_preprocessor = 'bad-image-detecting-preprocessor'
     nerf_recon_acc_preprocessor = 'nerf-recon-acc-preprocessor'
+    nerf_recon_4k_preprocessor = 'nerf-recon-4k-preprocessor'
     nerf_recon_vq_compression_preprocessor = 'nerf-recon-vq-compression-preprocessor'
     controllable_image_generation_preprocessor = 'controllable-image-generation-preprocessor'
     image_classification_preprocessor = 'image-classification-preprocessor'
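Taken together, the registry additions above wire the new 4K-NeRF model, pipeline, trainer and preprocessor into the framework. A minimal usage sketch, assuming the task string equals the 'nerf-recon-4k' value registered above (the model id comes from the DEFAULT_MODEL_FOR_PIPELINE entry):

# Hedged sketch: assumes Tasks.nerf_recon_4k maps to 'nerf-recon-4k' as registered above.
from modelscope.pipelines import pipeline

nerf_4k = pipeline(
    task='nerf-recon-4k',
    model='damo/cv_nerf-3d-reconstruction-4k-nerf_damo')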
22  modelscope/models/cv/nerf_recon_4k/__init__.py  Normal file
@@ -0,0 +1,22 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    from .nerf_recon_4k import NeRFRecon4K
    from .nerf_preprocess import NeRFReconPreprocessor

else:
    _import_structure = {
        'nerf_recon_4k': ['NeRFRecon4K'],
        'nerf_preprocess': ['NeRFReconPreprocessor'],
    }

    import sys

    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )
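A quick sketch of what the LazyImportModule registration above buys: importing the package is cheap, and the heavy submodules are only imported when one of the exported names is first accessed.

# Hedged sketch: both names resolve through the LazyImportModule registered above;
# the underlying submodule is imported only on first access.
from modelscope.models.cv.nerf_recon_4k import NeRFRecon4K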
97  modelscope/models/cv/nerf_recon_4k/dataloader/load_blender.py  Executable file
@@ -0,0 +1,97 @@
import os

import cv2
import imageio
import json
import numpy as np
import torch
import torch.nn.functional as F


def trans_t(t):
    return torch.Tensor([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, t],
                         [0, 0, 0, 1]]).float()


def rot_phi(phi):
    return torch.Tensor([[1, 0, 0, 0], [0, np.cos(phi), -np.sin(phi), 0],
                         [0, np.sin(phi), np.cos(phi), 0],
                         [0, 0, 0, 1]]).float()


def rot_theta(th):
    return torch.Tensor([[np.cos(th), 0, -np.sin(th), 0], [0, 1, 0, 0],
                         [np.sin(th), 0, np.cos(th), 0],
                         [0, 0, 0, 1]]).float()


def pose_spherical(theta, phi, radius):
    c2w = trans_t(radius)
    c2w = rot_phi(phi / 180. * np.pi) @ c2w
    c2w = rot_theta(theta / 180. * np.pi) @ c2w
    c2w = torch.Tensor(
        np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0],
                  [0, 0, 0, 1]])) @ c2w
    return c2w


def load_blender_data(basedir, half_res=False, testskip=1):
    splits = ['train', 'val', 'test']
    metas = {}
    for s in splits:
        with open(os.path.join(basedir, 'transforms_{}.json'.format(s)),
                  'r') as fp:
            metas[s] = json.load(fp)

    all_imgs = []
    all_poses = []
    counts = [0]
    for s in splits:
        meta = metas[s]
        imgs = []
        poses = []
        if s == 'train' or testskip == 0:
            skip = 1
        elif s == 'val':
            skip = 50
        else:
            skip = testskip

        for frame in meta['frames'][::skip]:
            fname = os.path.join(basedir, frame['file_path'] + '.png')
            imgs.append(imageio.imread(fname))
            poses.append(np.array(frame['transform_matrix']))
        imgs = (np.array(imgs) / 255.).astype(
            np.float32)  # keep all 4 channels (RGBA)
        poses = np.array(poses).astype(np.float32)
        counts.append(counts[-1] + imgs.shape[0])
        all_imgs.append(imgs)
        all_poses.append(poses)

    i_split = [np.arange(counts[i], counts[i + 1]) for i in range(3)]

    imgs = np.concatenate(all_imgs, 0)
    poses = np.concatenate(all_poses, 0)

    H, W = imgs[0].shape[:2]
    camera_angle_x = float(meta['camera_angle_x'])
    focal = .5 * W / np.tan(.5 * camera_angle_x)

    render_poses = torch.stack([
        pose_spherical(angle, -30.0, 4.0)
        for angle in np.linspace(-180, 180, 160 + 1)[:-1]
    ], 0)

    if half_res:
        H = H // 2
        W = W // 2
        focal = focal / 2.

        imgs_half_res = np.zeros((imgs.shape[0], H, W, 4))
        for i, img in enumerate(imgs):
            imgs_half_res[i] = cv2.resize(
                img, (W, H), interpolation=cv2.INTER_AREA)
        imgs = imgs_half_res
        # imgs = tf.image.resize_area(imgs, [400, 400]).numpy()

    return imgs, poses, render_poses, [H, W, focal], i_split
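A minimal call sketch for the loader above, assuming a standard Blender-synthetic layout (transforms_{train,val,test}.json plus PNG frames) under a hypothetical ./nerf_synthetic/lego directory:

# Hedged sketch: './nerf_synthetic/lego' is a placeholder dataset path.
imgs, poses, render_poses, hwf, i_split = load_blender_data(
    './nerf_synthetic/lego', half_res=True, testskip=8)
H, W, focal = hwf
i_train, i_val, i_test = i_split
# imgs is (N, H, W, 4) RGBA in [0, 1]; render_poses is a 160-step orbit at -30 deg pitch.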
143  modelscope/models/cv/nerf_recon_4k/dataloader/load_data.py  Executable file
@@ -0,0 +1,143 @@
import numpy as np

from .load_blender import load_blender_data
from .load_llff import load_llff_data
from .load_tankstemple import load_tankstemple_data


def load_data(args):

    K, depths = None, None
    near_clip = None

    if args.dataset_type == 'llff':
        images, depths, poses, bds, render_poses, i_test, *srgt = load_llff_data(
            args.datadir,
            args.factor,
            None,
            None,
            recenter=True,
            bd_factor=0.75,
            spherify=False,
            load_depths=False,
            load_SR=args.load_sr,
            movie_render_kwargs=dict())
        hwf = poses[0, :3, -1]
        poses = poses[:, :3, :4]
        print('Loaded llff', images.shape, render_poses.shape, hwf,
              args.datadir)
        if not isinstance(i_test, list):
            i_test = [i_test]

        llffhold = 8
        if llffhold > 0:
            print('Auto LLFF holdout,', llffhold)
            i_test = np.arange(images.shape[0])[::llffhold]

        i_val = [i_test[0]]
        i_train = np.array([
            i for i in np.arange(int(images.shape[0]))
            if (i not in i_test and i not in i_val)
        ])

        print('DEFINING BOUNDS')
        if args.ndc:
            near = 0.
            far = 1.
        else:
            near_clip = max(np.ndarray.min(bds) * .9, 0)
            _far = max(np.ndarray.max(bds) * 1., 0)
            near = 0
            far = inward_nearfar_heuristic(poses[i_train, :3, 3])[1]
            print('near_clip', near_clip)
            print('original far', _far)
        print('NEAR FAR', near, far)

    elif args.dataset_type == 'blender':
        images, poses, render_poses, hwf, i_split = load_blender_data(
            args.datadir, args.half_res, args.testskip)
        print('Loaded blender', images.shape, render_poses.shape, hwf,
              args.datadir)
        i_train, i_val, i_test = i_split

        near, far = 2., 6.

        if images.shape[-1] == 4:
            if args.white_bkgd:
                images = images[..., :3] * images[..., -1:] + (
                    1. - images[..., -1:])
            else:
                images = images[..., :3] * images[..., -1:]

        srgt = [images, 0]

    elif args.dataset_type == 'tankstemple':
        images, poses, render_poses, hwf, K, i_split = load_tankstemple_data(
            args.datadir, movie_render_kwargs=args.movie_render_kwargs)
        print('Loaded tankstemple', images.shape, render_poses.shape, hwf,
              args.datadir)
        i_train, i_val, i_test = i_split

        near, far = inward_nearfar_heuristic(poses[i_train, :3, 3], ratio=0)

        if images.shape[-1] == 4:
            if args.white_bkgd:
                images = images[..., :3] * images[..., -1:] + (
                    1. - images[..., -1:])
            else:
                images = images[..., :3] * images[..., -1:]

    else:
        raise NotImplementedError(
            f'Unknown dataset type {args.dataset_type} exiting')

    # Cast intrinsics to the right types
    H, W, focal = hwf
    H, W = int(H), int(W)
    hwf = [H, W, focal]
    HW = np.array([im.shape[:2] for im in images])
    irregular_shape = (images.dtype is np.dtype('object'))

    if K is None:
        K = np.array([[focal, 0, 0.5 * W], [0, focal, 0.5 * H], [0, 0, 1]])

    if len(K.shape) == 2:
        Ks = K[None].repeat(len(poses), axis=0)
    else:
        Ks = K

    render_poses = render_poses[..., :4]

    if args.load_sr:
        srgt, w2c = srgt[0], srgt[1]
    else:
        srgt, w2c = 0, 0

    data_dict = dict(
        hwf=hwf,
        HW=HW,
        Ks=Ks,
        near=near,
        far=far,
        near_clip=near_clip,
        i_train=i_train,
        i_val=i_val,
        i_test=i_test,
        poses=poses,
        render_poses=render_poses,
        images=images,
        depths=depths,
        white_bkgd=args.white_bkgd,
        irregular_shape=irregular_shape,
        srgt=srgt,
        w2c=w2c)
    return data_dict


def inward_nearfar_heuristic(cam_o, ratio=0.05):
    dist = np.linalg.norm(cam_o[:, None] - cam_o, axis=-1)
    far = dist.max()  # could be too small to exceed the scene bbox
    # it is only used to determine the scene bbox
    # lib/dvgo uses 1e9 as far
    near = far * ratio
    return near, far
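A tiny worked example for inward_nearfar_heuristic with made-up camera origins: far is the maximum pairwise camera distance, and near is ratio times that.

import numpy as np

cam_o = np.array([[0., 0., 0.], [2., 0., 0.], [0., 2., 0.]])
near, far = inward_nearfar_heuristic(cam_o, ratio=0.05)
# far = 2 * sqrt(2) ≈ 2.828 (the largest pairwise distance); near ≈ 0.141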
548  modelscope/models/cv/nerf_recon_4k/dataloader/load_llff.py  Executable file
@@ -0,0 +1,548 @@
import os

import imageio
import numpy as np
import scipy
import torch


# Slightly modified version of LLFF data loading code
# see https://github.com/Fyusion/LLFF for original
def imread(f):
    if f.endswith('png'):
        return imageio.imread(f, format='PNG-PIL', ignoregamma=True)
    else:
        return imageio.imread(f)


def depthread(path):
    with open(path, 'rb') as fid:
        width, height, channels = np.genfromtxt(
            fid, delimiter='&', max_rows=1, usecols=(0, 1, 2), dtype=int)
        fid.seek(0)
        num_delimiter = 0
        byte = fid.read(1)
        while True:
            if byte == b'&':
                num_delimiter += 1
                if num_delimiter >= 3:
                    break
            byte = fid.read(1)
        array = np.fromfile(fid, np.float32)
    array = array.reshape((width, height, channels), order='F')
    return np.transpose(array, (1, 0, 2)).squeeze()


def _minify(basedir, factors=[], resolutions=[]):
    needtoload = False
    for r in factors:
        imgdir = os.path.join(basedir, 'images_{}'.format(r))
        if not os.path.exists(imgdir):
            needtoload = True
    for r in resolutions:
        imgdir = os.path.join(basedir, 'images_{}x{}'.format(r[1], r[0]))
        if not os.path.exists(imgdir):
            needtoload = True
    if not needtoload:
        return

    from shutil import copy
    from subprocess import check_output

    imgdir = os.path.join(basedir, 'images')
    imgs = [os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir))]
    imgs = [
        f for f in imgs
        if any([f.endswith(ex) for ex in ['JPG', 'jpg', 'png', 'jpeg', 'PNG']])
    ]
    imgdir_orig = imgdir

    wd = os.getcwd()

    for r in factors + resolutions:
        if isinstance(r, int):
            name = 'images_{}'.format(r)
            resizearg = '{}%'.format(100. / r)
        else:
            name = 'images_{}x{}'.format(r[1], r[0])
            resizearg = '{}x{}'.format(r[1], r[0])
        imgdir = os.path.join(basedir, name)
        if os.path.exists(imgdir):
            continue

        print('Minifying', r, basedir)

        os.makedirs(imgdir)
        check_output('cp {}/* {}'.format(imgdir_orig, imgdir), shell=True)

        ext = imgs[0].split('.')[-1]
        args = ' '.join([
            'mogrify', '-resize', resizearg, '-format', 'png',
            '*.{}'.format(ext)
        ])
        print(args)
        os.chdir(imgdir)
        check_output(args, shell=True)
        os.chdir(wd)

        if ext != 'png':
            check_output('rm {}/*.{}'.format(imgdir, ext), shell=True)
            print('Removed duplicates')
        print('Done')


def _load_data(basedir,
               factor=None,
               width=None,
               height=None,
               load_imgs=True,
               load_depths=False,
               load_SR=False):

    poses_arr = np.load(os.path.join(basedir, 'poses_bounds.npy'))
    if poses_arr.shape[1] == 17:
        poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1, 2, 0])
    elif poses_arr.shape[1] == 14:
        poses = poses_arr[:, :-2].reshape([-1, 3, 4]).transpose([1, 2, 0])
    else:
        raise NotImplementedError
    bds = poses_arr[:, -2:].transpose([1, 0])

    img0 = [
        os.path.join(basedir, 'images', f)
        for f in sorted(os.listdir(os.path.join(basedir, 'images')))
        if f.endswith('JPG') or f.endswith('jpg') or f.endswith('png')
    ][0]
    sh = imageio.imread(img0).shape

    sfx = ''

    if height is not None and width is not None:
        _minify(basedir, resolutions=[[height, width]])
        sfx = '_{}x{}'.format(width, height)
    elif factor is not None and factor != 1:
        sfx = '_{}'.format(factor)
        _minify(basedir, factors=[factor])
        factor = factor
    elif height is not None:
        factor = sh[0] / float(height)
        width = int(sh[1] / factor)
        _minify(basedir, resolutions=[[height, width]])
        sfx = '_{}x{}'.format(width, height)
    elif width is not None:
        factor = sh[1] / float(width)
        height = int(sh[0] / factor)
        _minify(basedir, resolutions=[[height, width]])
        sfx = '_{}x{}'.format(width, height)
    else:
        factor = 1

    imgdir = os.path.join(basedir, 'images' + sfx)
    print(f'Loading images from {imgdir}')
    if not os.path.exists(imgdir):
        print(imgdir, 'does not exist, returning')
        return

    imgfiles = [
        os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir))
        if f.endswith('JPG') or f.endswith('jpg') or f.endswith('png')
    ]
    if poses.shape[-1] != len(imgfiles):
        print()
        print('Mismatch between imgs {} and poses {} !!!!'.format(
            len(imgfiles), poses.shape[-1]))
        names = set(
            name[:-4]
            for name in np.load(os.path.join(basedir, 'poses_names.npy')))
        assert len(names) == poses.shape[-1]
        print('The following files are skipped due to SfM failure:')
        new_imgfiles = []
        for i in imgfiles:
            fname = os.path.split(i)[1][:-4]
            if fname in names:
                new_imgfiles.append(i)
            else:
                print('==>', i)
        imgfiles = new_imgfiles

    if len(imgfiles) < 3:
        print('Too few images...')
        import sys
        sys.exit()

    sh = imageio.imread(imgfiles[0]).shape
    if poses.shape[1] == 4:
        poses = np.concatenate([poses, np.zeros_like(poses[:, [0]])], 1)
        poses[2, 4, :] = np.load(os.path.join(basedir, 'hwf_cxcy.npy'))[2]
    poses[:2, 4, :] = np.array(sh[:2]).reshape([2, 1])
    poses[2, 4, :] = poses[2, 4, :] * 1. / factor

    if not load_imgs:
        return poses, bds

    imgs = [imread(f)[..., :3] / 255. for f in imgfiles]
    imgs = np.stack(imgs, -1)

    if load_SR:
        if load_SR == 16:
            imgdir_sr = os.path.join(basedir, 'images_16')
        elif load_SR == 8:
            imgdir_sr = os.path.join(basedir, 'images_8')
        elif load_SR == 4:
            imgdir_sr = os.path.join(basedir, 'images_4')
        elif load_SR == 2:
            imgdir_sr = os.path.join(basedir, 'images_2')
        elif load_SR == 1:
            imgdir_sr = os.path.join(basedir, 'images')
        imgfiles_sr = [
            os.path.join(imgdir_sr, f) for f in sorted(os.listdir(imgdir_sr))
            if f.endswith('JPG') or f.endswith('jpg') or f.endswith('png')
        ]
        imgs_sr = [imread(f)[..., :3] / 255. for f in imgfiles_sr]
        imgs_sr = np.stack(imgs_sr, -1)

    print('Loaded image data', imgs.shape, poses[:, -1, 0])

    if not load_depths and load_SR:
        return poses, bds, imgs, imgs_sr

    if not load_depths:
        return poses, bds, imgs

    depthdir = os.path.join(basedir, 'stereo', 'depth_maps')
    assert os.path.exists(depthdir), f'Dir not found: {depthdir}'

    depthfiles = [
        os.path.join(depthdir, f) for f in sorted(os.listdir(depthdir))
        if f.endswith('.geometric.bin')
    ]
    assert poses.shape[-1] == len(
        depthfiles), 'Mismatch between imgs {} and poses {} !!!!'.format(
            len(depthfiles), poses.shape[-1])

    depths = [depthread(f) for f in depthfiles]
    depths = np.stack(depths, -1)
    print('Loaded depth data', depths.shape)
    return poses, bds, imgs, depths


def normalize(x):
    return x / np.linalg.norm(x)


def viewmatrix(z, up, pos):
    vec2 = normalize(z)
    vec1_avg = up
    vec0 = normalize(np.cross(vec1_avg, vec2))
    vec1 = normalize(np.cross(vec2, vec0))
    m = np.stack([vec0, vec1, vec2, pos], 1)
    return m


def ptstocam(pts, c2w):
    tt = np.matmul(c2w[:3, :3].T, (pts - c2w[:3, 3])[..., np.newaxis])[..., 0]
    return tt


def poses_avg(poses):

    hwf = poses[0, :3, -1:]

    center = poses[:, :3, 3].mean(0)
    vec2 = normalize(poses[:, :3, 2].sum(0))
    up = poses[:, :3, 1].sum(0)
    c2w = np.concatenate([viewmatrix(vec2, up, center), hwf], 1)

    return c2w


def w2c_gen(poses):
    final_pose = []
    for idx in range(len(poses)):
        pose = poses[idx, ...]
        z = normalize(pose[:3, 2])
        up = pose[:3, 1]
        vec2 = normalize(z)
        vec0 = normalize(np.cross(up, vec2))
        vec1 = normalize(np.cross(vec2, vec0))
        m = np.stack([vec0, vec1, vec2], 1)
        mt = np.linalg.inv(m)
        final_pose.append(mt)
    final_pose = np.stack(final_pose, 0)
    return final_pose


def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, rots, N):
    render_poses = []
    rads = np.array(list(rads) + [1.])
    hwf = c2w[:, 4:5]

    # -np.sin(theta), -np.sin(theta*zrate)*zdelta
    # 0, 0
    for theta in np.linspace(0., 2 * np.pi * rots, N + 1)[:-1]:
        c = np.dot(
            c2w[:3, :4],
            np.array([
                np.cos(theta), -np.sin(theta), -np.sin(theta * zrate) * zdelta,
                1.
            ]) * rads)
        z = normalize(c - np.dot(c2w[:3, :4], np.array([0, 0, -focal, 1.])))
        render_poses.append(np.concatenate([viewmatrix(z, up, c), hwf], 1))
    return render_poses


def recenter_poses(poses):

    poses_ = poses + 0
    bottom = np.reshape([0, 0, 0, 1.], [1, 4])
    c2w = poses_avg(poses)
    c2w = np.concatenate([c2w[:3, :4], bottom], -2)
    bottom = np.tile(np.reshape(bottom, [1, 1, 4]), [poses.shape[0], 1, 1])
    poses = np.concatenate([poses[:, :3, :4], bottom], -2)

    poses = np.linalg.inv(c2w) @ poses
    poses_[:, :3, :4] = poses[:, :3, :4]
    poses = poses_
    return poses


def rerotate_poses(poses):
    poses = np.copy(poses)
    centroid = poses[:, :3, 3].mean(0)

    poses[:, :3, 3] = poses[:, :3, 3] - centroid

    # Find the pca vector with the minimum eigenvalue
    x = poses[:, :, 3]
    mu = x.mean(0)
    cov = np.cov((x - mu).T)
    ev, eig = np.linalg.eig(cov)
    cams_up = eig[:, np.argmin(ev)]
    if cams_up[1] < 0:
        cams_up = -cams_up

    # Find the rotation matrix that aligns cams_up with [0,1,0]
    R = scipy.spatial.transform.Rotation.align_vectors(
        [[0, 1, 0]], cams_up[None])[0].as_matrix()

    # Apply the rotation and add back the centroid position
    poses[:, :3, :3] = R @ poses[:, :3, :3]
    poses[:, :3, [3]] = R @ poses[:, :3, [3]]
    poses[:, :3, 3] = poses[:, :3, 3] + centroid
    return poses


#####################


def spherify_poses(poses, bds, depths):

    def p34_to_44(p):
        return np.concatenate([
            p,
            np.tile(
                np.reshape(np.eye(4)[-1, :], [1, 1, 4]), [p.shape[0], 1, 1])
        ], 1)

    rays_d = poses[:, :3, 2:3]
    rays_o = poses[:, :3, 3:4]

    def min_line_dist(rays_o, rays_d):
        A_i = np.eye(3) - rays_d * np.transpose(rays_d, [0, 2, 1])
        b_i = -A_i @ rays_o
        pt_mindist = np.squeeze(-np.linalg.inv(
            (np.transpose(A_i, [0, 2, 1]) @ A_i).mean(0)) @ (b_i).mean(0))
        return pt_mindist

    pt_mindist = min_line_dist(rays_o, rays_d)

    center = pt_mindist
    up = (poses[:, :3, 3] - center).mean(0)

    vec0 = normalize(up)
    vec1 = normalize(np.cross([.1, .2, .3], vec0))
    vec2 = normalize(np.cross(vec0, vec1))
    pos = center
    c2w = np.stack([vec1, vec2, vec0, pos], 1)

    poses_reset = np.linalg.inv(p34_to_44(c2w[None])) @ p34_to_44(
        poses[:, :3, :4])

    radius = np.sqrt(np.mean(np.sum(np.square(poses_reset[:, :3, 3]), -1)))

    sc = 1. / radius
    poses_reset[:, :3, 3] *= sc
    bds *= sc
    radius *= sc
    depths *= sc

    poses_reset = np.concatenate([
        poses_reset[:, :3, :4],
        np.broadcast_to(poses[0, :3, -1:], poses_reset[:, :3, -1:].shape)
    ], -1)

    return poses_reset, radius, bds, depths


def load_llff_data(basedir,
                   factor=8,
                   width=None,
                   height=None,
                   recenter=True,
                   rerotate=True,
                   bd_factor=.75,
                   spherify=False,
                   path_zflat=False,
                   load_depths=False,
                   load_SR=False,
                   movie_render_kwargs={}):

    poses, bds, imgs, *depths = _load_data(
        basedir,
        factor=factor,
        width=width,
        height=height,
        load_depths=load_depths,
        load_SR=load_SR)  # factor=8 downsamples original imgs by 8x
    print('Loaded', basedir, bds.min(), bds.max())
    if load_depths:
        depths = depths[0]
    elif load_SR and not load_depths:
        imgs_SRGT = depths[0]
        depths = 0
    else:
        depths = 0

    # Correct rotation matrix ordering and move variable dim to axis 0
    poses = np.concatenate(
        [poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]], 1)
    poses = np.moveaxis(poses, -1, 0).astype(np.float32)
    imgs = np.moveaxis(imgs, -1, 0).astype(np.float32)
    images = imgs
    bds = np.moveaxis(bds, -1, 0).astype(np.float32)

    # Rescale if bd_factor is provided
    if bds.min() < 0 and bd_factor is not None:
        print('Found negative z values from SfM sparse points!?')
        print('Please try bd_factor=None')
        import sys
        sys.exit()
    sc = 1. if bd_factor is None else 1. / (bds.min() * bd_factor)
    poses[:, :3, 3] *= sc
    bds *= sc
    depths *= sc

    if recenter:
        poses = recenter_poses(poses)

    if spherify:
        poses, radius, bds, depths = spherify_poses(poses, bds, depths)
        if rerotate:
            poses = rerotate_poses(poses)

        # generate spiral poses for rendering fly-through movie
        centroid = poses[:, :3, 3].mean(0)
        radcircle = movie_render_kwargs.get('scale_r', 1) * np.linalg.norm(
            poses[:, :3, 3] - centroid, axis=-1).mean()
        centroid[0] += movie_render_kwargs.get('shift_x', 0)
        centroid[1] += movie_render_kwargs.get('shift_y', 0)
        centroid[2] += movie_render_kwargs.get('shift_z', 0)
        new_up_rad = movie_render_kwargs.get('pitch_deg', 0) * np.pi / 180
        target_y = radcircle * np.tan(new_up_rad)

        render_poses = []

        for th in np.linspace(0., 2. * np.pi, 200):
            camorigin = np.array(
                [radcircle * np.cos(th), 0, radcircle * np.sin(th)])
            if movie_render_kwargs.get('flip_up', False):
                up = np.array([0, 1., 0])
            else:
                up = np.array([0, -1., 0])
            vec2 = normalize(camorigin)
            vec0 = normalize(np.cross(vec2, up))
            vec1 = normalize(np.cross(vec2, vec0))
            pos = camorigin + centroid
            # rotate to align with new pitch rotation
            lookat = -vec2
            lookat[1] = target_y
            lookat = normalize(lookat)
            vec2 = -lookat
            vec1 = normalize(np.cross(vec2, vec0))

            p = np.stack([vec0, vec1, vec2, pos], 1)

            render_poses.append(p)

        render_poses = np.stack(render_poses, 0)
        render_poses = np.concatenate([
            render_poses,
            np.broadcast_to(poses[0, :3, -1:], render_poses[:, :3, -1:].shape)
        ], -1)

    else:

        c2w = poses_avg(poses)
        print('recentered', c2w.shape)
        print(c2w[:3, :4])

        # Get spiral
        # Get average pose
        up = normalize(poses[:, :3, 1].sum(0))

        # Find a reasonable "focus depth" for this dataset
        close_depth, inf_depth = bds.min() * .9, bds.max() * 5.
        dt = .75
        mean_dz = 1. / (((1. - dt) / close_depth + dt / inf_depth))
        focal = mean_dz * movie_render_kwargs.get('scale_f', 1)

        # Get radii for spiral path
        zdelta = movie_render_kwargs.get('zdelta', 0.5)
        zrate = movie_render_kwargs.get('zrate', 1.0)
        tt = poses[:, :3, 3]  # ptstocam(poses[:3,3,:].T, c2w).T
        rads = np.percentile(np.abs(tt), 90, 0) * movie_render_kwargs.get(
            'scale_r', 1)
        c2w_path = c2w
        N_views = 120
        N_rots = movie_render_kwargs.get('N_rots', 1)
        if path_zflat:
            # zloc = np.percentile(tt, 10, 0)[2]
            zloc = -close_depth * .1
            c2w_path[:3, 3] = c2w_path[:3, 3] + zloc * c2w_path[:3, 2]
            rads[2] = 0.
            N_rots = 1
            N_views /= 2

        # Generate poses for spiral path
        render_poses = render_path_spiral(
            c2w_path,
            up,
            rads,
            focal,
            zdelta,
            zrate=zrate,
            rots=N_rots,
            N=N_views)

    render_poses = torch.Tensor(render_poses)

    # The world coordinate system and the camera coordinate system are both
    # 3-d systems, so they are related by a 3x3 rotation matrix and a 3x1
    # translation vector
    c2w = poses_avg(poses)
    w2c = w2c_gen(poses)
    print('Data:')
    print(poses.shape, images.shape, bds.shape)

    dists = np.sum(np.square(c2w[:3, 3] - poses[:, :3, 3]), -1)
    i_test = np.argmin(dists)
    print('HOLDOUT view is', i_test)

    images = images.astype(np.float32)
    poses = poses.astype(np.float32)

    if load_SR:
        imgs_SRGT = np.moveaxis(imgs_SRGT, [-1, -2], [0, 1]).astype(np.float32)
    else:
        imgs_SRGT = None

    return images, depths, poses, bds, render_poses, i_test, imgs_SRGT, w2c
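A small sanity check for the pose helpers above (illustrative values): viewmatrix builds a 3x4 camera-to-world block whose rotation columns are orthonormal.

import numpy as np

z = np.array([0., 0., 1.])
up = np.array([0., 1., 0.])
pos = np.array([0., 0., 4.])
m = viewmatrix(z, up, pos)  # columns: [right, up, forward, position]
R = m[:, :3]
assert np.allclose(R.T @ R, np.eye(3), atol=1e-6)  # orthonormal rotation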
75  modelscope/models/cv/nerf_recon_4k/dataloader/load_tankstemple.py  Executable file
@@ -0,0 +1,75 @@
import glob
import os

import imageio
import numpy as np


def normalize(x):
    return x / np.linalg.norm(x)


def load_tankstemple_data(basedir, movie_render_kwargs={}):
    pose_paths = sorted(glob.glob(os.path.join(basedir, 'pose', '*txt')))
    rgb_paths = sorted(glob.glob(os.path.join(basedir, 'rgb', '*png')))

    all_poses = []
    all_imgs = []
    i_split = [[], []]
    for i, (pose_path, rgb_path) in enumerate(zip(pose_paths, rgb_paths)):
        i_set = int(os.path.split(rgb_path)[-1][0])
        all_poses.append(np.loadtxt(pose_path).astype(np.float32))
        all_imgs.append((imageio.imread(rgb_path) / 255.).astype(np.float32))
        i_split[i_set].append(i)

    imgs = np.stack(all_imgs, 0)
    poses = np.stack(all_poses, 0)
    i_split.append(i_split[-1])

    path_intrinsics = os.path.join(basedir, 'intrinsics.txt')
    H, W = imgs[0].shape[:2]
    K = np.loadtxt(path_intrinsics)
    focal = float(K[0, 0])

    # generate spiral poses for rendering fly-through movie
    centroid = poses[:, :3, 3].mean(0)
    radcircle = movie_render_kwargs.get('scale_r', 1.0) * np.linalg.norm(
        poses[:, :3, 3] - centroid, axis=-1).mean()
    centroid[0] += movie_render_kwargs.get('shift_x', 0)
    centroid[1] += movie_render_kwargs.get('shift_y', 0)
    centroid[2] += movie_render_kwargs.get('shift_z', 0)
    new_up_rad = movie_render_kwargs.get('pitch_deg', 0) * np.pi / 180
    target_y = radcircle * np.tan(new_up_rad)

    render_poses = []

    for th in np.linspace(0., 2. * np.pi, 200):
        camorigin = np.array(
            [radcircle * np.cos(th), 0, radcircle * np.sin(th)])
        if movie_render_kwargs.get('flip_up_vec', False):
            up = np.array([0, -1., 0])
        else:
            up = np.array([0, 1., 0])
        vec2 = normalize(camorigin)
        vec0 = normalize(np.cross(vec2, up))
        vec1 = normalize(np.cross(vec2, vec0))
        pos = camorigin + centroid
        # rotate to align with new pitch rotation
        lookat = -vec2
        lookat[1] = target_y
        lookat = normalize(lookat)
        lookat *= -1
        vec2 = -lookat
        vec1 = normalize(np.cross(vec2, vec0))

        p = np.stack([vec0, vec1, vec2, pos], 1)

        render_poses.append(p)

    render_poses = np.stack(render_poses, 0)
    render_poses = np.concatenate([
        render_poses,
        np.broadcast_to(poses[0, :3, -1:], render_poses[:, :3, -1:].shape)
    ], -1)

    return imgs, poses, render_poses, [H, W, focal], K, i_split
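A hedged call sketch, assuming the NSVF-style Tanks and Temples layout (rgb/*.png named with a leading 0 for train and 1 for test, pose/*.txt, intrinsics.txt) under a placeholder directory:

# Hedged sketch: './tankstemple/Truck' is a placeholder dataset path.
imgs, poses, render_poses, hwf, K, i_split = load_tankstemple_data(
    './tankstemple/Truck', movie_render_kwargs={'scale_r': 1.0})
i_train, i_val, i_test = i_split  # the loader duplicates the test indices, so val == test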
@@ -0,0 +1,500 @@
# Copyright (c) 2023, ETH Zurich and UNC Chapel Hill.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#
#     * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
#       its contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)

import argparse
import collections
import os
import struct

import numpy as np

CameraModel = collections.namedtuple('CameraModel',
                                     ['model_id', 'model_name', 'num_params'])
Camera = collections.namedtuple('Camera',
                                ['id', 'model', 'width', 'height', 'params'])
BaseImage = collections.namedtuple(
    'Image', ['id', 'qvec', 'tvec', 'camera_id', 'name', 'xys', 'point3D_ids'])
Point3D = collections.namedtuple(
    'Point3D', ['id', 'xyz', 'rgb', 'error', 'image_ids', 'point2D_idxs'])


class Image(BaseImage):

    def qvec2rotmat(self):
        return qvec2rotmat(self.qvec)


CAMERA_MODELS = {
    CameraModel(model_id=0, model_name='SIMPLE_PINHOLE', num_params=3),
    CameraModel(model_id=1, model_name='PINHOLE', num_params=4),
    CameraModel(model_id=2, model_name='SIMPLE_RADIAL', num_params=4),
    CameraModel(model_id=3, model_name='RADIAL', num_params=5),
    CameraModel(model_id=4, model_name='OPENCV', num_params=8),
    CameraModel(model_id=5, model_name='OPENCV_FISHEYE', num_params=8),
    CameraModel(model_id=6, model_name='FULL_OPENCV', num_params=12),
    CameraModel(model_id=7, model_name='FOV', num_params=5),
    CameraModel(model_id=8, model_name='SIMPLE_RADIAL_FISHEYE', num_params=4),
    CameraModel(model_id=9, model_name='RADIAL_FISHEYE', num_params=5),
    CameraModel(model_id=10, model_name='THIN_PRISM_FISHEYE', num_params=12)
}
CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model)
                         for camera_model in CAMERA_MODELS])
CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
                           for camera_model in CAMERA_MODELS])


def read_next_bytes(fid,
                    num_bytes,
                    format_char_sequence,
                    endian_character='<'):
    """Read and unpack the next bytes from a binary file.
    :param fid:
    :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
    :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
    :param endian_character: Any of {@, =, <, >, !}
    :return: Tuple of read and unpacked values.
    """
    data = fid.read(num_bytes)
    return struct.unpack(endian_character + format_char_sequence, data)


def write_next_bytes(fid, data, format_char_sequence, endian_character='<'):
    """pack and write to a binary file.
    :param fid:
    :param data: data to send, if multiple elements are sent at the same time,
    they should be encapsulated either in a list or a tuple
    :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
    should be the same length as the data list or tuple
    :param endian_character: Any of {@, =, <, >, !}
    """
    if isinstance(data, (list, tuple)):
        bytes = struct.pack(endian_character + format_char_sequence, *data)
    else:
        bytes = struct.pack(endian_character + format_char_sequence, data)
    fid.write(bytes)


def read_cameras_text(path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)
    """
    cameras = {}
    with open(path, 'r') as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != '#':
                elems = line.split()
                camera_id = int(elems[0])
                model = elems[1]
                width = int(elems[2])
                height = int(elems[3])
                params = np.array(tuple(map(float, elems[4:])))
                cameras[camera_id] = Camera(
                    id=camera_id,
                    model=model,
                    width=width,
                    height=height,
                    params=params)
    return cameras


def read_cameras_binary(path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasBinary(const std::string& path)
        void Reconstruction::ReadCamerasBinary(const std::string& path)
    """
    cameras = {}
    with open(path_to_model_file, 'rb') as fid:
        num_cameras = read_next_bytes(fid, 8, 'Q')[0]
        for _ in range(num_cameras):
            camera_properties = read_next_bytes(
                fid, num_bytes=24, format_char_sequence='iiQQ')
            camera_id = camera_properties[0]
            model_id = camera_properties[1]
            model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
            width = camera_properties[2]
            height = camera_properties[3]
            num_params = CAMERA_MODEL_IDS[model_id].num_params
            params = read_next_bytes(
                fid,
                num_bytes=8 * num_params,
                format_char_sequence='d' * num_params)
            cameras[camera_id] = Camera(
                id=camera_id,
                model=model_name,
                width=width,
                height=height,
                params=np.array(params))
        assert len(cameras) == num_cameras
    return cameras


def write_cameras_text(cameras, path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)
    """
    HEADER = '# Camera list with one line of data per camera:\n' + \
             '#   CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n' + \
             '# Number of cameras: {}\n'.format(len(cameras))
    with open(path, 'w') as fid:
        fid.write(HEADER)
        for _, cam in cameras.items():
            to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params]
            line = ' '.join([str(elem) for elem in to_write])
            fid.write(line + '\n')


def write_cameras_binary(cameras, path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasBinary(const std::string& path)
        void Reconstruction::ReadCamerasBinary(const std::string& path)
    """
    with open(path_to_model_file, 'wb') as fid:
        write_next_bytes(fid, len(cameras), 'Q')
        for _, cam in cameras.items():
            model_id = CAMERA_MODEL_NAMES[cam.model].model_id
            camera_properties = [cam.id, model_id, cam.width, cam.height]
            write_next_bytes(fid, camera_properties, 'iiQQ')
            for p in cam.params:
                write_next_bytes(fid, float(p), 'd')
    return cameras


def read_images_text(path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)
    """
    images = {}
    with open(path, 'r') as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != '#':
                elems = line.split()
                image_id = int(elems[0])
                qvec = np.array(tuple(map(float, elems[1:5])))
                tvec = np.array(tuple(map(float, elems[5:8])))
                camera_id = int(elems[8])
                image_name = elems[9]
                elems = fid.readline().split()
                xys = np.column_stack([
                    tuple(map(float, elems[0::3])),
                    tuple(map(float, elems[1::3]))
                ])
                point3D_ids = np.array(tuple(map(int, elems[2::3])))
                images[image_id] = Image(
                    id=image_id,
                    qvec=qvec,
                    tvec=tvec,
                    camera_id=camera_id,
                    name=image_name,
                    xys=xys,
                    point3D_ids=point3D_ids)
    return images


def read_images_binary(path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesBinary(const std::string& path)
        void Reconstruction::WriteImagesBinary(const std::string& path)
    """
    images = {}
    with open(path_to_model_file, 'rb') as fid:
        num_reg_images = read_next_bytes(fid, 8, 'Q')[0]
        for _ in range(num_reg_images):
            binary_image_properties = read_next_bytes(
                fid, num_bytes=64, format_char_sequence='idddddddi')
            image_id = binary_image_properties[0]
            qvec = np.array(binary_image_properties[1:5])
            tvec = np.array(binary_image_properties[5:8])
            camera_id = binary_image_properties[8]
            image_name = ''
            current_char = read_next_bytes(fid, 1, 'c')[0]
            while current_char != b'\x00':  # look for the ASCII 0 entry
                image_name += current_char.decode('utf-8')
                current_char = read_next_bytes(fid, 1, 'c')[0]
            num_points2D = read_next_bytes(
                fid, num_bytes=8, format_char_sequence='Q')[0]
            x_y_id_s = read_next_bytes(
                fid,
                num_bytes=24 * num_points2D,
                format_char_sequence='ddq' * num_points2D)
            xys = np.column_stack([
                tuple(map(float, x_y_id_s[0::3])),
                tuple(map(float, x_y_id_s[1::3]))
            ])
            point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
            images[image_id] = Image(
                id=image_id,
                qvec=qvec,
                tvec=tvec,
                camera_id=camera_id,
                name=image_name,
                xys=xys,
                point3D_ids=point3D_ids)
    return images


def write_images_text(images, path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)
    """
    if len(images) == 0:
        mean_observations = 0
    else:
        mean_observations = sum(
            (len(img.point3D_ids) for _, img in images.items())) / len(images)
    HEADER = '# Image list with two lines of data per image:\n' + \
             '#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n' + \
             '#   POINTS2D[] as (X, Y, POINT3D_ID)\n' + \
             '# Number of images: {}, mean observations per image: {}\n'.format(len(images), mean_observations)

    with open(path, 'w') as fid:
        fid.write(HEADER)
        for _, img in images.items():
            image_header = [
                img.id, *img.qvec, *img.tvec, img.camera_id, img.name
            ]
            first_line = ' '.join(map(str, image_header))
            fid.write(first_line + '\n')

            points_strings = []
            for xy, point3D_id in zip(img.xys, img.point3D_ids):
                points_strings.append(' '.join(map(str, [*xy, point3D_id])))
            fid.write(' '.join(points_strings) + '\n')


def write_images_binary(images, path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesBinary(const std::string& path)
        void Reconstruction::WriteImagesBinary(const std::string& path)
    """
    with open(path_to_model_file, 'wb') as fid:
        write_next_bytes(fid, len(images), 'Q')
        for _, img in images.items():
            write_next_bytes(fid, img.id, 'i')
            write_next_bytes(fid, img.qvec.tolist(), 'dddd')
            write_next_bytes(fid, img.tvec.tolist(), 'ddd')
            write_next_bytes(fid, img.camera_id, 'i')
            for char in img.name:
                write_next_bytes(fid, char.encode('utf-8'), 'c')
            write_next_bytes(fid, b'\x00', 'c')
            write_next_bytes(fid, len(img.point3D_ids), 'Q')
            for xy, p3d_id in zip(img.xys, img.point3D_ids):
                write_next_bytes(fid, [*xy, p3d_id], 'ddq')


def read_points3D_text(path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)
    """
    points3D = {}
    with open(path, 'r') as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != '#':
                elems = line.split()
                point3D_id = int(elems[0])
                xyz = np.array(tuple(map(float, elems[1:4])))
                rgb = np.array(tuple(map(int, elems[4:7])))
                error = float(elems[7])
                image_ids = np.array(tuple(map(int, elems[8::2])))
                point2D_idxs = np.array(tuple(map(int, elems[9::2])))
                points3D[point3D_id] = Point3D(
                    id=point3D_id,
                    xyz=xyz,
                    rgb=rgb,
                    error=error,
                    image_ids=image_ids,
                    point2D_idxs=point2D_idxs)
    return points3D


def read_points3D_binary(path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DBinary(const std::string& path)
        void Reconstruction::WritePoints3DBinary(const std::string& path)
    """
    points3D = {}
    with open(path_to_model_file, 'rb') as fid:
        num_points = read_next_bytes(fid, 8, 'Q')[0]
        for _ in range(num_points):
            binary_point_line_properties = read_next_bytes(
                fid, num_bytes=43, format_char_sequence='QdddBBBd')
            point3D_id = binary_point_line_properties[0]
            xyz = np.array(binary_point_line_properties[1:4])
            rgb = np.array(binary_point_line_properties[4:7])
            error = np.array(binary_point_line_properties[7])
            track_length = read_next_bytes(
                fid, num_bytes=8, format_char_sequence='Q')[0]
            track_elems = read_next_bytes(
                fid,
                num_bytes=8 * track_length,
                format_char_sequence='ii' * track_length)
            image_ids = np.array(tuple(map(int, track_elems[0::2])))
            point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
            points3D[point3D_id] = Point3D(
                id=point3D_id,
                xyz=xyz,
                rgb=rgb,
                error=error,
                image_ids=image_ids,
                point2D_idxs=point2D_idxs)
    return points3D


def write_points3D_text(points3D, path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)
    """
    if len(points3D) == 0:
        mean_track_length = 0
    else:
        mean_track_length = sum(
            (len(pt.image_ids) for _, pt in points3D.items())) / len(points3D)
    HEADER = '# 3D point list with one line of data per point:\n' + \
             '#   POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n' + \
             '# Number of points: {}, mean track length: {}\n'.format(len(points3D), mean_track_length)

    with open(path, 'w') as fid:
        fid.write(HEADER)
        for _, pt in points3D.items():
            point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
            fid.write(' '.join(map(str, point_header)) + ' ')
            track_strings = []
            for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
                track_strings.append(' '.join(map(str, [image_id, point2D])))
            fid.write(' '.join(track_strings) + '\n')


def write_points3D_binary(points3D, path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DBinary(const std::string& path)
        void Reconstruction::WritePoints3DBinary(const std::string& path)
    """
    with open(path_to_model_file, 'wb') as fid:
        write_next_bytes(fid, len(points3D), 'Q')
        for _, pt in points3D.items():
            write_next_bytes(fid, pt.id, 'Q')
            write_next_bytes(fid, pt.xyz.tolist(), 'ddd')
            write_next_bytes(fid, pt.rgb.tolist(), 'BBB')
            write_next_bytes(fid, pt.error, 'd')
            track_length = pt.image_ids.shape[0]
            write_next_bytes(fid, track_length, 'Q')
            for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
                write_next_bytes(fid, [image_id, point2D_id], 'ii')


def detect_model_format(path, ext):
    if os.path.isfile(os.path.join(path, 'cameras' + ext)) and \
            os.path.isfile(os.path.join(path, 'images' + ext)) and \
            os.path.isfile(os.path.join(path, 'points3D' + ext)):
        print("Detected model format: '" + ext + "'")
        return True

    return False


def read_model(path, ext=''):
    # try to detect the extension automatically
    if ext == '':
        if detect_model_format(path, '.bin'):
            ext = '.bin'
        elif detect_model_format(path, '.txt'):
            ext = '.txt'
        else:
            print("Provide model format: '.bin' or '.txt'")
            return

    if ext == '.txt':
        cameras = read_cameras_text(os.path.join(path, 'cameras' + ext))
        images = read_images_text(os.path.join(path, 'images' + ext))
        points3D = read_points3D_text(os.path.join(path, 'points3D') + ext)
    else:
        cameras = read_cameras_binary(os.path.join(path, 'cameras' + ext))
        images = read_images_binary(os.path.join(path, 'images' + ext))
        points3D = read_points3D_binary(os.path.join(path, 'points3D') + ext)
    return cameras, images, points3D


def write_model(cameras, images, points3D, path, ext='.bin'):
    if ext == '.txt':
        write_cameras_text(cameras, os.path.join(path, 'cameras' + ext))
        write_images_text(images, os.path.join(path, 'images' + ext))
        write_points3D_text(points3D, os.path.join(path, 'points3D') + ext)
    else:
        write_cameras_binary(cameras, os.path.join(path, 'cameras' + ext))
        write_images_binary(images, os.path.join(path, 'images' + ext))
        write_points3D_binary(points3D, os.path.join(path, 'points3D') + ext)
    return cameras, images, points3D


def qvec2rotmat(qvec):
    array_10 = 1 - 2 * qvec[2]**2 - 2 * qvec[3]**2
    array_11 = 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3]
    array_12 = 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]
    array_1 = [array_10, array_11, array_12]
    array_20 = 2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3]
    array_21 = 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2
    array_22 = 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]
    array_2 = [array_20, array_21, array_22]
    array_30 = 2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2]
    array_31 = 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1]
    array_32 = 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2
    array_3 = [array_30, array_31, array_32]

    return np.array([array_1, array_2, array_3])
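A quick numerical check of qvec2rotmat above (COLMAP stores quaternions as w, x, y, z): the identity quaternion gives the identity matrix, and a 90-degree rotation about +z gives the expected matrix.

import numpy as np

assert np.allclose(qvec2rotmat(np.array([1., 0., 0., 0.])), np.eye(3))
# 90 degrees about z: q = (cos 45°, 0, 0, sin 45°)
R = qvec2rotmat(np.array([np.cos(np.pi / 4), 0., 0., np.sin(np.pi / 4)]))
assert np.allclose(R, np.array([[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]]))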
200  modelscope/models/cv/nerf_recon_4k/nerf_preprocess.py  Normal file
@@ -0,0 +1,200 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import glob
import os
import subprocess
from typing import Any, Dict, Union

import cv2
import numpy as np
import tensorflow as tf

from modelscope.metainfo import Preprocessors
from modelscope.preprocessors import Preprocessor
from modelscope.preprocessors.builder import PREPROCESSORS
from modelscope.utils.constant import Fields, ModeKeys
from modelscope.utils.logger import get_logger

logger = get_logger()


@PREPROCESSORS.register_module(
    Fields.cv, module_name=Preprocessors.nerf_recon_acc_preprocessor)
class NeRFReconPreprocessor(Preprocessor):

    def __init__(self,
                 mode=ModeKeys.INFERENCE,
                 data_type='colmap',
                 use_mask=True,
                 match_type='exhaustive_matcher',
                 frame_count=60,
                 use_distortion=False,
                 *args,
                 **kwargs):
        super().__init__(mode)

        # set preprocessor info
        self.data_type = data_type
        self.use_mask = use_mask

        self.match_type = match_type
        if match_type != 'exhaustive_matcher' and match_type != 'sequential_matcher':
            raise Exception('matcher type {} is not valid'.format(match_type))
        self.frame_count = frame_count
        self.use_distortion = use_distortion

    def __call__(self, data: Union[str, Dict], **kwargs) -> Dict[str, Any]:
        if self.data_type != 'blender' and self.data_type != 'colmap':
            raise Exception('data type {} is not supported currently'.format(
                self.data_type))

        data_dir = data['data_dir']
        os.makedirs(data_dir, exist_ok=True)
        if self.data_type == 'blender':
            transform_file = os.path.join(data_dir, 'transforms_train.json')
            if not os.path.exists(transform_file):
                raise Exception('Blender dataset is not found')

        if self.data_type == 'colmap':
            video_path = data['video_input_path']
            if video_path != '':
                self.split_frames(video_path, data_dir, self.frame_count)
            self.gen_poses(data_dir, self.match_type, self.use_distortion)
            files_needed = [
                '{}.bin'.format(f) for f in ['cameras', 'images', 'points3D']
            ]
            if self.use_distortion:
                colmap_dir = os.path.join(data_dir, 'preprocess/sparse')
            else:
                colmap_dir = os.path.join(data_dir, 'sparse/0')
            files_had = os.listdir(colmap_dir)
            if not all([f in files_had for f in files_needed]):
                raise Exception('colmap run failed')

        data = {}
        data['data_dir'] = data_dir
        return data

    def split_frames(self, video_path, basedir, frame_count=60):
        cap = cv2.VideoCapture(video_path)
        fps = round(cap.get(cv2.CAP_PROP_FPS))
        frame_total = round(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        if not os.path.exists(os.path.join(basedir, 'images')):
            logger.info('Need to run ffmpeg')
            image_dir = os.path.join(basedir, 'images')
            os.makedirs(image_dir, exist_ok=True)
            # scale the sampling rate so roughly frame_count frames survive
            fps = int(frame_count * fps / frame_total)
            cmd = f"ffmpeg -i {video_path} -qscale:v 1 -qmin 1 -vf \"fps={fps}\" {image_dir}/%04d.png"
            os.system(cmd)
            logger.info('split frames done')
        else:
            logger.info('Don\'t need to run ffmpeg')
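For a concrete feel for the frame-sampling arithmetic above (numbers illustrative): a 30 fps video with 900 total frames and frame_count=60 yields fps = int(60 * 30 / 900) = 2, so ffmpeg keeps two frames per second, roughly 60 frames in total.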
    def run_colmap(self, basedir, match_type, use_distortion):
        logfile_name = os.path.join(basedir, 'colmap_output.txt')
        logfile = open(logfile_name, 'w')

        feature_extractor_args = [
            'colmap', 'feature_extractor', '--database_path',
            os.path.join(basedir, 'database.db'), '--image_path',
            os.path.join(basedir, 'images'), '--ImageReader.single_camera', '1'
        ]
        feat_output = (
            subprocess.check_output(
                feature_extractor_args, universal_newlines=True))
        logfile.write(feat_output)
        logger.info('Feature extraction done')

        # match_type may be exhaustive_matcher or sequential_matcher
        matcher_args = [
            'colmap',
            match_type,
            '--database_path',
            os.path.join(basedir, 'database.db'),
        ]

        match_output = (
            subprocess.check_output(matcher_args, universal_newlines=True))
        logfile.write(match_output)
        logger.info('Feature matching done')

        p = os.path.join(basedir, 'sparse')
        if not os.path.exists(p):
            os.makedirs(p)

        mapper_args = [
            'colmap',
            'mapper',
            '--database_path',
            os.path.join(basedir, 'database.db'),
            '--image_path',
            os.path.join(basedir, 'images'),
            '--output_path',
            os.path.join(
                basedir, 'sparse'
            ),  # --export_path changed to --output_path in colmap 3.6
            '--Mapper.num_threads',
            '16',
            '--Mapper.init_min_tri_angle',
            '4',
            '--Mapper.multiple_models',
            '0',
            '--Mapper.extract_colors',
            '0',
        ]

        map_output = (
            subprocess.check_output(mapper_args, universal_newlines=True))
        logfile.write(map_output)
        logger.info('Sparse map creation done.')

        bundle_adjuster_cmd = [
            'colmap',
            'bundle_adjuster',
            '--input_path',
            os.path.join(basedir, 'sparse/0'),
            '--output_path',
            os.path.join(basedir, 'sparse/0'),
            '--BundleAdjustment.refine_principal_point',
            '1',
        ]
        map_output = (
            subprocess.check_output(
                bundle_adjuster_cmd, universal_newlines=True))
        logfile.write(map_output)
        logger.info('Intrinsics refinement done.')

        if use_distortion:
            os.makedirs(os.path.join(basedir, 'preprocess'), exist_ok=True)
            distort_cmd = [
                'colmap', 'image_undistorter', '--image_path',
                os.path.join(basedir, 'images'), '--input_path',
                os.path.join(basedir, 'sparse/0'), '--output_path',
                os.path.join(basedir, 'preprocess'), '--output_type', 'COLMAP'
            ]
            map_output = (
                subprocess.check_output(distort_cmd, universal_newlines=True))
            logfile.write(map_output)
            logger.info('Image undistortion done.')

        logfile.close()
        logger.info(
            'Finished running COLMAP, see {} for logs'.format(logfile_name))

    def gen_poses(self, basedir, match_type, use_distortion):
        files_needed = [
            '{}.bin'.format(f) for f in ['cameras', 'images', 'points3D']
        ]
        if os.path.exists(os.path.join(basedir, 'sparse/0')):
            files_had = os.listdir(os.path.join(basedir, 'sparse/0'))
        else:
            files_had = []
        if not all([f in files_had for f in files_needed]):
            logger.info('Need to run COLMAP')
            self.run_colmap(basedir, match_type, use_distortion)
        else:
            logger.info('Don\'t need to run COLMAP')
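For reference, a successful run implies this layout under basedir (inferred from the checks in the code above, not from COLMAP documentation):

basedir/
    images/             # extracted video frames
    database.db         # COLMAP feature/match database
    colmap_output.txt   # captured COLMAP logs
    sparse/0/           # cameras.bin, images.bin, points3D.bin
    preprocess/         # undistorted outputs, only when use_distortion=True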
289
modelscope/models/cv/nerf_recon_4k/nerf_recon_4k.py
Executable file
@@ -0,0 +1,289 @@
import argparse
import os
import random
import time

import imageio
import mmcv
import numpy as np
import torch
from tqdm import tqdm, trange

from modelscope.metainfo import Models
from modelscope.models.base import Tensor, TorchModel
from modelscope.models.builder import MODELS
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger
from .dataloader.load_data import load_data
from .network.dvgo import DirectMPIGO, DirectVoxGO, SFTNet, get_rays_of_a_view

logger = get_logger()


def to8b(x):
    return (255 * np.clip(x, 0, 1)).astype(np.uint8)


__all__ = ['NeRFRecon4K']


@MODELS.register_module(Tasks.nerf_recon_4k, module_name=Models.nerf_recon_4k)
class NeRFRecon4K(TorchModel):

    def __init__(self, model_dir, **kwargs):
        super().__init__(model_dir, **kwargs)

        if not torch.cuda.is_available():
            raise Exception('GPU is required')
        self.device = torch.device('cuda')
        logger.info('model params: {}'.format(kwargs))
        self.data_type = kwargs['data_type']
        # self.use_mask = kwargs['use_mask']
        # self.num_samples_per_ray = kwargs['num_samples_per_ray']
        self.test_ray_chunk = kwargs['test_ray_chunk']
        # self.enc_ckpt_path = kwargs['enc_ckpt_path']
        # self.dec_ckpt_path = kwargs['dec_ckpt_path']

        self.enc_ckpt_path = os.path.join(model_dir, 'fine_100000.tar')
        if not os.path.exists(self.enc_ckpt_path):
            raise Exception('encoder ckpt path not found')
        # if self.dec_ckpt_path == '':
        self.dec_ckpt_path = os.path.join(model_dir, 'sresrnet_100000.pth')
        if not os.path.exists(self.dec_ckpt_path):
            raise Exception('decoder ckpt path not found')

        self.ckpt_name = self.dec_ckpt_path.split('/')[-1][:-4]
        self.ndc = self.data_type == 'llff'
        self.sr_ratio = int(kwargs['factor'] / kwargs['load_sr'])
        self.load_existed_model()

        self.test_tile = kwargs['test_tile']
        self.stepsize = kwargs['stepsize']

    def load_existed_model(self):
        if self.ndc:
            model_class = DirectMPIGO
            ckpt = torch.load(self.enc_ckpt_path, map_location='cpu')
        else:
            model_class = DirectVoxGO
            ckpt = torch.load(self.enc_ckpt_path, map_location='cpu')
            ckpt['model_kwargs']['mask_cache_path'] = self.enc_ckpt_path
        self.encoder = model_class(**ckpt['model_kwargs'])
        self.encoder.load_state_dict(ckpt['model_state_dict'])
        self.encoder = self.encoder.to(self.device)

        self.decoder = SFTNet(
            n_in_colors=3,
            scale=self.sr_ratio,
            num_feat=64,
            num_block=5,
            num_grow_ch=32,
            num_cond=1,
            dswise=False).to(self.device)
        self.decoder.load_network(
            load_path=self.dec_ckpt_path, device=self.device)
        self.decoder.eval()

    def nerf_reconstruction(self, data_cfg, render_dir):
        data_dict = load_everything(cfg_data=data_cfg)

        self.render_viewpoints_kwargs = {
            'render_kwargs': {
                'near': data_dict['near'],
                'far': data_dict['far'],
                'bg': 1 if data_dict['white_bkgd'] else 0,
                'stepsize': self.stepsize,
                'inverse_y': False,
                'flip_x': False,
                'flip_y': False,
                'render_depth': True,
            },
        }

        os.makedirs(render_dir, exist_ok=True)
        print('All results are dumped into', render_dir)
        rgbs, depths, bgmaps, _, _, rgb_features = self.render_viewpoints(
            render_poses=data_dict['poses'][data_dict['i_test']],
            HW=data_dict['HW'][data_dict['i_test']],
            Ks=data_dict['Ks'][data_dict['i_test']],
            gt_imgs=[
                data_dict['images'][i].cpu().numpy()
                for i in data_dict['i_test']
            ],
            savedir=render_dir,
            dump_images=False,
            **self.render_viewpoints_kwargs)

        rgbsr = []
        for idx, rgbsave in enumerate(tqdm(rgb_features)):
            rgbtest = torch.from_numpy(rgbsave).movedim(-1, 0).unsqueeze(0).to(
                self.device)
            # rgb = torch.from_numpy(rgbs[idx]).movedim(-1, 0).unsqueeze(0).to(self.device)

            input_cond = torch.from_numpy(depths).movedim(-1, 1)
            input_cond = input_cond[idx, :, :, :].to(self.device)

            if self.test_tile:
                rgb_srtest = self.decoder.tile_process(
                    rgbtest, input_cond, tile_size=self.test_tile)
            else:
                rgb_srtest = self.decoder(rgbtest,
                                          input_cond).detach().to('cpu')

            rgb_srsave = rgb_srtest.squeeze().movedim(0, -1).detach().clamp(
                0, 1).numpy()
            rgbsr.append(rgb_srsave)
        print(
            '''all inference is done, saving images... because our images are
            4K (4032x3024), saving may be time-consuming.''')
        rgbsr = np.array(rgbsr)
        for i in trange(len(rgbsr)):
            rgb8 = to8b(rgbsr[i])
            filename = os.path.join(render_dir, '{:03d}_dec.png'.format(i))
            imageio.imwrite(filename, rgb8)

        imageio.mimwrite(
            os.path.join(render_dir, 'result_dec.mp4'),
            to8b(rgbsr),
            fps=25,
            codec='libx264',
            quality=8)

    @torch.no_grad()
    def render_viewpoints(self,
                          render_poses,
                          HW,
                          Ks,
                          render_kwargs,
                          gt_imgs=None,
                          savedir=None,
                          dump_images=False,
                          render_factor=0,
                          eval_ssim=False,
                          eval_lpips_alex=False,
                          eval_lpips_vgg=False):
        '''Render images for the given viewpoints; run evaluation if gt given.
        '''
        assert len(render_poses) == len(HW) and len(HW) == len(Ks)

        if render_factor != 0:
            HW = np.copy(HW)
            Ks = np.copy(Ks)
            HW = (HW / render_factor).astype(int)
            Ks[:, :2, :3] /= render_factor

        rgbs = []
        rgb_features = []
        depths = []
        bgmaps = []
        psnrs = []
        viewdirs_all = []
        ssims = []
        lpips_alex = []
        lpips_vgg = []

        for i, c2w in enumerate(tqdm(render_poses)):
            H, W = HW[i]
            K = Ks[i]
            c2w = torch.Tensor(c2w)
            rays_o, rays_d, viewdirs = get_rays_of_a_view(
                H,
                W,
                K,
                c2w,
                self.ndc,
                inverse_y=False,
                flip_x=False,
                flip_y=False)
            keys = ['rgb_marched', 'depth', 'alphainv_last', 'rgb_feature']
            rays_o = rays_o.flatten(0, -2).to('cuda')
            rays_d = rays_d.flatten(0, -2).to('cuda')
            viewdirs = viewdirs.flatten(0, -2).to('cuda')
            time_rdstart = time.time()
            render_result_chunks = [{
                k: v
                for k, v in self.encoder(ro, rd, vd, **render_kwargs).items()
                if k in keys
            } for ro, rd, vd in zip(
                rays_o.split(self.test_ray_chunk, 0),
                rays_d.split(self.test_ray_chunk, 0),
                viewdirs.split(self.test_ray_chunk, 0))]
            render_result = {
                k:
                torch.cat([ret[k]
                           for ret in render_result_chunks]).reshape(H, W, -1)
                for k in render_result_chunks[0].keys()
            }
            print(f'render time is: {time.time() - time_rdstart}')
            rgb = render_result['rgb_marched'].clamp(0, 1).cpu().numpy()
            rgb_feature = render_result['rgb_feature'].cpu().numpy()
            depth = render_result['depth'].cpu().numpy()
            bgmap = render_result['alphainv_last'].cpu().numpy()

            rgbs.append(rgb)
            rgb_features.append(rgb_feature)
            depths.append(depth)
            bgmaps.append(bgmap)
            viewdirs_all.append(viewdirs)
            if i == 0:
                print('Testing', rgb.shape)

            if gt_imgs is not None and render_factor == 0:
                p = -10. * np.log10(np.mean(np.square(rgb - gt_imgs[i])))
                psnrs.append(p)

        if len(psnrs):
            print('Testing psnr', np.mean(psnrs), '(avg)')
            if eval_ssim:
                print('Testing ssim', np.mean(ssims), '(avg)')
            if eval_lpips_vgg:
                print('Testing lpips (vgg)', np.mean(lpips_vgg), '(avg)')
            if eval_lpips_alex:
                print('Testing lpips (alex)', np.mean(lpips_alex), '(avg)')

        if savedir is not None and dump_images:
            for i in trange(len(rgbs)):
                rgb8 = to8b(rgbs[i])
                filename = os.path.join(savedir, '{:03d}_enc.png'.format(i))
                imageio.imwrite(filename, rgb8)

        rgbs = np.array(rgbs)
        rgb_features = np.array(rgb_features)
        depths = np.array(depths)
        bgmaps = np.array(bgmaps)

        return rgbs, depths, bgmaps, psnrs, viewdirs_all, rgb_features
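For reference, the test-time metric above is plain PSNR over images in [0, 1]: PSNR = -10 * log10(mean((rgb - gt)^2)). For example, an MSE of 1e-3 corresponds to 30 dB.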


def load_everything(cfg_data):
    '''Load images / poses / camera settings / data split.
    '''
    cfg_data = mmcv.Config(cfg_data)
    data_dict = load_data(cfg_data)

    # remove useless fields
    kept_keys = {
        'hwf', 'HW', 'Ks', 'near', 'far', 'near_clip', 'i_train', 'i_val',
        'i_test', 'irregular_shape', 'poses', 'render_poses', 'images',
        'white_bkgd'
    }
    # if cfg.data.load_sr:
    kept_keys.add('srgt')
    kept_keys.add('w2c')
    data_dict['srgt'] = torch.FloatTensor(data_dict['srgt'], device='cpu')
    data_dict['w2c'] = torch.FloatTensor(data_dict['w2c'], device='cpu')
    for k in list(data_dict.keys()):
        if k not in kept_keys:
            data_dict.pop(k)

    # construct data tensors
    if data_dict['irregular_shape']:
        data_dict['images'] = [
            torch.FloatTensor(im, device='cpu') for im in data_dict['images']
        ]
    else:
        data_dict['images'] = torch.FloatTensor(
            data_dict['images'], device='cpu')
    data_dict['poses'] = torch.Tensor(data_dict['poses'])
    return data_dict
1957
modelscope/models/cv/nerf_recon_4k/network/dvgo.py
Executable file
File diff suppressed because it is too large
176
modelscope/models/cv/nerf_recon_4k/network/utils.py
Normal file
@@ -0,0 +1,176 @@
# The implementation is partly adopted from nerfacc, made publicly available under the MIT License
# at https://github.com/KAIR-BAIR/nerfacc/blob/master/examples/radiance_fields/ngp.py
import gc
from collections import defaultdict

import mcubes
import numpy as np
import tinycudann as tcnn
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.cuda.amp import custom_bwd, custom_fwd


class PSNR(nn.Module):

    def __init__(self):
        super().__init__()

    def forward(self, inputs, targets, valid_mask=None, reduction='mean'):
        assert reduction in ['mean', 'none']
        value = (inputs - targets)**2
        if valid_mask is not None:
            value = value[valid_mask]
        if reduction == 'mean':
            return -10 * torch.log10(torch.mean(value))
        elif reduction == 'none':
            return -10 * torch.log10(
                torch.mean(value, dim=tuple(range(value.ndim)[1:])))


def extract_fields(bound_min, bound_max, resolution, query_func):
    N = 64
    X = torch.linspace(bound_min[0], bound_max[0], resolution).split(N)
    Y = torch.linspace(bound_min[1], bound_max[1], resolution).split(N)
    Z = torch.linspace(bound_min[2], bound_max[2], resolution).split(N)

    u = np.zeros([resolution, resolution, resolution], dtype=np.float32)
    with torch.no_grad():
        for xi, xs in enumerate(X):
            for yi, ys in enumerate(Y):
                for zi, zs in enumerate(Z):
                    xx, yy, zz = torch.meshgrid(xs, ys, zs)
                    xx = xx.reshape(-1, 1)
                    yy = yy.reshape(-1, 1)
                    zz = zz.reshape(-1, 1)
                    pts = torch.cat([xx, yy, zz], dim=-1).cuda()
                    val = query_func(pts).reshape(
                        len(xs), len(ys), len(zs)).detach().cpu().numpy()
                    u[xi * N:xi * N + len(xs), yi * N:yi * N + len(ys),
                      zi * N:zi * N + len(zs)] = val
    return u


def extract_geometry(bound_min, bound_max, resolution, threshold, query_func):
    u = extract_fields(bound_min, bound_max, resolution, query_func)
    vertices, triangles = mcubes.marching_cubes(u, threshold)
    b_max_np = bound_max.detach().cpu().numpy()
    b_min_np = bound_min.detach().cpu().numpy()

    vertices = vertices / (resolution - 1.0) * (
        b_max_np - b_min_np)[None, :] + b_min_np[None, :]
    return vertices, triangles
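A minimal usage sketch for extract_geometry; the density field and threshold here are illustrative assumptions, not values from the diff (note extract_fields moves query points to CUDA, so a GPU is required):

# Extract a unit-sphere mesh from a toy density field at 128^3 resolution.
import torch
bound_min = torch.tensor([-1.0, -1.0, -1.0])
bound_max = torch.tensor([1.0, 1.0, 1.0])
density = lambda pts: 1.0 - pts.norm(dim=-1)  # positive inside the sphere
vertices, triangles = extract_geometry(
    bound_min, bound_max, resolution=128, threshold=0.0, query_func=density)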


def chunk_batch(func, chunk_size, *args, **kwargs):
    # Apply func to args in chunks of chunk_size along dim 0,
    # then concatenate the per-chunk results back together.
    B = None
    for arg in args:
        if isinstance(arg, torch.Tensor):
            B = arg.shape[0]
            break
    out = defaultdict(list)
    out_type = None
    for i in range(0, B, chunk_size):
        out_chunk = func(
            *[
                arg[i:i + chunk_size] if isinstance(arg, torch.Tensor) else arg
                for arg in args
            ], **kwargs)
        if out_chunk is None:
            continue
        out_type = type(out_chunk)
        if isinstance(out_chunk, torch.Tensor):
            out_chunk = {0: out_chunk}
        elif isinstance(out_chunk, tuple) or isinstance(out_chunk, list):
            chunk_length = len(out_chunk)
            out_chunk = {i: chunk for i, chunk in enumerate(out_chunk)}
        elif isinstance(out_chunk, dict):
            pass
        else:
            raise TypeError(
                'func must return a Tensor, tuple, list or dict, got {}'.format(
                    type(out_chunk)))
        for k, v in out_chunk.items():
            out[k].append(v if torch.is_grad_enabled() else v.detach())

    if out_type is None:
        return

    out = {k: torch.cat(v, dim=0) for k, v in out.items()}
    if out_type is torch.Tensor:
        return out[0]
    elif out_type in [tuple, list]:
        return out_type([out[i] for i in range(chunk_length)])
    elif out_type is dict:
        return out
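A quick usage sketch, illustrative only: chunking a large pointwise query to bound peak memory while returning one concatenated tensor.

# Evaluate a function over 1M points, 65536 at a time.
pts = torch.randn(1_000_000, 3)
feats = chunk_batch(lambda x: x * 2.0, 65536, pts)
assert feats.shape == pts.shape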


def get_activation(name):
    # check for None before lowercasing, otherwise None.lower() raises
    if name is None:
        return nn.Identity()
    name = name.lower()
    if name == 'none':
        return nn.Identity()
    elif name.startswith('scale'):
        scale_factor = float(name[5:])
        return lambda x: x.clamp(0., scale_factor) / scale_factor
    elif name.startswith('clamp'):
        clamp_max = float(name[5:])
        return lambda x: x.clamp(0., clamp_max)
    elif name.startswith('mul'):
        mul_factor = float(name[3:])
        return lambda x: x * mul_factor
    elif name == 'trunc_exp':
        return trunc_exp
    elif name.startswith('+') or name.startswith('-'):
        return lambda x: x + float(name)
    elif name == 'sigmoid':
        return lambda x: torch.sigmoid(x)
    elif name == 'tanh':
        return lambda x: torch.tanh(x)
    else:
        return getattr(F, name)


class _TruncExp(Function):
    # Implementation from torch-ngp:
    # https://github.com/ashawkey/torch-ngp/blob/93b08a0d4ec1cc6e69d85df7f0acdfb99603b628/activation.py
    @staticmethod
    @custom_fwd(cast_inputs=torch.float32)
    def forward(ctx, x):  # pylint: disable=arguments-differ
        ctx.save_for_backward(x)
        return torch.exp(x)

    @staticmethod
    @custom_bwd
    def backward(ctx, g):
        x = ctx.saved_tensors[0]
        # clamp the saved input so the backward pass cannot overflow
        return g * torch.exp(torch.clamp(x, max=15))


trunc_exp = _TruncExp.apply


def dot(x, y):
    return torch.sum(x * y, -1, keepdim=True)


def reflect(x, n):
    return 2 * dot(x, n) * n - x


def normalize(dat, inp_scale, tgt_scale):
    if inp_scale is None:
        inp_scale = [dat.min(), dat.max()]
    dat = (dat - inp_scale[0]) / (inp_scale[1] - inp_scale[0])
    dat = dat * (tgt_scale[1] - tgt_scale[0]) + tgt_scale[0]
    return dat


def cleanup():
    gc.collect()
    torch.cuda.empty_cache()
    tcnn.free_temporary_memory()


def update_module_step(m, epoch, global_step):
    if hasattr(m, 'update_step'):
        m.update_step(epoch, global_step)
0
modelscope/ops/4knerf/__init__.py
Normal file
85
modelscope/ops/4knerf/adam_upd.cpp
Executable file
@@ -0,0 +1,85 @@
#include <torch/extension.h>

#include <vector>


void adam_upd_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    int step, float beta1, float beta2, float lr, float eps);

void masked_adam_upd_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    int step, float beta1, float beta2, float lr, float eps);

void adam_upd_with_perlr_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    torch::Tensor perlr,
    int step, float beta1, float beta2, float lr, float eps);


// C++ interface

#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

void adam_upd(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    int step, float beta1, float beta2, float lr, float eps) {
  CHECK_INPUT(param);
  CHECK_INPUT(grad);
  CHECK_INPUT(exp_avg);
  CHECK_INPUT(exp_avg_sq);
  adam_upd_cuda(param, grad, exp_avg, exp_avg_sq,
                step, beta1, beta2, lr, eps);
}

void masked_adam_upd(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    int step, float beta1, float beta2, float lr, float eps) {
  CHECK_INPUT(param);
  CHECK_INPUT(grad);
  CHECK_INPUT(exp_avg);
  CHECK_INPUT(exp_avg_sq);
  masked_adam_upd_cuda(param, grad, exp_avg, exp_avg_sq,
                       step, beta1, beta2, lr, eps);
}

void adam_upd_with_perlr(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    torch::Tensor perlr,
    int step, float beta1, float beta2, float lr, float eps) {
  CHECK_INPUT(param);
  CHECK_INPUT(grad);
  CHECK_INPUT(exp_avg);
  CHECK_INPUT(exp_avg_sq);
  adam_upd_with_perlr_cuda(param, grad, exp_avg, exp_avg_sq, perlr,
                           step, beta1, beta2, lr, eps);
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("adam_upd", &adam_upd,
        "Adam update");
  m.def("masked_adam_upd", &masked_adam_upd,
        "Adam update ignoring zero grad");
  m.def("adam_upd_with_perlr", &adam_upd_with_perlr,
        "Adam update with per-voxel lr");
}
132
modelscope/ops/4knerf/adam_upd_kernel.cu
Executable file
@@ -0,0 +1,132 @@
#include <torch/extension.h>

#include <cuda.h>
#include <cuda_runtime.h>

#include <vector>

template <typename scalar_t>
__global__ void adam_upd_cuda_kernel(
    scalar_t* __restrict__ param,
    const scalar_t* __restrict__ grad,
    scalar_t* __restrict__ exp_avg,
    scalar_t* __restrict__ exp_avg_sq,
    const size_t N,
    const float step_size, const float beta1, const float beta2, const float eps) {

  const size_t index = blockIdx.x * blockDim.x + threadIdx.x;
  if(index<N) {
    exp_avg[index] = beta1 * exp_avg[index] + (1-beta1) * grad[index];
    exp_avg_sq[index] = beta2 * exp_avg_sq[index] + (1-beta2) * grad[index] * grad[index];
    param[index] -= step_size * exp_avg[index] / (sqrt(exp_avg_sq[index]) + eps);
  }
}

template <typename scalar_t>
__global__ void masked_adam_upd_cuda_kernel(
    scalar_t* __restrict__ param,
    const scalar_t* __restrict__ grad,
    scalar_t* __restrict__ exp_avg,
    scalar_t* __restrict__ exp_avg_sq,
    const size_t N,
    const float step_size, const float beta1, const float beta2, const float eps) {

  const size_t index = blockIdx.x * blockDim.x + threadIdx.x;
  if(index<N && grad[index]!=0) {
    exp_avg[index] = beta1 * exp_avg[index] + (1-beta1) * grad[index];
    exp_avg_sq[index] = beta2 * exp_avg_sq[index] + (1-beta2) * grad[index] * grad[index];
    param[index] -= step_size * exp_avg[index] / (sqrt(exp_avg_sq[index]) + eps);
  }
}

template <typename scalar_t>
__global__ void adam_upd_with_perlr_cuda_kernel(
    scalar_t* __restrict__ param,
    const scalar_t* __restrict__ grad,
    scalar_t* __restrict__ exp_avg,
    scalar_t* __restrict__ exp_avg_sq,
    scalar_t* __restrict__ perlr,
    const size_t N,
    const float step_size, const float beta1, const float beta2, const float eps) {

  const size_t index = blockIdx.x * blockDim.x + threadIdx.x;
  if(index<N) {
    exp_avg[index] = beta1 * exp_avg[index] + (1-beta1) * grad[index];
    exp_avg_sq[index] = beta2 * exp_avg_sq[index] + (1-beta2) * grad[index] * grad[index];
    param[index] -= step_size * perlr[index] * exp_avg[index] / (sqrt(exp_avg_sq[index]) + eps);
  }
}

void adam_upd_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    const int step, const float beta1, const float beta2, const float lr, const float eps) {

  const size_t N = param.numel();

  const int threads = 256;
  const int blocks = (N + threads - 1) / threads;

  const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step));

  AT_DISPATCH_FLOATING_TYPES(param.type(), "adam_upd_cuda", ([&] {
    adam_upd_cuda_kernel<scalar_t><<<blocks, threads>>>(
        param.data<scalar_t>(),
        grad.data<scalar_t>(),
        exp_avg.data<scalar_t>(),
        exp_avg_sq.data<scalar_t>(),
        N, step_size, beta1, beta2, eps);
  }));
}
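For reference, the folded scalar step size above is Adam's bias correction: step_size = lr * sqrt(1 - beta2^t) / (1 - beta1^t), so the per-element update param -= step_size * m / (sqrt(v) + eps) matches the textbook update param -= lr * m_hat / (sqrt(v_hat) + eps) with m_hat = m / (1 - beta1^t) and v_hat = v / (1 - beta2^t), up to where eps is applied.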

void masked_adam_upd_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    const int step, const float beta1, const float beta2, const float lr, const float eps) {

  const size_t N = param.numel();

  const int threads = 256;
  const int blocks = (N + threads - 1) / threads;

  const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step));

  AT_DISPATCH_FLOATING_TYPES(param.type(), "masked_adam_upd_cuda", ([&] {
    masked_adam_upd_cuda_kernel<scalar_t><<<blocks, threads>>>(
        param.data<scalar_t>(),
        grad.data<scalar_t>(),
        exp_avg.data<scalar_t>(),
        exp_avg_sq.data<scalar_t>(),
        N, step_size, beta1, beta2, eps);
  }));
}

void adam_upd_with_perlr_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    torch::Tensor perlr,
    const int step, const float beta1, const float beta2, const float lr, const float eps) {

  const size_t N = param.numel();

  const int threads = 256;
  const int blocks = (N + threads - 1) / threads;

  const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step));

  AT_DISPATCH_FLOATING_TYPES(param.type(), "adam_upd_with_perlr_cuda", ([&] {
    adam_upd_with_perlr_cuda_kernel<scalar_t><<<blocks, threads>>>(
        param.data<scalar_t>(),
        grad.data<scalar_t>(),
        exp_avg.data<scalar_t>(),
        exp_avg_sq.data<scalar_t>(),
        perlr.data<scalar_t>(),
        N, step_size, beta1, beta2, eps);
  }));
}
182
modelscope/ops/4knerf/render_utils.cpp
Executable file
@@ -0,0 +1,182 @@
#include <torch/extension.h>

#include <vector>


std::vector<torch::Tensor> infer_t_minmax_cuda(
    torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor xyz_min, torch::Tensor xyz_max,
    const float near, const float far);

torch::Tensor infer_n_samples_cuda(torch::Tensor rays_d, torch::Tensor t_min, torch::Tensor t_max, const float stepdist);

std::vector<torch::Tensor> infer_ray_start_dir_cuda(torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_min);

std::vector<torch::Tensor> sample_pts_on_rays_cuda(
    torch::Tensor rays_o, torch::Tensor rays_d,
    torch::Tensor xyz_min, torch::Tensor xyz_max,
    const float near, const float far, const float stepdist);

std::vector<torch::Tensor> sample_ndc_pts_on_rays_cuda(
    torch::Tensor rays_o, torch::Tensor rays_d,
    torch::Tensor xyz_min, torch::Tensor xyz_max,
    const int N_samples);

torch::Tensor sample_bg_pts_on_rays_cuda(
    torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_max,
    const float bg_preserve, const int N_samples);

torch::Tensor maskcache_lookup_cuda(torch::Tensor world, torch::Tensor xyz, torch::Tensor xyz2ijk_scale, torch::Tensor xyz2ijk_shift);

std::vector<torch::Tensor> raw2alpha_cuda(torch::Tensor density, const float shift, const float interval);
std::vector<torch::Tensor> raw2alpha_nonuni_cuda(torch::Tensor density, const float shift, torch::Tensor interval);

torch::Tensor raw2alpha_backward_cuda(torch::Tensor exp, torch::Tensor grad_back, const float interval);
torch::Tensor raw2alpha_nonuni_backward_cuda(torch::Tensor exp, torch::Tensor grad_back, torch::Tensor interval);

std::vector<torch::Tensor> alpha2weight_cuda(torch::Tensor alpha, torch::Tensor ray_id, const int n_rays);

torch::Tensor alpha2weight_backward_cuda(
    torch::Tensor alpha, torch::Tensor weight, torch::Tensor T, torch::Tensor alphainv_last,
    torch::Tensor i_start, torch::Tensor i_end, const int n_rays,
    torch::Tensor grad_weights, torch::Tensor grad_last);


#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

std::vector<torch::Tensor> infer_t_minmax(
    torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor xyz_min, torch::Tensor xyz_max,
    const float near, const float far) {
  CHECK_INPUT(rays_o);
  CHECK_INPUT(rays_d);
  CHECK_INPUT(xyz_min);
  CHECK_INPUT(xyz_max);
  return infer_t_minmax_cuda(rays_o, rays_d, xyz_min, xyz_max, near, far);
}

torch::Tensor infer_n_samples(torch::Tensor rays_d, torch::Tensor t_min, torch::Tensor t_max, const float stepdist) {
  CHECK_INPUT(rays_d);
  CHECK_INPUT(t_min);
  CHECK_INPUT(t_max);
  return infer_n_samples_cuda(rays_d, t_min, t_max, stepdist);
}

std::vector<torch::Tensor> infer_ray_start_dir(torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_min) {
  CHECK_INPUT(rays_o);
  CHECK_INPUT(rays_d);
  CHECK_INPUT(t_min);
  return infer_ray_start_dir_cuda(rays_o, rays_d, t_min);
}

std::vector<torch::Tensor> sample_pts_on_rays(
    torch::Tensor rays_o, torch::Tensor rays_d,
    torch::Tensor xyz_min, torch::Tensor xyz_max,
    const float near, const float far, const float stepdist) {
  CHECK_INPUT(rays_o);
  CHECK_INPUT(rays_d);
  CHECK_INPUT(xyz_min);
  CHECK_INPUT(xyz_max);
  assert(rays_o.dim()==2);
  assert(rays_o.size(1)==3);
  return sample_pts_on_rays_cuda(rays_o, rays_d, xyz_min, xyz_max, near, far, stepdist);
}

std::vector<torch::Tensor> sample_ndc_pts_on_rays(
    torch::Tensor rays_o, torch::Tensor rays_d,
    torch::Tensor xyz_min, torch::Tensor xyz_max,
    const int N_samples) {
  CHECK_INPUT(rays_o);
  CHECK_INPUT(rays_d);
  CHECK_INPUT(xyz_min);
  CHECK_INPUT(xyz_max);
  assert(rays_o.dim()==2);
  assert(rays_o.size(1)==3);
  return sample_ndc_pts_on_rays_cuda(rays_o, rays_d, xyz_min, xyz_max, N_samples);
}

torch::Tensor sample_bg_pts_on_rays(
    torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_max,
    const float bg_preserve, const int N_samples) {
  CHECK_INPUT(rays_o);
  CHECK_INPUT(rays_d);
  CHECK_INPUT(t_max);
  return sample_bg_pts_on_rays_cuda(rays_o, rays_d, t_max, bg_preserve, N_samples);
}

torch::Tensor maskcache_lookup(torch::Tensor world, torch::Tensor xyz, torch::Tensor xyz2ijk_scale, torch::Tensor xyz2ijk_shift) {
  CHECK_INPUT(world);
  CHECK_INPUT(xyz);
  CHECK_INPUT(xyz2ijk_scale);
  CHECK_INPUT(xyz2ijk_shift);
  assert(world.dim()==3);
  assert(xyz.dim()==2);
  assert(xyz.size(1)==3);
  return maskcache_lookup_cuda(world, xyz, xyz2ijk_scale, xyz2ijk_shift);
}

std::vector<torch::Tensor> raw2alpha(torch::Tensor density, const float shift, const float interval) {
  CHECK_INPUT(density);
  assert(density.dim()==1);
  return raw2alpha_cuda(density, shift, interval);
}
std::vector<torch::Tensor> raw2alpha_nonuni(torch::Tensor density, const float shift, torch::Tensor interval) {
  CHECK_INPUT(density);
  assert(density.dim()==1);
  return raw2alpha_nonuni_cuda(density, shift, interval);
}

torch::Tensor raw2alpha_backward(torch::Tensor exp, torch::Tensor grad_back, const float interval) {
  CHECK_INPUT(exp);
  CHECK_INPUT(grad_back);
  return raw2alpha_backward_cuda(exp, grad_back, interval);
}
torch::Tensor raw2alpha_nonuni_backward(torch::Tensor exp, torch::Tensor grad_back, torch::Tensor interval) {
  CHECK_INPUT(exp);
  CHECK_INPUT(grad_back);
  return raw2alpha_nonuni_backward_cuda(exp, grad_back, interval);
}

std::vector<torch::Tensor> alpha2weight(torch::Tensor alpha, torch::Tensor ray_id, const int n_rays) {
  CHECK_INPUT(alpha);
  CHECK_INPUT(ray_id);
  assert(alpha.dim()==1);
  assert(ray_id.dim()==1);
  assert(alpha.sizes()==ray_id.sizes());
  return alpha2weight_cuda(alpha, ray_id, n_rays);
}

torch::Tensor alpha2weight_backward(
    torch::Tensor alpha, torch::Tensor weight, torch::Tensor T, torch::Tensor alphainv_last,
    torch::Tensor i_start, torch::Tensor i_end, const int n_rays,
    torch::Tensor grad_weights, torch::Tensor grad_last) {
  CHECK_INPUT(alpha);
  CHECK_INPUT(weight);
  CHECK_INPUT(T);
  CHECK_INPUT(alphainv_last);
  CHECK_INPUT(i_start);
  CHECK_INPUT(i_end);
  CHECK_INPUT(grad_weights);
  CHECK_INPUT(grad_last);
  return alpha2weight_backward_cuda(
      alpha, weight, T, alphainv_last,
      i_start, i_end, n_rays,
      grad_weights, grad_last);
}


PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("infer_t_minmax", &infer_t_minmax, "Infer t_min and t_max of ray-bbox intersection");
  m.def("infer_n_samples", &infer_n_samples, "Infer the number of points to sample on each ray");
  m.def("infer_ray_start_dir", &infer_ray_start_dir, "Infer the starting point and shooting direction of each ray");
  m.def("sample_pts_on_rays", &sample_pts_on_rays, "Sample points on rays");
  m.def("sample_ndc_pts_on_rays", &sample_ndc_pts_on_rays, "Sample points on rays in NDC space");
  m.def("sample_bg_pts_on_rays", &sample_bg_pts_on_rays, "Sample background points on rays");
  m.def("maskcache_lookup", &maskcache_lookup, "Lookup to skip known freespace.");
  m.def("raw2alpha", &raw2alpha, "Raw values [-inf, inf] to alpha [0, 1].");
  m.def("raw2alpha_backward", &raw2alpha_backward, "Backward pass of the raw to alpha");
  m.def("raw2alpha_nonuni", &raw2alpha_nonuni, "Raw values [-inf, inf] to alpha [0, 1].");
  m.def("raw2alpha_nonuni_backward", &raw2alpha_nonuni_backward, "Backward pass of the raw to alpha");
  m.def("alpha2weight", &alpha2weight, "Per-point alpha to accumulated blending weight");
  m.def("alpha2weight_backward", &alpha2weight_backward, "Backward pass of alpha2weight");
}
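These bindings can be JIT-compiled from Python; a minimal sketch, assuming the two source files sit next to the caller (the module name, paths, and the ray tensors are illustrative, not from the diff):

# Build and import the extension at runtime with PyTorch's JIT compiler.
from torch.utils.cpp_extension import load
render_utils = load(
    name='render_utils_4knerf',
    sources=['render_utils.cpp', 'render_utils_kernel.cu'],
    verbose=True)
# rays_o, rays_d, xyz_min, xyz_max are contiguous CUDA tensors here.
t_min, t_max = render_utils.infer_t_minmax(
    rays_o, rays_d, xyz_min, xyz_max, 0.1, 100.0)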
707
modelscope/ops/4knerf/render_utils_kernel.cu
Executable file
@@ -0,0 +1,707 @@
#include <torch/extension.h>

#include <cuda.h>
#include <cuda_runtime.h>

#include <vector>

/*
   Points sampling helper functions.
 */
template <typename scalar_t>
__global__ void infer_t_minmax_cuda_kernel(
    scalar_t* __restrict__ rays_o,
    scalar_t* __restrict__ rays_d,
    scalar_t* __restrict__ xyz_min,
    scalar_t* __restrict__ xyz_max,
    const float near, const float far, const int n_rays,
    scalar_t* __restrict__ t_min,
    scalar_t* __restrict__ t_max) {
  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_ray<n_rays) {
    const int offset = i_ray * 3;
    // guard against division by zero for axis-aligned rays
    float vx = ((rays_d[offset  ]==0) ? 1e-6 : rays_d[offset  ]);
    float vy = ((rays_d[offset+1]==0) ? 1e-6 : rays_d[offset+1]);
    float vz = ((rays_d[offset+2]==0) ? 1e-6 : rays_d[offset+2]);
    float ax = (xyz_max[0] - rays_o[offset  ]) / vx;
    float ay = (xyz_max[1] - rays_o[offset+1]) / vy;
    float az = (xyz_max[2] - rays_o[offset+2]) / vz;
    float bx = (xyz_min[0] - rays_o[offset  ]) / vx;
    float by = (xyz_min[1] - rays_o[offset+1]) / vy;
    float bz = (xyz_min[2] - rays_o[offset+2]) / vz;
    t_min[i_ray] = max(min(max(max(min(ax, bx), min(ay, by)), min(az, bz)), far), near);
    t_max[i_ray] = max(min(min(min(max(ax, bx), max(ay, by)), max(az, bz)), far), near);
  }
}
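The kernel above is the standard slab test for ray/AABB intersection: per axis, a_i and b_i are the distances to the two bounding planes, the entry distance is t_min = max_i min(a_i, b_i), the exit distance is t_max = min_i max(a_i, b_i), and both are clamped to [near, far]. Rays that miss the box end up with t_min >= t_max, which the sampling code below still pads to one point per ray (see infer_n_samples).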

template <typename scalar_t>
__global__ void infer_n_samples_cuda_kernel(
    scalar_t* __restrict__ rays_d,
    scalar_t* __restrict__ t_min,
    scalar_t* __restrict__ t_max,
    const float stepdist,
    const int n_rays,
    int64_t* __restrict__ n_samples) {
  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_ray<n_rays) {
    const int offset = i_ray * 3;
    const float rnorm = sqrt(
        rays_d[offset  ]*rays_d[offset  ] +\
        rays_d[offset+1]*rays_d[offset+1] +\
        rays_d[offset+2]*rays_d[offset+2]);
    // at least 1 point for easier implementation in the later sample_pts_on_rays_cuda
    n_samples[i_ray] = max(ceil((t_max[i_ray]-t_min[i_ray]) * rnorm / stepdist), 1.);
  }
}

template <typename scalar_t>
__global__ void infer_ray_start_dir_cuda_kernel(
    scalar_t* __restrict__ rays_o,
    scalar_t* __restrict__ rays_d,
    scalar_t* __restrict__ t_min,
    const int n_rays,
    scalar_t* __restrict__ rays_start,
    scalar_t* __restrict__ rays_dir) {
  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_ray<n_rays) {
    const int offset = i_ray * 3;
    const float rnorm = sqrt(
        rays_d[offset  ]*rays_d[offset  ] +\
        rays_d[offset+1]*rays_d[offset+1] +\
        rays_d[offset+2]*rays_d[offset+2]);
    rays_start[offset  ] = rays_o[offset  ] + rays_d[offset  ] * t_min[i_ray];
    rays_start[offset+1] = rays_o[offset+1] + rays_d[offset+1] * t_min[i_ray];
    rays_start[offset+2] = rays_o[offset+2] + rays_d[offset+2] * t_min[i_ray];
    rays_dir [offset  ] = rays_d[offset  ] / rnorm;
    rays_dir [offset+1] = rays_d[offset+1] / rnorm;
    rays_dir [offset+2] = rays_d[offset+2] / rnorm;
  }
}


std::vector<torch::Tensor> infer_t_minmax_cuda(
    torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor xyz_min, torch::Tensor xyz_max,
    const float near, const float far) {
  const int n_rays = rays_o.size(0);
  auto t_min = torch::empty({n_rays}, rays_o.options());
  auto t_max = torch::empty({n_rays}, rays_o.options());

  const int threads = 256;
  const int blocks = (n_rays + threads - 1) / threads;

  AT_DISPATCH_FLOATING_TYPES(rays_o.type(), "infer_t_minmax_cuda", ([&] {
    infer_t_minmax_cuda_kernel<scalar_t><<<blocks, threads>>>(
        rays_o.data<scalar_t>(),
        rays_d.data<scalar_t>(),
        xyz_min.data<scalar_t>(),
        xyz_max.data<scalar_t>(),
        near, far, n_rays,
        t_min.data<scalar_t>(),
        t_max.data<scalar_t>());
  }));

  return {t_min, t_max};
}

torch::Tensor infer_n_samples_cuda(torch::Tensor rays_d, torch::Tensor t_min, torch::Tensor t_max, const float stepdist) {
  const int n_rays = t_min.size(0);
  auto n_samples = torch::empty({n_rays}, torch::dtype(torch::kInt64).device(torch::kCUDA));
  const int threads = 256;
  const int blocks = (n_rays + threads - 1) / threads;
  AT_DISPATCH_FLOATING_TYPES(t_min.type(), "infer_n_samples_cuda", ([&] {
    infer_n_samples_cuda_kernel<scalar_t><<<blocks, threads>>>(
        rays_d.data<scalar_t>(),
        t_min.data<scalar_t>(),
        t_max.data<scalar_t>(),
        stepdist,
        n_rays,
        n_samples.data<int64_t>());
  }));
  return n_samples;
}

std::vector<torch::Tensor> infer_ray_start_dir_cuda(torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_min) {
  const int n_rays = rays_o.size(0);
  const int threads = 256;
  const int blocks = (n_rays + threads - 1) / threads;
  auto rays_start = torch::empty_like(rays_o);
  auto rays_dir = torch::empty_like(rays_o);
  AT_DISPATCH_FLOATING_TYPES(rays_o.type(), "infer_ray_start_dir_cuda", ([&] {
    infer_ray_start_dir_cuda_kernel<scalar_t><<<blocks, threads>>>(
        rays_o.data<scalar_t>(),
        rays_d.data<scalar_t>(),
        t_min.data<scalar_t>(),
        n_rays,
        rays_start.data<scalar_t>(),
        rays_dir.data<scalar_t>());
  }));
  return {rays_start, rays_dir};
}

/*
   Sampling query points on rays.
 */
__global__ void __set_1_at_ray_seg_start(
    int64_t* __restrict__ ray_id,
    int64_t* __restrict__ N_steps_cumsum,
    const int n_rays) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if(0<idx && idx<n_rays) {
    ray_id[N_steps_cumsum[idx-1]] = 1;
  }
}

__global__ void __set_step_id(
    int64_t* __restrict__ step_id,
    int64_t* __restrict__ ray_id,
    int64_t* __restrict__ N_steps_cumsum,
    const int total_len) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if(idx<total_len) {
    const int rid = ray_id[idx];
    step_id[idx] = idx - ((rid!=0) ? N_steps_cumsum[rid-1] : 0);
  }
}
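The two helpers above implement the scatter-then-cumsum trick for turning per-ray sample counts into a flat ray index. A Python sketch of the same idea, illustrative only:

# Scatter 1 at each ray's segment start, then cumsum yields a flat ray index.
import torch
N_steps = torch.tensor([3, 2, 4])       # samples per ray
cum = N_steps.cumsum(0)                 # [3, 5, 9]
ray_id = torch.zeros(int(cum[-1]), dtype=torch.long)
ray_id[cum[:-1]] = 1                    # mark where each new ray begins
ray_id = ray_id.cumsum(0)               # [0, 0, 0, 1, 1, 2, 2, 2, 2]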
|
||||
template <typename scalar_t>
|
||||
__global__ void sample_pts_on_rays_cuda_kernel(
|
||||
scalar_t* __restrict__ rays_start,
|
||||
scalar_t* __restrict__ rays_dir,
|
||||
scalar_t* __restrict__ xyz_min,
|
||||
scalar_t* __restrict__ xyz_max,
|
||||
int64_t* __restrict__ ray_id,
|
||||
int64_t* __restrict__ step_id,
|
||||
const float stepdist, const int total_len,
|
||||
scalar_t* __restrict__ rays_pts,
|
||||
bool* __restrict__ mask_outbbox) {
|
||||
const int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if(idx<total_len) {
|
||||
const int i_ray = ray_id[idx];
|
||||
const int i_step = step_id[idx];
|
||||
|
||||
const int offset_p = idx * 3;
|
||||
const int offset_r = i_ray * 3;
|
||||
const float dist = stepdist * i_step;
|
||||
const float px = rays_start[offset_r ] + rays_dir[offset_r ] * dist;
|
||||
const float py = rays_start[offset_r+1] + rays_dir[offset_r+1] * dist;
|
||||
const float pz = rays_start[offset_r+2] + rays_dir[offset_r+2] * dist;
|
||||
rays_pts[offset_p ] = px;
|
||||
rays_pts[offset_p+1] = py;
|
||||
rays_pts[offset_p+2] = pz;
|
||||
mask_outbbox[idx] = (xyz_min[0]>px) | (xyz_min[1]>py) | (xyz_min[2]>pz) | \
|
||||
(xyz_max[0]<px) | (xyz_max[1]<py) | (xyz_max[2]<pz);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<torch::Tensor> sample_pts_on_rays_cuda(
|
||||
torch::Tensor rays_o, torch::Tensor rays_d,
|
||||
torch::Tensor xyz_min, torch::Tensor xyz_max,
|
||||
const float near, const float far, const float stepdist) {
|
||||
const int threads = 256;
|
||||
const int n_rays = rays_o.size(0);
|
||||
|
||||
// Compute ray-bbox intersection
|
||||
auto t_minmax = infer_t_minmax_cuda(rays_o, rays_d, xyz_min, xyz_max, near, far);
|
||||
auto t_min = t_minmax[0];
|
||||
auto t_max = t_minmax[1];
|
||||
|
||||
// Compute the number of points required.
|
||||
// Assign ray index and step index to each.
|
||||
auto N_steps = infer_n_samples_cuda(rays_d, t_min, t_max, stepdist);
|
||||
auto N_steps_cumsum = N_steps.cumsum(0);
|
||||
const int total_len = N_steps.sum().item<int>();
|
||||
auto ray_id = torch::zeros({total_len}, torch::dtype(torch::kInt64).device(torch::kCUDA));
|
||||
__set_1_at_ray_seg_start<<<(n_rays+threads-1)/threads, threads>>>(
|
||||
ray_id.data<int64_t>(), N_steps_cumsum.data<int64_t>(), n_rays);
|
||||
ray_id.cumsum_(0);
|
||||
auto step_id = torch::empty({total_len}, ray_id.options());
|
||||
__set_step_id<<<(total_len+threads-1)/threads, threads>>>(
|
||||
step_id.data<int64_t>(), ray_id.data<int64_t>(), N_steps_cumsum.data<int64_t>(), total_len);
|
||||
|
||||
// Compute the global xyz of each point
|
||||
auto rays_start_dir = infer_ray_start_dir_cuda(rays_o, rays_d, t_min);
|
||||
auto rays_start = rays_start_dir[0];
|
||||
auto rays_dir = rays_start_dir[1];
|
||||
|
||||
auto rays_pts = torch::empty({total_len, 3}, torch::dtype(rays_o.dtype()).device(torch::kCUDA));
|
||||
auto mask_outbbox = torch::empty({total_len}, torch::dtype(torch::kBool).device(torch::kCUDA));
|
||||
|
||||
AT_DISPATCH_FLOATING_TYPES(rays_o.type(), "sample_pts_on_rays_cuda", ([&] {
|
||||
sample_pts_on_rays_cuda_kernel<scalar_t><<<(total_len+threads-1)/threads, threads>>>(
|
||||
rays_start.data<scalar_t>(),
|
||||
rays_dir.data<scalar_t>(),
|
||||
xyz_min.data<scalar_t>(),
|
||||
xyz_max.data<scalar_t>(),
|
||||
ray_id.data<int64_t>(),
|
||||
step_id.data<int64_t>(),
|
||||
stepdist, total_len,
|
||||
rays_pts.data<scalar_t>(),
|
||||
mask_outbbox.data<bool>());
|
||||
}));
|
||||
return {rays_pts, mask_outbbox, ray_id, step_id, N_steps, t_min, t_max};
|
||||
}
|
||||
|
||||
template <typename scalar_t>
|
||||
__global__ void sample_ndc_pts_on_rays_cuda_kernel(
|
||||
const scalar_t* __restrict__ rays_o,
|
||||
const scalar_t* __restrict__ rays_d,
|
||||
const scalar_t* __restrict__ xyz_min,
|
||||
const scalar_t* __restrict__ xyz_max,
|
||||
const int N_samples, const int n_rays,
|
||||
scalar_t* __restrict__ rays_pts,
|
||||
bool* __restrict__ mask_outbbox) {
|
||||
const int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if(idx<N_samples*n_rays) {
|
||||
const int i_ray = idx / N_samples;
|
||||
const int i_step = idx % N_samples;
|
||||
|
||||
const int offset_p = idx * 3;
|
||||
const int offset_r = i_ray * 3;
|
||||
const float dist = ((float)i_step) / (N_samples-1);
|
||||
const float px = rays_o[offset_r ] + rays_d[offset_r ] * dist;
|
||||
const float py = rays_o[offset_r+1] + rays_d[offset_r+1] * dist;
|
||||
const float pz = rays_o[offset_r+2] + rays_d[offset_r+2] * dist;
|
||||
rays_pts[offset_p ] = px;
|
||||
rays_pts[offset_p+1] = py;
|
||||
rays_pts[offset_p+2] = pz;
|
||||
mask_outbbox[idx] = (xyz_min[0]>px) | (xyz_min[1]>py) | (xyz_min[2]>pz) | \
|
||||
(xyz_max[0]<px) | (xyz_max[1]<py) | (xyz_max[2]<pz);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<torch::Tensor> sample_ndc_pts_on_rays_cuda(
|
||||
torch::Tensor rays_o, torch::Tensor rays_d,
|
||||
torch::Tensor xyz_min, torch::Tensor xyz_max,
|
||||
const int N_samples) {
|
||||
const int threads = 256;
|
||||
const int n_rays = rays_o.size(0);
|
||||
|
||||
auto rays_pts = torch::empty({n_rays, N_samples, 3}, torch::dtype(rays_o.dtype()).device(torch::kCUDA));
|
||||
auto mask_outbbox = torch::empty({n_rays, N_samples}, torch::dtype(torch::kBool).device(torch::kCUDA));
|
||||
|
||||
AT_DISPATCH_FLOATING_TYPES(rays_o.type(), "sample_ndc_pts_on_rays_cuda", ([&] {
|
||||
sample_ndc_pts_on_rays_cuda_kernel<scalar_t><<<(n_rays*N_samples+threads-1)/threads, threads>>>(
|
||||
rays_o.data<scalar_t>(),
|
||||
rays_d.data<scalar_t>(),
|
||||
xyz_min.data<scalar_t>(),
|
||||
xyz_max.data<scalar_t>(),
|
||||
N_samples, n_rays,
|
||||
rays_pts.data<scalar_t>(),
|
||||
mask_outbbox.data<bool>());
|
||||
}));
|
||||
return {rays_pts, mask_outbbox};
|
||||
}
|
||||
|
||||
template <typename scalar_t>
|
||||
__device__ __forceinline__ scalar_t norm3(const scalar_t x, const scalar_t y, const scalar_t z) {
|
||||
return sqrt(x*x + y*y + z*z);
|
||||
}
|
||||
|
||||
template <typename scalar_t>
|
||||
__global__ void sample_bg_pts_on_rays_cuda_kernel(
|
||||
const scalar_t* __restrict__ rays_o,
|
||||
const scalar_t* __restrict__ rays_d,
|
||||
const scalar_t* __restrict__ t_max,
|
||||
const float bg_preserve,
|
||||
const int N_samples, const int n_rays,
|
||||
scalar_t* __restrict__ rays_pts) {
|
||||
const int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if(idx<N_samples*n_rays) {
|
||||
const int i_ray = idx / N_samples;
|
||||
const int i_step = idx % N_samples;
|
||||
|
||||
const int offset_p = idx * 3;
|
||||
const int offset_r = i_ray * 3;
|
||||
/* Original pytorch implementation
|
||||
ori_t_outer = t_max[:,None] - 1 + 1 / torch.linspace(1, 0, N_outer+1)[:-1]
|
||||
ori_ray_pts_outer = (rays_o[:,None,:] + rays_d[:,None,:] * ori_t_outer[:,:,None]).reshape(-1,3)
|
||||
t_outer = ori_ray_pts_outer.norm(dim=-1)
|
||||
R_outer = t_outer / ori_ray_pts_outer.abs().amax(1)
|
||||
# r = R * R / t
|
||||
o2i_p = R_outer.pow(2) / t_outer.pow(2) * (1-self.bg_preserve) + R_outer / t_outer * self.bg_preserve
|
||||
ray_pts_outer = (ori_ray_pts_outer * o2i_p[:,None]).reshape(len(rays_o), -1, 3)
|
||||
*/
|
||||
const float t_inner = t_max[i_ray];
|
||||
const float ori_t_outer = t_inner - 1. + 1. / (1. - ((float)i_step) / N_samples);
|
||||
const float ori_ray_pts_x = rays_o[offset_r ] + rays_d[offset_r ] * ori_t_outer;
|
||||
const float ori_ray_pts_y = rays_o[offset_r+1] + rays_d[offset_r+1] * ori_t_outer;
|
||||
const float ori_ray_pts_z = rays_o[offset_r+2] + rays_d[offset_r+2] * ori_t_outer;
|
||||
const float t_outer = norm3(ori_ray_pts_x, ori_ray_pts_y, ori_ray_pts_z);
|
||||
const float ori_ray_pts_m = max(abs(ori_ray_pts_x), max(abs(ori_ray_pts_y), abs(ori_ray_pts_z)));
|
||||
const float R_outer = t_outer / ori_ray_pts_m;
|
||||
const float o2i_p = R_outer*R_outer / (t_outer*t_outer) * (1.-bg_preserve) + R_outer / t_outer * bg_preserve;
|
||||
const float px = ori_ray_pts_x * o2i_p;
|
||||
const float py = ori_ray_pts_y * o2i_p;
|
||||
const float pz = ori_ray_pts_z * o2i_p;
|
||||
rays_pts[offset_p ] = px;
|
||||
rays_pts[offset_p+1] = py;
|
||||
rays_pts[offset_p+2] = pz;
|
||||
}
|
||||
}
|
||||
|
||||
torch::Tensor sample_bg_pts_on_rays_cuda(
|
||||
torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_max,
|
||||
const float bg_preserve, const int N_samples) {
|
||||
const int threads = 256;
|
||||
const int n_rays = rays_o.size(0);
|
||||
|
||||
auto rays_pts = torch::empty({n_rays, N_samples, 3}, torch::dtype(rays_o.dtype()).device(torch::kCUDA));
|
||||
|
||||
AT_DISPATCH_FLOATING_TYPES(rays_o.type(), "sample_bg_pts_on_rays_cuda", ([&] {
|
||||
sample_bg_pts_on_rays_cuda_kernel<scalar_t><<<(n_rays*N_samples+threads-1)/threads, threads>>>(
|
||||
rays_o.data<scalar_t>(),
|
||||
rays_d.data<scalar_t>(),
|
||||
t_max.data<scalar_t>(),
|
||||
bg_preserve,
|
||||
N_samples, n_rays,
|
||||
rays_pts.data<scalar_t>());
|
||||
}));
|
||||
return rays_pts;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
MaskCache lookup to skip known freespace.
|
||||
*/
|
||||
|
||||
static __forceinline__ __device__
|
||||
bool check_xyz(int i, int j, int k, int sz_i, int sz_j, int sz_k) {
|
||||
return (0 <= i) && (i < sz_i) && (0 <= j) && (j < sz_j) && (0 <= k) && (k < sz_k);
|
||||
}
|
||||
|
||||
|
||||
template <typename scalar_t>
|
||||
__global__ void maskcache_lookup_cuda_kernel(
|
||||
bool* __restrict__ world,
|
||||
scalar_t* __restrict__ xyz,
|
||||
bool* __restrict__ out,
|
||||
scalar_t* __restrict__ xyz2ijk_scale,
|
||||
scalar_t* __restrict__ xyz2ijk_shift,
|
||||
const int sz_i, const int sz_j, const int sz_k, const int n_pts) {
|
||||
|
||||
const int i_pt = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if(i_pt<n_pts) {
|
||||
const int offset = i_pt * 3;
|
||||
const int i = round(xyz[offset ] * xyz2ijk_scale[0] + xyz2ijk_shift[0]);
|
||||
const int j = round(xyz[offset+1] * xyz2ijk_scale[1] + xyz2ijk_shift[1]);
|
||||
const int k = round(xyz[offset+2] * xyz2ijk_scale[2] + xyz2ijk_shift[2]);
|
||||
if(check_xyz(i, j, k, sz_i, sz_j, sz_k)) {
|
||||
out[i_pt] = world[i*sz_j*sz_k + j*sz_k + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
torch::Tensor maskcache_lookup_cuda(
|
||||
torch::Tensor world,
|
||||
torch::Tensor xyz,
|
||||
torch::Tensor xyz2ijk_scale,
|
||||
torch::Tensor xyz2ijk_shift) {
|
||||
|
||||
const int sz_i = world.size(0);
|
||||
const int sz_j = world.size(1);
|
||||
const int sz_k = world.size(2);
|
||||
const int n_pts = xyz.size(0);
|
||||
|
||||
auto out = torch::zeros({n_pts}, torch::dtype(torch::kBool).device(torch::kCUDA));
|
||||
if(n_pts==0) {
|
||||
return out;
|
||||
}
|
||||
|
||||
const int threads = 256;
|
||||
const int blocks = (n_pts + threads - 1) / threads;
|
||||
|
||||
AT_DISPATCH_FLOATING_TYPES(xyz.type(), "maskcache_lookup_cuda", ([&] {
|
||||
maskcache_lookup_cuda_kernel<scalar_t><<<blocks, threads>>>(
|
||||
world.data<bool>(),
|
||||
xyz.data<scalar_t>(),
|
||||
out.data<bool>(),
|
||||
xyz2ijk_scale.data<scalar_t>(),
|
||||
xyz2ijk_shift.data<scalar_t>(),
|
||||
sz_i, sz_j, sz_k, n_pts);
|
||||
}));
|
||||
|
||||
return out;
|
||||
}

/*
   Ray marching helper function.
 */
template <typename scalar_t>
__global__ void raw2alpha_cuda_kernel(
    scalar_t* __restrict__ density,
    const float shift, const float interval, const int n_pts,
    scalar_t* __restrict__ exp_d,
    scalar_t* __restrict__ alpha) {

  const int i_pt = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_pt<n_pts) {
    const scalar_t e = exp(density[i_pt] + shift); // can be inf
    exp_d[i_pt] = e;
    alpha[i_pt] = 1 - pow(1 + e, -interval);
  }
}

template <typename scalar_t>
__global__ void raw2alpha_nonuni_cuda_kernel(
    scalar_t* __restrict__ density,
    const float shift, scalar_t* __restrict__ interval, const int n_pts,
    scalar_t* __restrict__ exp_d,
    scalar_t* __restrict__ alpha) {

  const int i_pt = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_pt<n_pts) {
    const scalar_t e = exp(density[i_pt] + shift); // can be inf
    exp_d[i_pt] = e;
    alpha[i_pt] = 1 - pow(1 + e, -interval[i_pt]);
  }
}

std::vector<torch::Tensor> raw2alpha_cuda(torch::Tensor density, const float shift, const float interval) {
  const int n_pts = density.size(0);
  auto exp_d = torch::empty_like(density);
  auto alpha = torch::empty_like(density);
  if(n_pts==0) {
    return {exp_d, alpha};
  }

  const int threads = 256;
  const int blocks = (n_pts + threads - 1) / threads;

  AT_DISPATCH_FLOATING_TYPES(density.type(), "raw2alpha_cuda", ([&] {
    raw2alpha_cuda_kernel<scalar_t><<<blocks, threads>>>(
        density.data<scalar_t>(),
        shift, interval, n_pts,
        exp_d.data<scalar_t>(),
        alpha.data<scalar_t>());
  }));

  return {exp_d, alpha};
}

std::vector<torch::Tensor> raw2alpha_nonuni_cuda(torch::Tensor density, const float shift, torch::Tensor interval) {
  const int n_pts = density.size(0);
  auto exp_d = torch::empty_like(density);
  auto alpha = torch::empty_like(density);
  if(n_pts==0) {
    return {exp_d, alpha};
  }

  const int threads = 256;
  const int blocks = (n_pts + threads - 1) / threads;

  AT_DISPATCH_FLOATING_TYPES(density.type(), "raw2alpha_nonuni_cuda", ([&] {
    raw2alpha_nonuni_cuda_kernel<scalar_t><<<blocks, threads>>>(
        density.data<scalar_t>(),
        shift, interval.data<scalar_t>(), n_pts,
        exp_d.data<scalar_t>(),
        alpha.data<scalar_t>());
  }));

  return {exp_d, alpha};
}
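
Both kernels map a raw density to an opacity via `alpha = 1 - (1 + exp(density + shift)) ** (-interval)`, where `interval` is a scalar in the uniform case and a per-point tensor in the non-uniform case. A PyTorch reference of the forward pass (illustrative only):

```python
import torch

def raw2alpha_ref(density, shift, interval):
    # interval may be a Python float or a per-point tensor.
    exp_d = torch.exp(density + shift)   # can be inf, as in the kernel
    alpha = 1 - (1 + exp_d) ** (-interval)
    return exp_d, alpha
```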

template <typename scalar_t>
__global__ void raw2alpha_backward_cuda_kernel(
    scalar_t* __restrict__ exp_d,
    scalar_t* __restrict__ grad_back,
    const float interval, const int n_pts,
    scalar_t* __restrict__ grad) {

  const int i_pt = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_pt<n_pts) {
    grad[i_pt] = min(exp_d[i_pt], 1e10) * pow(1+exp_d[i_pt], -interval-1) * interval * grad_back[i_pt];
  }
}

template <typename scalar_t>
__global__ void raw2alpha_nonuni_backward_cuda_kernel(
    scalar_t* __restrict__ exp_d,
    scalar_t* __restrict__ grad_back,
    scalar_t* __restrict__ interval, const int n_pts,
    scalar_t* __restrict__ grad) {

  const int i_pt = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_pt<n_pts) {
    grad[i_pt] = min(exp_d[i_pt], 1e10) * pow(1+exp_d[i_pt], -interval[i_pt]-1) * interval[i_pt] * grad_back[i_pt];
  }
}

torch::Tensor raw2alpha_backward_cuda(torch::Tensor exp_d, torch::Tensor grad_back, const float interval) {
  const int n_pts = exp_d.size(0);
  auto grad = torch::empty_like(exp_d);
  if(n_pts==0) {
    return grad;
  }

  const int threads = 256;
  const int blocks = (n_pts + threads - 1) / threads;

  AT_DISPATCH_FLOATING_TYPES(exp_d.type(), "raw2alpha_backward_cuda", ([&] {
    raw2alpha_backward_cuda_kernel<scalar_t><<<blocks, threads>>>(
        exp_d.data<scalar_t>(),
        grad_back.data<scalar_t>(),
        interval, n_pts,
        grad.data<scalar_t>());
  }));

  return grad;
}

torch::Tensor raw2alpha_nonuni_backward_cuda(torch::Tensor exp_d, torch::Tensor grad_back, torch::Tensor interval) {
  const int n_pts = exp_d.size(0);
  auto grad = torch::empty_like(exp_d);
  if(n_pts==0) {
    return grad;
  }

  const int threads = 256;
  const int blocks = (n_pts + threads - 1) / threads;

  AT_DISPATCH_FLOATING_TYPES(exp_d.type(), "raw2alpha_nonuni_backward_cuda", ([&] {
    raw2alpha_nonuni_backward_cuda_kernel<scalar_t><<<blocks, threads>>>(
        exp_d.data<scalar_t>(),
        grad_back.data<scalar_t>(),
        interval.data<scalar_t>(), n_pts,
        grad.data<scalar_t>());
  }));

  return grad;
}
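
The backward kernels apply the closed-form derivative `d(alpha)/d(density) = interval * exp_d * (1 + exp_d) ** (-interval - 1)`, with `exp_d` clamped at 1e10 to guard against the inf the forward pass can produce. A quick autograd cross-check of that identity (a sketch, using float64 for numerical headroom):

```python
import torch

density = torch.randn(8, dtype=torch.float64, requires_grad=True)
shift, interval = 0.0, 0.5
exp_d = torch.exp(density + shift)
alpha = 1 - (1 + exp_d) ** (-interval)
grad_back = torch.ones_like(alpha)
(auto_grad,) = torch.autograd.grad(alpha, density, grad_back)
manual = exp_d.detach().clamp(max=1e10) * (1 + exp_d.detach()) ** (-interval - 1) \
    * interval * grad_back
assert torch.allclose(auto_grad, manual)
```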

template <typename scalar_t>
__global__ void alpha2weight_cuda_kernel(
    scalar_t* __restrict__ alpha,
    const int n_rays,
    scalar_t* __restrict__ weight,
    scalar_t* __restrict__ T,
    scalar_t* __restrict__ alphainv_last,
    int64_t* __restrict__ i_start,
    int64_t* __restrict__ i_end) {

  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_ray<n_rays) {
    const int i_s = i_start[i_ray];
    const int i_e_max = i_end[i_ray];

    float T_cum = 1.;
    int i;
    for(i=i_s; i<i_e_max; ++i) {
      T[i] = T_cum;
      weight[i] = T_cum * alpha[i];
      T_cum *= (1. - alpha[i]);
      if(T_cum<1e-3) {
        i+=1;
        break;
      }
    }
    i_end[i_ray] = i;
    alphainv_last[i_ray] = T_cum;
  }
}

__global__ void __set_i_for_segment_start_end(
    int64_t* __restrict__ ray_id,
    const int n_pts,
    int64_t* __restrict__ i_start,
    int64_t* __restrict__ i_end) {
  const int index = blockIdx.x * blockDim.x + threadIdx.x;
  if(0<index && index<n_pts && ray_id[index]!=ray_id[index-1]) {
    i_start[ray_id[index]] = index;
    i_end[ray_id[index-1]] = index;
  }
}
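
`__set_i_for_segment_start_end` assumes `ray_id` is sorted and marks where each ray's run of sample points begins and ends; a boundary exists wherever consecutive ids differ. A vectorized PyTorch equivalent (illustrative):

```python
import torch

def segment_start_end_ref(ray_id, n_rays):
    # ray_id: (n_pts,) sorted int64 ray index of each sample point.
    i_start = torch.zeros(n_rays, dtype=torch.int64)
    i_end = torch.zeros(n_rays, dtype=torch.int64)
    boundary = torch.nonzero(ray_id[1:] != ray_id[:-1]).flatten() + 1
    i_start[ray_id[boundary]] = boundary
    i_end[ray_id[boundary - 1]] = boundary
    i_end[ray_id[-1]] = ray_id.numel()  # host-side fix-up, as in alpha2weight_cuda below
    return i_start, i_end
```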

std::vector<torch::Tensor> alpha2weight_cuda(torch::Tensor alpha, torch::Tensor ray_id, const int n_rays) {
  const int n_pts = alpha.size(0);
  const int threads = 256;

  auto weight = torch::zeros_like(alpha);
  auto T = torch::ones_like(alpha);
  auto alphainv_last = torch::ones({n_rays}, alpha.options());
  auto i_start = torch::zeros({n_rays}, torch::dtype(torch::kInt64).device(torch::kCUDA));
  auto i_end = torch::zeros({n_rays}, torch::dtype(torch::kInt64).device(torch::kCUDA));
  if(n_pts==0) {
    return {weight, T, alphainv_last, i_start, i_end};
  }

  __set_i_for_segment_start_end<<<(n_pts+threads-1)/threads, threads>>>(
      ray_id.data<int64_t>(), n_pts, i_start.data<int64_t>(), i_end.data<int64_t>());
  i_end[ray_id[n_pts-1]] = n_pts;

  const int blocks = (n_rays + threads - 1) / threads;

  AT_DISPATCH_FLOATING_TYPES(alpha.type(), "alpha2weight_cuda", ([&] {
    alpha2weight_cuda_kernel<scalar_t><<<blocks, threads>>>(
        alpha.data<scalar_t>(),
        n_rays,
        weight.data<scalar_t>(),
        T.data<scalar_t>(),
        alphainv_last.data<scalar_t>(),
        i_start.data<int64_t>(),
        i_end.data<int64_t>());
  }));

  return {weight, T, alphainv_last, i_start, i_end};
}

template <typename scalar_t>
__global__ void alpha2weight_backward_cuda_kernel(
    scalar_t* __restrict__ alpha,
    scalar_t* __restrict__ weight,
    scalar_t* __restrict__ T,
    scalar_t* __restrict__ alphainv_last,
    int64_t* __restrict__ i_start,
    int64_t* __restrict__ i_end,
    const int n_rays,
    scalar_t* __restrict__ grad_weights,
    scalar_t* __restrict__ grad_last,
    scalar_t* __restrict__ grad) {

  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_ray<n_rays) {
    const int i_s = i_start[i_ray];
    const int i_e = i_end[i_ray];

    float back_cum = grad_last[i_ray] * alphainv_last[i_ray];
    for(int i=i_e-1; i>=i_s; --i) {
      grad[i] = grad_weights[i] * T[i] - back_cum / (1-alpha[i] + 1e-10);
      back_cum += grad_weights[i] * weight[i];
    }
  }
}

torch::Tensor alpha2weight_backward_cuda(
    torch::Tensor alpha, torch::Tensor weight, torch::Tensor T, torch::Tensor alphainv_last,
    torch::Tensor i_start, torch::Tensor i_end, const int n_rays,
    torch::Tensor grad_weights, torch::Tensor grad_last) {

  auto grad = torch::zeros_like(alpha);
  if(n_rays==0) {
    return grad;
  }

  const int threads = 256;
  const int blocks = (n_rays + threads - 1) / threads;

  AT_DISPATCH_FLOATING_TYPES(alpha.type(), "alpha2weight_backward_cuda", ([&] {
    alpha2weight_backward_cuda_kernel<scalar_t><<<blocks, threads>>>(
        alpha.data<scalar_t>(),
        weight.data<scalar_t>(),
        T.data<scalar_t>(),
        alphainv_last.data<scalar_t>(),
        i_start.data<int64_t>(),
        i_end.data<int64_t>(),
        n_rays,
        grad_weights.data<scalar_t>(),
        grad_last.data<scalar_t>(),
        grad.data<scalar_t>());
  }));

  return grad;
}
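
Putting the pieces together: per ray, the forward pass computes compositing weights `w_i = T_i * alpha_i` with transmittance `T` accumulated front to back, stopping early once `T` drops below 1e-3 and recording where it stopped in `i_end`. A slow per-ray reference (illustrative; reuses `segment_start_end_ref` from the sketch above):

```python
import torch

def alpha2weight_ref(alpha, ray_id, n_rays):
    weight = torch.zeros_like(alpha)
    T = torch.ones_like(alpha)
    alphainv_last = torch.ones(n_rays)
    i_start, i_end = segment_start_end_ref(ray_id, n_rays)
    for r in range(n_rays):
        T_cum = 1.0
        i = int(i_start[r])
        while i < int(i_end[r]):
            T[i] = T_cum
            weight[i] = T_cum * alpha[i]
            T_cum *= 1.0 - float(alpha[i])
            i += 1
            if T_cum < 1e-3:  # early ray termination, as in the kernel
                break
        i_end[r] = i          # clip the segment where marching stopped
        alphainv_last[r] = T_cum
    return weight, T, alphainv_last, i_start, i_end
```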
22
modelscope/ops/4knerf/total_variation.cpp
Executable file
@@ -0,0 +1,22 @@
#include <torch/extension.h>

#include <vector>


void total_variation_add_grad_cuda(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode);


#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

void total_variation_add_grad(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode) {
  CHECK_INPUT(param);
  CHECK_INPUT(grad);
  total_variation_add_grad_cuda(param, grad, wx, wy, wz, dense_mode);
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("total_variation_add_grad", &total_variation_add_grad, "Add total variation grad");
}
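
For completeness, a binding file like this can be JIT-compiled with `torch.utils.cpp_extension.load`; a minimal sketch, assuming the file paths above (modelscope itself may compile these ops through its own build machinery instead):

```python
from torch.utils.cpp_extension import load

total_variation = load(
    name='total_variation',
    sources=[
        'modelscope/ops/4knerf/total_variation.cpp',
        'modelscope/ops/4knerf/total_variation_kernel.cu',
    ],
    verbose=True)
# Exposes: total_variation.total_variation_add_grad(param, grad, wx, wy, wz, dense_mode)
```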
67
modelscope/ops/4knerf/total_variation_kernel.cu
Executable file
@@ -0,0 +1,67 @@
#include <torch/extension.h>

#include <cuda.h>
#include <cuda_runtime.h>

#include <vector>

template <typename scalar_t, typename bound_t>
__device__ __forceinline__ scalar_t clamp(const scalar_t v, const bound_t lo, const bound_t hi) {
  return min(max(v, lo), hi);
}

template <typename scalar_t, bool dense_mode>
__global__ void total_variation_add_grad_cuda_kernel(
    const scalar_t* __restrict__ param,
    scalar_t* __restrict__ grad,
    float wx, float wy, float wz,
    const size_t sz_i, const size_t sz_j, const size_t sz_k, const size_t N) {

  const size_t index = blockIdx.x * blockDim.x + threadIdx.x;
  if(index<N && (dense_mode || grad[index]!=0)) {
    const size_t k = index % sz_k;
    const size_t j = index / sz_k % sz_j;
    const size_t i = index / sz_k / sz_j % sz_i;

    float grad_to_add = 0;
    grad_to_add += (k==0      ? 0 : wx * clamp(param[index]-param[index-1], -1.f, 1.f));
    grad_to_add += (k==sz_k-1 ? 0 : wx * clamp(param[index]-param[index+1], -1.f, 1.f));
    grad_to_add += (j==0      ? 0 : wy * clamp(param[index]-param[index-sz_k], -1.f, 1.f));
    grad_to_add += (j==sz_j-1 ? 0 : wy * clamp(param[index]-param[index+sz_k], -1.f, 1.f));
    grad_to_add += (i==0      ? 0 : wz * clamp(param[index]-param[index-sz_k*sz_j], -1.f, 1.f));
    grad_to_add += (i==sz_i-1 ? 0 : wz * clamp(param[index]-param[index+sz_k*sz_j], -1.f, 1.f));
    grad[index] += grad_to_add;
  }
}

void total_variation_add_grad_cuda(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode) {
  const size_t N = param.numel();
  const size_t sz_i = param.size(2);
  const size_t sz_j = param.size(3);
  const size_t sz_k = param.size(4);
  const int threads = 256;
  const int blocks = (N + threads - 1) / threads;

  wx /= 6;
  wy /= 6;
  wz /= 6;

  if(dense_mode) {
    AT_DISPATCH_FLOATING_TYPES(param.type(), "total_variation_add_grad_cuda", ([&] {
      total_variation_add_grad_cuda_kernel<scalar_t,true><<<blocks, threads>>>(
          param.data<scalar_t>(),
          grad.data<scalar_t>(),
          wx, wy, wz,
          sz_i, sz_j, sz_k, N);
    }));
  }
  else {
    AT_DISPATCH_FLOATING_TYPES(param.type(), "total_variation_add_grad_cuda", ([&] {
      total_variation_add_grad_cuda_kernel<scalar_t,false><<<blocks, threads>>>(
          param.data<scalar_t>(),
          grad.data<scalar_t>(),
          wx, wy, wz,
          sz_i, sz_j, sz_k, N);
    }));
  }
}
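
In dense mode the kernel is equivalent to adding, for each voxel, the clamped differences against its six neighbours (with the weights pre-divided by 6 in the wrapper; `wx` acts along the innermost `k` axis here, `wz` along `i`). A PyTorch reference over a `(N, C, sz_i, sz_j, sz_k)` grid (illustrative; sparse mode additionally skips voxels whose incoming grad is zero):

```python
import torch

def tv_add_grad_ref(param, grad, wx, wy, wz):
    wx, wy, wz = wx / 6, wy / 6, wz / 6
    d = lambda a, b: torch.clamp(a - b, -1.0, 1.0)
    g = torch.zeros_like(param)
    g[..., 1:]        += wx * d(param[..., 1:],        param[..., :-1])
    g[..., :-1]       += wx * d(param[..., :-1],       param[..., 1:])
    g[..., 1:, :]     += wy * d(param[..., 1:, :],     param[..., :-1, :])
    g[..., :-1, :]    += wy * d(param[..., :-1, :],    param[..., 1:, :])
    g[..., 1:, :, :]  += wz * d(param[..., 1:, :, :],  param[..., :-1, :, :])
    g[..., :-1, :, :] += wz * d(param[..., :-1, :, :], param[..., 1:, :, :])
    grad += g
    return grad
```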
20
modelscope/ops/4knerf/ub360_utils.cpp
Executable file
@@ -0,0 +1,20 @@
#include <torch/extension.h>

#include <vector>


torch::Tensor cumdist_thres_cuda(torch::Tensor dist, float thres);


#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

torch::Tensor cumdist_thres(torch::Tensor dist, float thres) {
  CHECK_INPUT(dist);
  return cumdist_thres_cuda(dist, thres);
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("cumdist_thres", &cumdist_thres, "Generate mask for cumulative dist.");
}
47
modelscope/ops/4knerf/ub360_utils_kernel.cu
Executable file
@@ -0,0 +1,47 @@
#include <torch/extension.h>

#include <cuda.h>
#include <cuda_runtime.h>

#include <vector>

/*
   helper function to skip oversampled points,
   especially near the foreground scene bbox boundary
 */
template <typename scalar_t>
__global__ void cumdist_thres_cuda_kernel(
    scalar_t* __restrict__ dist,
    const float thres,
    const int n_rays,
    const int n_pts,
    bool* __restrict__ mask) {
  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_ray<n_rays) {
    float cum_dist = 0;
    const int i_s = i_ray * n_pts;
    const int i_t = i_s + n_pts;
    int i;
    for(i=i_s; i<i_t; ++i) {
      cum_dist += dist[i];
      bool over = (cum_dist > thres);
      cum_dist *= float(!over);
      mask[i] = over;
    }
  }
}

torch::Tensor cumdist_thres_cuda(torch::Tensor dist, float thres) {
  const int n_rays = dist.size(0);
  const int n_pts = dist.size(1);
  const int threads = 256;
  const int blocks = (n_rays + threads - 1) / threads;
  auto mask = torch::zeros({n_rays, n_pts}, torch::dtype(torch::kBool).device(torch::kCUDA));
  AT_DISPATCH_FLOATING_TYPES(dist.type(), "cumdist_thres_cuda", ([&] {
    cumdist_thres_cuda_kernel<scalar_t><<<blocks, threads>>>(
        dist.data<scalar_t>(), thres,
        n_rays, n_pts,
        mask.data<bool>());
  }));
  return mask;
}
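
The kernel's semantics, spelled out in slow Python for reference: walk each ray front to back, accumulate the step distances, and emit True (resetting the accumulator) at every point where the running sum exceeds the threshold:

```python
import torch

def cumdist_thres_ref(dist, thres):
    n_rays, n_pts = dist.shape
    mask = torch.zeros(n_rays, n_pts, dtype=torch.bool)
    for r in range(n_rays):
        cum = 0.0
        for i in range(n_pts):
            cum += float(dist[r, i])
            over = cum > thres
            if over:
                cum = 0.0  # reset, mirroring cum_dist *= float(!over)
            mask[r, i] = over
    return mask
```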
@@ -103,6 +103,7 @@ if TYPE_CHECKING:
    from .mobile_image_super_resolution_pipeline import MobileImageSuperResolutionPipeline
    from .image_human_parsing_pipeline import ImageHumanParsingPipeline
    from .nerf_recon_acc_pipeline import NeRFReconAccPipeline
    from .nerf_recon_4k_pipeline import NeRFRecon4KPipeline
    from .controllable_image_generation_pipeline import ControllableImageGenerationPipeline
    from .image_bts_depth_estimation_pipeline import ImageBTSDepthEstimationPipeline
    from .pedestrian_attribute_recognition_pipeline import PedestrainAttributeRecognitionPipeline
@@ -254,6 +255,7 @@ else:
        'bad_image_detecting_pipeline': ['BadImageDetecingPipeline'],
        'image_human_parsing_pipeline': ['ImageHumanParsingPipeline'],
        'nerf_recon_acc_pipeline': ['NeRFReconAccPipeline'],
        'nerf_recon_4k_pipeline': ['NeRFRecon4KPipeline'],
        'controllable_image_generation_pipeline': [
            'ControllableImageGenerationPipeline'
        ],
87
modelscope/pipelines/cv/nerf_recon_4k_pipeline.py
Normal file
@@ -0,0 +1,87 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict

from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input, Model, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.pipelines.util import is_model, is_official_hub_path
from modelscope.utils.constant import Invoke, Tasks
from modelscope.utils.logger import get_logger

logger = get_logger()


@PIPELINES.register_module(
    Tasks.nerf_recon_4k, module_name=Pipelines.nerf_recon_4k)
class NeRFRecon4KPipeline(Pipeline):
    """ 4K NeRF reconstruction pipeline
    Example:

    ```python
    >>> from modelscope.pipelines import pipeline
    >>> nerf_recon_4k = pipeline(Tasks.nerf_recon_4k,
                'DAMOXR/cv_nerf-3d-reconstruction-4k-nerf_damo')
    >>> nerf_recon_4k({
            'data_cfg': data_cfg,       # dataset config (dict)
            'render_dir': 'save_dir',   # save dir path (str)
        })
    ```
    """

    def __init__(self,
                 model,
                 data_type='blender',
                 test_ray_chunk=8192,
                 test_tile=510,
                 stepsize=1.0,
                 factor=4,
                 load_sr=1,
                 device='gpu',
                 **kwargs):
        """
        Use `model` to create a 4K NeRF reconstruction pipeline.
        Args:
            model (str or Model): model_id on modelscope hub
            data_type (str): currently only support 'blender' and 'llff'
            test_ray_chunk (int): ray chunk size for test, avoid GPU OOM
            test_tile (int): tile size used when rendering test views
            stepsize (float): ray marching step size
            factor (int): image downsampling factor
            load_sr (int): whether to load the super-resolution module
            device (str): only support gpu
        """
        model = Model.from_pretrained(
            model,
            device=device,
            model_prefetched=True,
            invoked_by=Invoke.PIPELINE,
            data_type=data_type,
            test_ray_chunk=test_ray_chunk,
            test_tile=test_tile,
            stepsize=stepsize,
            factor=factor,
            load_sr=load_sr) if is_model(model) else model

        super().__init__(model=model, **kwargs)
        if not isinstance(self.model, Model):
            logger.error('model object is not initialized.')
            raise Exception('model object is not initialized.')
        self.data_type = data_type
        if self.data_type != 'blender' and self.data_type != 'llff':
            raise Exception('data type {} is not supported currently'.format(
                self.data_type))
        logger.info('load model done')

    def preprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        return inputs

    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
        data_cfg = input['data_cfg']
        render_dir = input['render_dir']
        self.model.nerf_reconstruction(data_cfg, render_dir)
        return {OutputKeys.OUTPUT: 'Done'}

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        return inputs
@@ -15,6 +15,7 @@ from torch.utils.data import DataLoader, Dataset, Sampler
from torch.utils.data.dataloader import default_collate
from torch.utils.data.distributed import DistributedSampler

from modelscope.hub.check_model import check_local_model_is_latest
from modelscope.metainfo import Trainers
from modelscope.metrics import build_metric, task_default_metrics
from modelscope.metrics.prediction_saving_wrapper import \
@@ -27,6 +28,7 @@ from modelscope.msdatasets.dataset_cls.custom_datasets.builder import \
from modelscope.msdatasets.ms_dataset import MsDataset
from modelscope.outputs import ModelOutputBase
from modelscope.preprocessors.base import Preprocessor
from modelscope.swift import Swift
from modelscope.trainers.hooks.builder import HOOKS
from modelscope.trainers.hooks.priority import Priority, get_priority
from modelscope.trainers.lrscheduler.builder import build_lr_scheduler
@@ -34,7 +36,7 @@ from modelscope.trainers.optimizer.builder import build_optimizer
from modelscope.utils.config import Config, ConfigDict, JSONIteratorEncoder
from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigFields,
                                       ConfigKeys, DistributedParallelType,
                                       ModeKeys, ModelFile, ThirdParty,
                                       Invoke, ModeKeys, ModelFile, ThirdParty,
                                       TrainerStages)
from modelscope.utils.data_utils import to_device
from modelscope.utils.device import create_device
@@ -45,7 +47,6 @@ from modelscope.utils.torch_utils import (compile_model, get_dist_info,
                                          get_local_rank, init_dist, is_dist,
                                          is_master, is_on_same_device,
                                          set_random_seed)
from ..swift import Swift
from .base import BaseTrainer
from .builder import TRAINERS
from .default_config import merge_cfg, merge_hooks, update_cfg
@@ -152,6 +153,10 @@ class EpochBasedTrainer(BaseTrainer):
            assert cfg_file is not None, 'Config file should not be None if model is not from pretrained!'
            self.model_dir = os.path.dirname(cfg_file)
        self.input_model_id = None
        if hasattr(model, 'model_dir'):
            check_local_model_is_latest(
                model.model_dir,
                user_agent={Invoke.KEY: Invoke.LOCAL_TRAINER})

        super().__init__(cfg_file, arg_parse_fn)
        self.cfg_modify_fn = cfg_modify_fn
@@ -154,6 +154,7 @@ class CVTasks(object):
    motion_generation = 'motion-generation'
    # 3d reconstruction
    nerf_recon_acc = 'nerf-recon-acc'
    nerf_recon_4k = 'nerf-recon-4k'
    nerf_recon_vq_compression = 'nerf-recon-vq-compression'

    # vision efficient tuning
@@ -95,8 +95,10 @@ def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs):
        else:
            model_dir = pretrained_model_name_or_path

        return module_class.from_pretrained(model_dir, *model_args,
                                            **kwargs)
        model = module_class.from_pretrained(model_dir, *model_args,
                                             **kwargs)
        model.model_dir = model_dir
        return model

    return ClassWrapper
@@ -62,6 +62,7 @@ tensorflow-estimator>=1.15.1
tf_slim
thop
timm>=0.4.9
torch-scatter
torchmetrics>=0.6.2
torchsummary>=1.5.1
torchvision
67
tests/pipelines/test_nerf_recon_4k.py
Normal file
@@ -0,0 +1,67 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import unittest

import torch

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.msdatasets import MsDataset
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import DownloadMode, Tasks
from modelscope.utils.test_utils import test_level


class NeRFRecon4KTest(unittest.TestCase):

    def setUp(self) -> None:
        self.model_id = 'DAMOXR/cv_nerf-3d-reconstruction-4k-nerf_damo'
        data_dir = MsDataset.load(
            'DAMOXR/nerf_llff_data',
            subset_name='default',
            split='test',
            download_mode=DownloadMode.FORCE_REDOWNLOAD
        ).config_kwargs['split_config']['test']
        nerf_llff = os.path.join(data_dir, 'nerf_llff_data')
        scene = 'fern'
        data_dir = os.path.join(nerf_llff, scene)
        self.render_dir = 'exp'
        self.data_dic = dict(
            datadir=data_dir,
            dataset_type='llff',
            load_sr=1,
            factor=4,
            ndc=True,
            white_bkgd=False)

    # @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    # @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only')

    # def test_run_by_direct_model_download(self):
    #     snapshot_path = snapshot_download(self.model_id)
    #     print('snapshot_path: {}'.format(snapshot_path))

    #     nerf_recon_4k = pipeline(
    #         Tasks.nerf_recon_4k,
    #         model=snapshot_path,
    #         data_type='llff',
    #     )

    #     nerf_recon_4k(
    #         dict(data_cfg=self.data_dic, render_dir=self.render_dir))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only')
    def test_run_modelhub(self):
        nerf_recon_4k = pipeline(
            Tasks.nerf_recon_4k,
            model=self.model_id,
            data_type='llff',
        )

        nerf_recon_4k(dict(data_cfg=self.data_dic, render_dir=self.render_dir))
        print('4k-nerf_damo.test_run_modelhub done')


if __name__ == '__main__':
    unittest.main()