Merge branch 'master-github' into master-merge-github-230728

This commit is contained in:
suluyan.sly
2023-07-28 16:40:34 +08:00
34 changed files with 35061 additions and 23 deletions

View File

@@ -1,5 +1,5 @@
PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/dreambooth/finetune_stable_diffusion_dreambooth.py \
--model 'AI-ModelScope/stable-diffusion-v1-5' \
--model 'AI-ModelScope/stable-diffusion-v2-1' \
--model_revision 'v1.0.8' \
--work_dir './tmp/dreambooth_diffusion' \
--train_dataset_name 'buptwq/lora-stable-diffusion-finetune' \

View File

@@ -1,5 +1,5 @@
PYTHONPATH=. torchrun examples/pytorch/stable_diffusion/lora/finetune_stable_diffusion_lora.py \
--model 'AI-ModelScope/stable-diffusion-v1-5' \
--model 'AI-ModelScope/stable-diffusion-v2-1' \
--model_revision 'v1.0.9' \
--prompt "a dog" \
--work_dir './tmp/lora_diffusion' \

File diff suppressed because it is too large Load Diff

View File

@@ -585,8 +585,6 @@ class HubApi:
datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={revision}'
cookies = ModelScopeConfig.get_cookies()
r = self.session.get(datahub_url, cookies=cookies, headers=self.headers)
r = self.session.get(
datahub_url, cookies=cookies, headers=self.headers)
resp = r.json()
datahub_raise_on_error(datahub_url, resp)
file_list = resp['Data']

View File

@@ -21,6 +21,7 @@ def check_local_model_is_latest(
"""Check local model repo is latest.
Check local model repo is same as hub latest version.
"""
try:
model_cache = None
# download with git
if os.path.exists(os.path.join(model_root_path, '.git')):
@@ -34,7 +35,6 @@ def check_local_model_is_latest(
model_cache = ModelFileSystemCache(model_root_path)
model_id = model_cache.get_model_id()
try:
# make headers
headers = {
'user-agent':
@@ -75,7 +75,8 @@ def check_local_model_is_latest(
continue
else:
logger.info(
'Model is updated from modelscope hub, you can verify from https://www.modelscope.cn.'
f'Model file {model_file["Name"]} is different from the latest version `{latest_revision}`,'
f'This is because you are using an older version or the file is updated manually.'
)
break
else:
@@ -86,7 +87,8 @@ def check_local_model_is_latest(
continue
else:
logger.info(
'Model is updated from modelscope hub, you can verify from https://www.modelscope.cn.'
f'Model file {model_file["Name"]} is different from the latest version `{latest_revision}`,'
f'This is because you are using an older version or the file is updated manually.'
)
break
except: # noqa: E722

View File

@@ -112,6 +112,7 @@ class Models(object):
image_quality_assessment_degradation = 'image-quality-assessment-degradation'
m2fp = 'm2fp'
nerf_recon_acc = 'nerf-recon-acc'
nerf_recon_4k = 'nerf-recon-4k'
nerf_recon_vq_compression = 'nerf-recon-vq-compression'
bts_depth_estimation = 'bts-depth-estimation'
vision_efficient_tuning = 'vision-efficient-tuning'
@@ -411,6 +412,7 @@ class Pipelines(object):
image_human_parsing = 'm2fp-image-human-parsing'
object_detection_3d_depe = 'object-detection-3d-depe'
nerf_recon_acc = 'nerf-recon-acc'
nerf_recon_4k = 'nerf-recon-4k'
nerf_recon_vq_compression = 'nerf-recon-vq-compression'
bad_image_detecting = 'bad-image-detecting'
controllable_image_generation = 'controllable-image-generation'
@@ -858,6 +860,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
'damo/cv_mobilenet-v2_bad-image-detecting'),
Tasks.nerf_recon_acc: (Pipelines.nerf_recon_acc,
'damo/cv_nerf-3d-reconstruction-accelerate_damo'),
Tasks.nerf_recon_4k: (Pipelines.nerf_recon_4k,
'damo/cv_nerf-3d-reconstruction-4k-nerf_damo'),
Tasks.nerf_recon_vq_compression: (
Pipelines.nerf_recon_vq_compression,
'damo/cv_nerf-3d-reconstruction-vq-compression_damo'),
@@ -890,6 +894,7 @@ class CVTrainers(object):
ocr_recognition = 'ocr-recognition'
ocr_detection_db = 'ocr-detection-db'
nerf_recon_acc = 'nerf-recon-acc'
nerf_recon_4k = 'nerf-recon-4k'
action_detection = 'action-detection'
vision_efficient_tuning = 'vision-efficient-tuning'
@@ -1006,6 +1011,7 @@ class Preprocessors(object):
ocr_detection = 'ocr-detection'
bad_image_detecting_preprocessor = 'bad-image-detecting-preprocessor'
nerf_recon_acc_preprocessor = 'nerf-recon-acc-preprocessor'
nerf_recon_4k_preprocessor = 'nerf-recon-4k-preprocessor'
nerf_recon_vq_compression_preprocessor = 'nerf-recon-vq-compression-preprocessor'
controllable_image_generation_preprocessor = 'controllable-image-generation-preprocessor'
image_classification_preprocessor = 'image-classification-preprocessor'

View File

@@ -0,0 +1,22 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING
from modelscope.utils.import_utils import LazyImportModule
if TYPE_CHECKING:
    from .nerf_recon_4k import NeRFRecon4K
    from .nerf_preprocess import NeRFReconPreprocessor
else:
    # Map every lazily importable submodule to the public names it provides.
    # Both entries must live in ONE dict: assigning ``_import_structure``
    # twice (as the code previously did) silently discards the first mapping,
    # which made ``NeRFRecon4K`` unreachable through the lazy module.
    _import_structure = {
        'nerf_recon_4k': ['NeRFRecon4K'],
        'nerf_preprocess': ['NeRFReconPreprocessor'],
    }

    import sys

    # Replace this package in ``sys.modules`` with a lazy proxy so the
    # submodules above are only imported when one of their names is accessed.
    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )

View File

@@ -0,0 +1,97 @@
import os
import cv2
import imageio
import json
import numpy as np
import torch
import torch.nn.functional as F
def trans_t(t):
    """Return a 4x4 float tensor: identity with ``t`` placed at row 2, col 3.

    In homogeneous coordinates this is a translation by ``t`` along z.
    """
    rows = [
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, t],
        [0, 0, 0, 1],
    ]
    return torch.Tensor(rows).float()
def rot_phi(phi):
    """Return a 4x4 float tensor rotating by ``phi`` radians about the x axis.

    The rotation occupies rows/cols 1-2; row/col 0 and the homogeneous
    row/col 3 are left as identity.
    """
    cos_phi = np.cos(phi)
    sin_phi = np.sin(phi)
    rows = [
        [1, 0, 0, 0],
        [0, cos_phi, -sin_phi, 0],
        [0, sin_phi, cos_phi, 0],
        [0, 0, 0, 1],
    ]
    return torch.Tensor(rows).float()
def rot_theta(th):
    """Return a 4x4 float tensor rotating by ``th`` radians about the y axis.

    The rotation occupies rows/cols 0 and 2 (with ``-sin`` at row 0, col 2);
    row/col 1 and the homogeneous row/col 3 are left as identity.
    """
    cos_th = np.cos(th)
    sin_th = np.sin(th)
    rows = [
        [cos_th, 0, -sin_th, 0],
        [0, 1, 0, 0],
        [sin_th, 0, cos_th, 0],
        [0, 0, 0, 1],
    ]
    return torch.Tensor(rows).float()
def pose_spherical(theta, phi, radius):
    """Build a 4x4 pose tensor from spherical parameters.

    Args:
        theta: Angle in degrees passed (as radians) to ``rot_theta``.
        phi: Angle in degrees passed (as radians) to ``rot_phi``.
        radius: Offset passed to ``trans_t``.

    Returns:
        The product ``axis_fix @ rot_theta @ rot_phi @ trans_t`` as a tensor.
    """
    phi_rad = phi / 180. * np.pi
    theta_rad = theta / 180. * np.pi

    pose = trans_t(radius)
    pose = rot_phi(phi_rad) @ pose
    pose = rot_theta(theta_rad) @ pose

    # Constant matrix applied last: negates x and swaps the y and z axes.
    axis_fix = torch.Tensor(
        np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]))
    return axis_fix @ pose
def load_blender_data(basedir, half_res=False, testskip=1):
    """Load the train/val/test splits of a Blender-style dataset.

    For every split, ``transforms_<split>.json`` is read from ``basedir`` and
    each listed frame's ``<file_path>.png`` image and ``transform_matrix``
    are loaded.

    Args:
        basedir: Directory holding the ``transforms_*.json`` files and images.
        half_res: When true, images are resized to half height/width with
            ``cv2.INTER_AREA`` and the focal length is halved.
        testskip: Frame stride for the test split. ``0`` disables striding
            for every split; the val split otherwise uses a stride of 50.

    Returns:
        ``(imgs, poses, render_poses, [H, W, focal], i_split)`` where
        ``i_split`` holds three index arrays (train, val, test) into the
        concatenated ``imgs``/``poses``.
    """
    splits = ['train', 'val', 'test']

    metas = {}
    for split in splits:
        meta_path = os.path.join(basedir, 'transforms_{}.json'.format(split))
        with open(meta_path, 'r') as fp:
            metas[split] = json.load(fp)

    all_imgs, all_poses = [], []
    counts = [0]
    for split in splits:
        meta = metas[split]

        if split == 'train' or testskip == 0:
            skip = 1
        elif split == 'val':
            skip = 50
        else:
            skip = testskip

        split_imgs, split_poses = [], []
        for frame in meta['frames'][::skip]:
            fname = os.path.join(basedir, frame['file_path'] + '.png')
            split_imgs.append(imageio.imread(fname))
            split_poses.append(np.array(frame['transform_matrix']))

        # keep all 4 channels (RGBA)
        split_imgs = (np.array(split_imgs) / 255.).astype(np.float32)
        split_poses = np.array(split_poses).astype(np.float32)

        counts.append(counts[-1] + split_imgs.shape[0])
        all_imgs.append(split_imgs)
        all_poses.append(split_poses)

    # Consecutive index ranges, one per split.
    i_split = [
        np.arange(start, stop) for start, stop in zip(counts[:-1], counts[1:])
    ]

    imgs = np.concatenate(all_imgs, 0)
    poses = np.concatenate(all_poses, 0)

    H, W = imgs[0].shape[:2]
    # ``meta`` is the metadata of the last split processed above.
    camera_angle_x = float(meta['camera_angle_x'])
    focal = .5 * W / np.tan(.5 * camera_angle_x)

    render_angles = np.linspace(-180, 180, 160 + 1)[:-1]
    render_poses = torch.stack(
        [pose_spherical(angle, -30.0, 4.0) for angle in render_angles], 0)

    if half_res:
        H = H // 2
        W = W // 2
        focal = focal / 2.

        resized = np.zeros((imgs.shape[0], H, W, 4))
        for idx, img in enumerate(imgs):
            resized[idx] = cv2.resize(
                img, (W, H), interpolation=cv2.INTER_AREA)
        imgs = resized

    return imgs, poses, render_poses, [H, W, focal], i_split

View File

@@ -0,0 +1,143 @@
import numpy as np
from .load_blender import load_blender_data
from .load_llff import load_llff_data
from .load_tankstemple import load_tankstemple_data
def load_data(args):
    """Load a dataset and pack everything downstream code needs into a dict.

    Args:
        args: Namespace-like configuration. ``dataset_type`` selects the
            loader (``'llff'``, ``'blender'`` or ``'tankstemple'``). The other
            attributes read here are ``datadir``, ``load_sr`` and
            ``white_bkgd`` plus, depending on the type, ``factor``/``ndc``
            (llff), ``half_res``/``testskip`` (blender) or
            ``movie_render_kwargs`` (tankstemple).

    Returns:
        dict with the keys ``hwf``, ``HW``, ``Ks``, ``near``, ``far``,
        ``near_clip``, ``i_train``, ``i_val``, ``i_test``, ``poses``,
        ``render_poses``, ``images``, ``depths``, ``white_bkgd``,
        ``irregular_shape``, ``srgt`` and ``w2c``.

    Raises:
        NotImplementedError: If ``args.dataset_type`` is not supported.
    """
    K, depths = None, None
    near_clip = None
    # Default super-resolution payload. The tankstemple loader provides none,
    # so without this default ``args.load_sr`` raised UnboundLocalError there.
    srgt = [0, 0]

    if args.dataset_type == 'llff':
        images, depths, poses, bds, render_poses, i_test, *srgt = load_llff_data(
            args.datadir,
            args.factor,
            None,
            None,
            recenter=True,
            bd_factor=0.75,
            spherify=False,
            load_depths=False,
            load_SR=args.load_sr,
            movie_render_kwargs=dict())
        # The loader stores [H, W, focal] in the last column of each pose.
        hwf = poses[0, :3, -1]
        poses = poses[:, :3, :4]
        print('Loaded llff', images.shape, render_poses.shape, hwf,
              args.datadir)
        if not isinstance(i_test, list):
            i_test = [i_test]

        llffhold = 8
        if llffhold > 0:
            print('Auto LLFF holdout,', llffhold)
            i_test = np.arange(images.shape[0])[::llffhold]

        i_val = [i_test[0]]
        i_train = np.array([
            i for i in np.arange(int(images.shape[0]))
            if (i not in i_test and i not in i_val)
        ])

        print('DEFINING BOUNDS')
        if args.ndc:
            near = 0.
            far = 1.
        else:
            near_clip = max(np.ndarray.min(bds) * .9, 0)
            _far = max(np.ndarray.max(bds) * 1., 0)
            near = 0
            far = inward_nearfar_heuristic(poses[i_train, :3, 3])[1]
            print('near_clip', near_clip)
            print('original far', _far)
        print('NEAR FAR', near, far)

    elif args.dataset_type == 'blender':
        images, poses, render_poses, hwf, i_split = load_blender_data(
            args.datadir, args.half_res, args.testskip)
        print('Loaded blender', images.shape, render_poses.shape, hwf,
              args.datadir)
        i_train, i_val, i_test = i_split

        near, far = 2., 6.

        if images.shape[-1] == 4:
            # Composite RGBA onto a white or black background.
            if args.white_bkgd:
                images = images[..., :3] * images[..., -1:] + (
                    1. - images[..., -1:])
            else:
                images = images[..., :3] * images[..., -1:]
        srgt = [images, 0]

    elif args.dataset_type == 'tankstemple':
        images, poses, render_poses, hwf, K, i_split = load_tankstemple_data(
            args.datadir, movie_render_kwargs=args.movie_render_kwargs)
        print('Loaded tankstemple', images.shape, render_poses.shape, hwf,
              args.datadir)
        i_train, i_val, i_test = i_split

        near, far = inward_nearfar_heuristic(poses[i_train, :3, 3], ratio=0)

        if images.shape[-1] == 4:
            if args.white_bkgd:
                images = images[..., :3] * images[..., -1:] + (
                    1. - images[..., -1:])
            else:
                images = images[..., :3] * images[..., -1:]

    else:
        raise NotImplementedError(
            f'Unknown dataset type {args.dataset_type} exiting')

    # Cast intrinsics to right types
    H, W, focal = hwf
    H, W = int(H), int(W)
    hwf = [H, W, focal]
    HW = np.array([im.shape[:2] for im in images])
    irregular_shape = (images.dtype == np.dtype('object'))

    if K is None:
        K = np.array([[focal, 0, 0.5 * W], [0, focal, 0.5 * H], [0, 0, 1]])

    # A single 3x3 intrinsic matrix is shared by every pose.
    if len(K.shape) == 2:
        Ks = K[None].repeat(len(poses), axis=0)
    else:
        Ks = K

    render_poses = render_poses[..., :4]

    if args.load_sr:
        srgt, w2c = srgt[0], srgt[1]
    else:
        srgt, w2c = 0, 0

    data_dict = dict(
        hwf=hwf,
        HW=HW,
        Ks=Ks,
        near=near,
        far=far,
        near_clip=near_clip,
        i_train=i_train,
        i_val=i_val,
        i_test=i_test,
        poses=poses,
        render_poses=render_poses,
        images=images,
        depths=depths,
        white_bkgd=args.white_bkgd,
        irregular_shape=irregular_shape,
        srgt=srgt,
        w2c=w2c)
    return data_dict
def inward_nearfar_heuristic(cam_o, ratio=0.05):
    """Derive near/far distances from the spread of camera origins.

    Args:
        cam_o: Array of camera origins, one per row.
        ratio: Fraction of ``far`` used as ``near``.

    Returns:
        ``(near, far)`` where ``far`` is the largest pairwise distance between
        the origins and ``near = far * ratio``.
    """
    pairwise_offsets = cam_o[:, None] - cam_o
    pairwise_dist = np.linalg.norm(pairwise_offsets, axis=-1)
    # ``far`` could be too small to contain the scene bbox; it is only used
    # to determine the scene bbox (lib/dvgo uses 1e9 as far).
    far = pairwise_dist.max()
    return far * ratio, far

View File

@@ -0,0 +1,548 @@
import os
import imageio
import numpy as np
import scipy
import torch
# Slightly modified version of LLFF data loading code
# see https://github.com/Fyusion/LLFF for original
def imread(f):
    """Read image ``f`` with imageio.

    Paths ending in ``png`` are read through the ``PNG-PIL`` plugin with
    ``ignoregamma=True``; everything else uses imageio's default reader.
    """
    if not f.endswith('png'):
        return imageio.imread(f)
    return imageio.imread(f, format='PNG-PIL', ignoregamma=True)
def depthread(path):
    """Read a binary depth map with a ``width&height&channels&`` text header.

    The header's first three ``&``-separated integers give the array
    dimensions; every byte after the third ``&`` is raw ``float32`` data
    stored in column-major (Fortran) order.

    Args:
        path: Path of the depth-map file.

    Returns:
        The data as an array with height first, then width, then channels,
        with size-1 axes squeezed away.

    Raises:
        ValueError: If the file ends before the third ``&`` delimiter. The
            previous implementation looped forever in that case.
    """
    with open(path, 'rb') as fid:
        width, height, channels = np.genfromtxt(
            fid, delimiter='&', max_rows=1, usecols=(0, 1, 2), dtype=int)
        # Rewind and skip past the third delimiter to reach the payload.
        fid.seek(0)
        num_delimiter = 0
        while num_delimiter < 3:
            byte = fid.read(1)
            if not byte:
                raise ValueError(
                    'Unexpected end of file while reading the header of '
                    '{}'.format(path))
            if byte == b'&':
                num_delimiter += 1
        array = np.fromfile(fid, np.float32)
    array = array.reshape((width, height, channels), order='F')
    return np.transpose(array, (1, 0, 2)).squeeze()
def _minify(basedir, factors=None, resolutions=None):
    """Create down-scaled copies of ``<basedir>/images`` using ``mogrify``.

    For each integer factor ``r`` a directory ``images_<r>`` is produced, and
    for each ``[height, width]`` resolution a directory
    ``images_<width>x<height>``. Directories that already exist are left
    untouched; if all of them exist the function returns immediately.

    Args:
        basedir: Dataset root that contains the ``images`` directory.
        factors: Integer down-scaling factors (default: none).
        resolutions: ``[height, width]`` target sizes (default: none).

    Raises:
        FileNotFoundError: If work is needed but ``images`` holds no file with
            a recognised image extension.
        subprocess.CalledProcessError: If ``mogrify`` fails.
    """
    # ``None`` defaults avoid sharing mutable default lists between calls.
    factors = [] if factors is None else list(factors)
    resolutions = [] if resolutions is None else list(resolutions)

    wanted_dirs = ['images_{}'.format(r) for r in factors]
    wanted_dirs += ['images_{}x{}'.format(r[1], r[0]) for r in resolutions]
    if all(os.path.exists(os.path.join(basedir, d)) for d in wanted_dirs):
        return

    from shutil import copy
    from subprocess import check_output

    imgdir_orig = os.path.join(basedir, 'images')
    imgs = [
        os.path.join(imgdir_orig, f) for f in sorted(os.listdir(imgdir_orig))
    ]
    imgs = [
        f for f in imgs if f.endswith(('JPG', 'jpg', 'png', 'jpeg', 'PNG'))
    ]
    if not imgs:
        raise FileNotFoundError(
            'No images found in {}'.format(imgdir_orig))
    # Extension of the source images; mogrify converts these to png.
    ext = imgs[0].split('.')[-1]

    for r in factors + resolutions:
        if isinstance(r, int):
            name = 'images_{}'.format(r)
            resizearg = '{}%'.format(100. / r)
        else:
            name = 'images_{}x{}'.format(r[1], r[0])
            resizearg = '{}x{}'.format(r[1], r[0])
        imgdir = os.path.join(basedir, name)
        if os.path.exists(imgdir):
            continue

        print('Minifying', r, basedir)

        os.makedirs(imgdir)
        # Copy with shutil rather than a shell ``cp`` so paths containing
        # spaces or shell metacharacters are handled safely.
        for fname in sorted(os.listdir(imgdir_orig)):
            src = os.path.join(imgdir_orig, fname)
            if fname.startswith('.') or not os.path.isfile(src):
                continue
            copy(src, imgdir)

        args = ' '.join([
            'mogrify', '-resize', resizearg, '-format', 'png',
            '*.{}'.format(ext)
        ])
        print(args)
        # ``cwd=`` scopes the directory change to the child process, so the
        # caller's working directory is untouched even if mogrify fails.
        check_output(args, shell=True, cwd=imgdir)

        if ext != 'png':
            suffix = '.{}'.format(ext)
            for fname in os.listdir(imgdir):
                if fname.endswith(suffix) and not fname.startswith('.'):
                    os.remove(os.path.join(imgdir, fname))
            print('Removed duplicates')
        print('Done')
def _load_data(basedir,
               factor=None,
               width=None,
               height=None,
               load_imgs=True,
               load_depths=False,
               load_SR=False):
    """Load poses, bounds and (optionally) images/depths of an LLFF scene.

    Args:
        basedir: Scene directory containing ``poses_bounds.npy`` and
            ``images``.
        factor: Integer down-scaling factor; selects ``images_<factor>``.
        width: Target image width (used alone or together with ``height``).
        height: Target image height (used alone or together with ``width``).
        load_imgs: When false, only ``(poses, bds)`` is returned.
        load_depths: When true, depth maps from ``stereo/depth_maps`` are
            loaded and returned as the fourth element.
        load_SR: One of ``16``, ``8``, ``4``, ``2``, ``1`` selecting the
            directory (``images_16`` ... ``images``) of the extra image set
            returned as the fourth element; falsy disables it.

    Returns:
        ``None`` if the selected image directory does not exist; otherwise
        ``(poses, bds)``, ``(poses, bds, imgs)``, ``(poses, bds, imgs,
        imgs_sr)`` or ``(poses, bds, imgs, depths)`` depending on the flags.
        The arrays keep the per-image axis last.

    Raises:
        NotImplementedError: If ``poses_bounds.npy`` has neither 17 nor 14
            columns.
        ValueError: If ``load_SR`` is truthy but not a supported value
            (previously an UnboundLocalError).
    """
    image_exts = ('JPG', 'jpg', 'png')

    poses_arr = np.load(os.path.join(basedir, 'poses_bounds.npy'))
    if poses_arr.shape[1] == 17:
        poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1, 2, 0])
    elif poses_arr.shape[1] == 14:
        poses = poses_arr[:, :-2].reshape([-1, 3, 4]).transpose([1, 2, 0])
    else:
        raise NotImplementedError
    bds = poses_arr[:, -2:].transpose([1, 0])

    img0 = [
        os.path.join(basedir, 'images', f)
        for f in sorted(os.listdir(os.path.join(basedir, 'images')))
        if f.endswith(image_exts)
    ][0]
    sh = imageio.imread(img0).shape

    # Pick the image directory suffix, creating the resized copy if needed.
    sfx = ''
    if height is not None and width is not None:
        _minify(basedir, resolutions=[[height, width]])
        sfx = '_{}x{}'.format(width, height)
    elif factor is not None and factor != 1:
        sfx = '_{}'.format(factor)
        _minify(basedir, factors=[factor])
    elif height is not None:
        factor = sh[0] / float(height)
        width = int(sh[1] / factor)
        _minify(basedir, resolutions=[[height, width]])
        sfx = '_{}x{}'.format(width, height)
    elif width is not None:
        factor = sh[1] / float(width)
        height = int(sh[0] / factor)
        _minify(basedir, resolutions=[[height, width]])
        sfx = '_{}x{}'.format(width, height)
    else:
        factor = 1

    imgdir = os.path.join(basedir, 'images' + sfx)
    print(f'Loading images from {imgdir}')
    if not os.path.exists(imgdir):
        print(imgdir, 'does not exist, returning')
        return

    imgfiles = [
        os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir))
        if f.endswith(image_exts)
    ]
    if poses.shape[-1] != len(imgfiles):
        print()
        print('Mismatch between imgs {} and poses {} !!!!'.format(
            len(imgfiles), poses.shape[-1]))
        # Keep only the images that have a pose listed in poses_names.npy.
        names = set(
            name[:-4]
            for name in np.load(os.path.join(basedir, 'poses_names.npy')))
        assert len(names) == poses.shape[-1]
        print('Below failed files are skip due to SfM failure:')
        new_imgfiles = []
        for i in imgfiles:
            fname = os.path.split(i)[1][:-4]
            if fname in names:
                new_imgfiles.append(i)
            else:
                print('==>', i)
        imgfiles = new_imgfiles

    if len(imgfiles) < 3:
        print('Too few images...')
        import sys
        sys.exit()

    sh = imageio.imread(imgfiles[0]).shape
    if poses.shape[1] == 4:
        # 3x4 poses carry no intrinsics column: append one and read the
        # focal entry from hwf_cxcy.npy.
        poses = np.concatenate([poses, np.zeros_like(poses[:, [0]])], 1)
        poses[2, 4, :] = np.load(os.path.join(basedir, 'hwf_cxcy.npy'))[2]
    poses[:2, 4, :] = np.array(sh[:2]).reshape([2, 1])
    poses[2, 4, :] = poses[2, 4, :] * 1. / factor

    if not load_imgs:
        return poses, bds

    imgs = [imread(f)[..., :3] / 255. for f in imgfiles]
    imgs = np.stack(imgs, -1)

    if load_SR:
        sr_dirs = {
            16: 'images_16',
            8: 'images_8',
            4: 'images_4',
            2: 'images_2',
            1: 'images',
        }
        if load_SR not in sr_dirs:
            raise ValueError(
                'Unsupported load_SR value {!r}; expected one of {}'.format(
                    load_SR, sorted(sr_dirs)))
        imgdir_sr = os.path.join(basedir, sr_dirs[load_SR])
        imgfiles_sr = [
            os.path.join(imgdir_sr, f) for f in sorted(os.listdir(imgdir_sr))
            if f.endswith(image_exts)
        ]
        imgs_sr = [imread(f)[..., :3] / 255. for f in imgfiles_sr]
        imgs_sr = np.stack(imgs_sr, -1)

    print('Loaded image data', imgs.shape, poses[:, -1, 0])

    if not load_depths and load_SR:
        return poses, bds, imgs, imgs_sr

    if not load_depths:
        return poses, bds, imgs

    depthdir = os.path.join(basedir, 'stereo', 'depth_maps')
    assert os.path.exists(depthdir), f'Dir not found: {depthdir}'

    depthfiles = [
        os.path.join(depthdir, f) for f in sorted(os.listdir(depthdir))
        if f.endswith('.geometric.bin')
    ]
    assert poses.shape[-1] == len(
        depthfiles), 'Mismatch between imgs {} and poses {} !!!!'.format(
            len(depthfiles), poses.shape[-1])

    depths = [depthread(f) for f in depthfiles]
    depths = np.stack(depths, -1)
    print('Loaded depth data', depths.shape)
    return poses, bds, imgs, depths
def normalize(x):
    """Return ``x`` divided by its norm as computed by ``np.linalg.norm``."""
    length = np.linalg.norm(x)
    return x / length
def viewmatrix(z, up, pos):
    """Assemble a 3x4 matrix from a viewing direction, an up hint and a position.

    The columns are, in order: ``normalize(up x z)``, the re-orthogonalised
    up vector, the normalised ``z`` and ``pos``.
    """
    forward = normalize(z)
    right = normalize(np.cross(up, forward))
    true_up = normalize(np.cross(forward, right))
    return np.stack([right, true_up, forward, pos], 1)
def ptstocam(pts, c2w):
    """Express ``pts`` in the frame described by ``c2w``.

    Subtracts the translation ``c2w[:3, 3]`` from each point and multiplies
    the result by the transpose of the 3x3 block ``c2w[:3, :3]``.
    """
    rotation_t = c2w[:3, :3].T
    offsets = (pts - c2w[:3, 3])[..., np.newaxis]
    return np.matmul(rotation_t, offsets)[..., 0]
def poses_avg(poses):
    """Compute an average pose from a stack of poses.

    The position is the mean of column 3, the viewing direction the
    normalised sum of column 2 and the up hint the sum of column 1. The last
    column of the first pose is appended unchanged.

    Returns:
        The ``viewmatrix`` of those quantities concatenated with that column.
    """
    up_sum = poses[:, :3, 1].sum(0)
    mean_dir = normalize(poses[:, :3, 2].sum(0))
    mean_pos = poses[:, :3, 3].mean(0)
    last_col = poses[0, :3, -1:]
    return np.concatenate([viewmatrix(mean_dir, up_sum, mean_pos), last_col],
                          1)
def w2c_gen(poses):
    """Return, for every pose, the inverse of its re-orthonormalised rotation.

    For each pose a 3x3 basis is rebuilt from columns 1 (up) and 2 (viewing
    direction) and inverted; the inverses are stacked along axis 0.
    """

    def _inverse_rotation(pose):
        z = normalize(pose[:3, 2])
        up = pose[:3, 1]
        # The direction is normalised a second time, as in the original.
        forward = normalize(z)
        right = normalize(np.cross(up, forward))
        true_up = normalize(np.cross(forward, right))
        basis = np.stack([right, true_up, forward], 1)
        return np.linalg.inv(basis)

    inverses = [
        _inverse_rotation(poses[idx, ...]) for idx in range(len(poses))
    ]
    return np.stack(inverses, 0)
def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, rots, N):
    """Generate ``N`` poses along a spiral around the pose ``c2w``.

    Args:
        c2w: Pose whose ``[:3, :4]`` block places the spiral and whose column
            4 is appended to every generated pose.
        up: Up hint handed to ``viewmatrix``.
        rads: Per-axis radii of the spiral (a homogeneous 1 is appended).
        focal: Distance along -z of the point every pose looks away from.
        zdelta: Amplitude of the z oscillation.
        zrate: Frequency multiplier of the z oscillation.
        rots: Number of full turns.
        N: Number of poses; passed to ``np.linspace`` as the sample count.

    Returns:
        List of pose arrays.
    """
    radii = np.array(list(rads) + [1.])
    hwf = c2w[:, 4:5]

    def _pose_at(theta):
        offset = np.array([
            np.cos(theta), -np.sin(theta), -np.sin(theta * zrate) * zdelta,
            1.
        ])
        position = np.dot(c2w[:3, :4], offset * radii)
        focus = np.dot(c2w[:3, :4], np.array([0, 0, -focal, 1.]))
        z = normalize(position - focus)
        return np.concatenate([viewmatrix(z, up, position), hwf], 1)

    thetas = np.linspace(0., 2 * np.pi * rots, N + 1)[:-1]
    return [_pose_at(theta) for theta in thetas]
def recenter_poses(poses):
    """Re-express all poses relative to their average pose.

    The ``[:3, :4]`` block of every pose is left-multiplied by the inverse of
    the homogeneous average pose (``poses_avg``); any further columns are
    copied through unchanged. The input array is not modified.
    """
    recentered = poses + 0  # same-shaped copy that receives the result
    last_row = np.reshape([0, 0, 0, 1.], [1, 4])

    avg_c2w = poses_avg(poses)
    avg_c2w = np.concatenate([avg_c2w[:3, :4], last_row], -2)

    # Promote every 3x4 pose to 4x4 so it can be multiplied by the inverse.
    last_rows = np.tile(
        np.reshape(last_row, [1, 1, 4]), [poses.shape[0], 1, 1])
    homogeneous = np.concatenate([poses[:, :3, :4], last_rows], -2)

    homogeneous = np.linalg.inv(avg_c2w) @ homogeneous
    recentered[:, :3, :4] = homogeneous[:, :3, :4]
    return recentered
def rerotate_poses(poses):
    """Rotate the poses so the camera positions' thinnest axis becomes +y.

    The eigenvector of the position covariance with the smallest eigenvalue
    is treated as the cameras' up direction and rotated onto ``[0, 1, 0]``.
    The rotation is applied about the centroid of the camera positions, so
    the centroid itself stays where it was.

    Args:
        poses: Array whose ``[:, :3, :3]`` block holds rotations and whose
            column 3 holds positions. The input is not modified.

    Returns:
        A rotated copy of ``poses``.
    """
    # Import the class explicitly: a bare ``import scipy`` does not guarantee
    # that the ``scipy.spatial.transform`` subpackage has been loaded.
    from scipy.spatial.transform import Rotation

    poses = np.copy(poses)
    centroid = poses[:, :3, 3].mean(0)

    poses[:, :3, 3] = poses[:, :3, 3] - centroid

    # Find the minimum pca vector with minimum eigen value
    x = poses[:, :, 3]
    mu = x.mean(0)
    cov = np.cov((x - mu).T)
    ev, eig = np.linalg.eig(cov)
    cams_up = eig[:, np.argmin(ev)]
    # Eigenvector signs are arbitrary; prefer the one pointing towards +y.
    if cams_up[1] < 0:
        cams_up = -cams_up

    # Find rotation matrix that align cams_up with [0,1,0]
    R = Rotation.align_vectors([[0, 1, 0]], cams_up[None])[0].as_matrix()

    # Apply rotation and add back the centroid position
    poses[:, :3, :3] = R @ poses[:, :3, :3]
    poses[:, :3, [3]] = R @ poses[:, :3, [3]]
    poses[:, :3, 3] = poses[:, :3, 3] + centroid
    return poses
#####################
def spherify_poses(poses, bds, depths):
    """Re-centre and rescale poses so the cameras lie at unit RMS radius.

    The new origin is the point closest (least squares) to all camera
    viewing rays; the new third axis is the mean offset of the cameras from
    that point. Positions, ``bds`` and ``depths`` are scaled by the same
    factor. ``bds`` and ``depths`` are scaled with ``*=``, so array arguments
    are modified in place.

    Returns:
        ``(poses_reset, radius, bds, depths)``; ``poses_reset`` keeps the
        last column of the first input pose as its final column.
    """

    def _to_4x4(p):
        # Append the homogeneous row [0, 0, 0, 1] to every 3x4 matrix.
        bottom = np.reshape(np.eye(4)[-1, :], [1, 1, 4])
        return np.concatenate([p, np.tile(bottom, [p.shape[0], 1, 1])], 1)

    ray_dirs = poses[:, :3, 2:3]
    ray_origins = poses[:, :3, 3:4]

    # Least-squares point of minimum distance to all rays.
    A_i = np.eye(3) - ray_dirs * np.transpose(ray_dirs, [0, 2, 1])
    b_i = -A_i @ ray_origins
    normal_matrix = (np.transpose(A_i, [0, 2, 1]) @ A_i).mean(0)
    center = np.squeeze(-np.linalg.inv(normal_matrix) @ (b_i).mean(0))

    up = (poses[:, :3, 3] - center).mean(0)

    axis_up = normalize(up)
    axis_a = normalize(np.cross([.1, .2, .3], axis_up))
    axis_b = normalize(np.cross(axis_up, axis_a))
    c2w = np.stack([axis_a, axis_b, axis_up, center], 1)

    poses_reset = np.linalg.inv(_to_4x4(c2w[None])) @ _to_4x4(
        poses[:, :3, :4])

    squared_dist = np.sum(np.square(poses_reset[:, :3, 3]), -1)
    radius = np.sqrt(np.mean(squared_dist))

    sc = 1. / radius
    poses_reset[:, :3, 3] *= sc
    bds *= sc
    radius *= sc
    depths *= sc

    last_col = np.broadcast_to(poses[0, :3, -1:],
                               poses_reset[:, :3, -1:].shape)
    poses_reset = np.concatenate([poses_reset[:, :3, :4], last_col], -1)

    return poses_reset, radius, bds, depths
def load_llff_data(basedir,
                   factor=8,
                   width=None,
                   height=None,
                   recenter=True,
                   rerotate=True,
                   bd_factor=.75,
                   spherify=False,
                   path_zflat=False,
                   load_depths=False,
                   load_SR=False,
                   movie_render_kwargs=None):
    """Load an LLFF scene and generate a camera path for rendering.

    Args:
        basedir: Scene directory (see ``_load_data``).
        factor: Image down-scaling factor (8 downsamples images by 8x).
        width: Optional target image width.
        height: Optional target image height.
        recenter: Re-express poses relative to their average pose.
        rerotate: Only used with ``spherify``; also apply ``rerotate_poses``.
        bd_factor: Rescale the scene so the nearest bound becomes
            ``1 / bd_factor``; ``None`` disables rescaling.
        spherify: Use ``spherify_poses`` and a circular render path instead
            of a spiral.
        path_zflat: Spiral only; flatten the spiral in z and halve the number
            of views.
        load_depths: Load depth maps.
        load_SR: Selector for the extra image set (see ``_load_data``).
        movie_render_kwargs: Optional dict tweaking the render path. Keys
            read here: ``scale_r``, ``shift_x``, ``shift_y``, ``shift_z``,
            ``pitch_deg``, ``flip_up``, ``scale_f``, ``zdelta``, ``zrate``
            and ``N_rots``.

    Returns:
        ``(images, depths, poses, bds, render_poses, i_test, imgs_SRGT,
        w2c)``. ``i_test`` is the index of the pose closest to the average
        pose and ``imgs_SRGT`` is ``None`` when no extra image set was loaded.

    Raises:
        FileNotFoundError: If ``_load_data`` finds no image directory.
    """
    # ``None`` default avoids sharing one mutable dict between calls.
    if movie_render_kwargs is None:
        movie_render_kwargs = {}

    loaded = _load_data(
        basedir,
        factor=factor,
        width=width,
        height=height,
        load_depths=load_depths,
        load_SR=load_SR)  # factor=8 downsamples original imgs by 8x
    if loaded is None:
        # _load_data returns None when the image directory is missing; fail
        # with a clear message instead of an opaque unpacking TypeError.
        raise FileNotFoundError(
            'No image directory could be loaded from {}'.format(basedir))
    poses, bds, imgs, *depths = loaded
    print('Loaded', basedir, bds.min(), bds.max())

    # Always bound, so the post-processing at the end cannot hit an undefined
    # name when both ``load_SR`` and ``load_depths`` are set.
    imgs_SRGT = None
    if load_depths:
        depths = depths[0]
    elif load_SR:
        imgs_SRGT = depths[0]
        depths = 0
    else:
        depths = 0

    # Correct rotation matrix ordering and move variable dim to axis 0
    poses = np.concatenate(
        [poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]], 1)
    poses = np.moveaxis(poses, -1, 0).astype(np.float32)
    imgs = np.moveaxis(imgs, -1, 0).astype(np.float32)
    images = imgs
    bds = np.moveaxis(bds, -1, 0).astype(np.float32)

    # Rescale if bd_factor is provided
    if bds.min() < 0 and bd_factor is not None:
        print('Found negative z values from SfM sparse points!?')
        print('Please try bd_factor=None')
        import sys
        sys.exit()
    sc = 1. if bd_factor is None else 1. / (bds.min() * bd_factor)
    poses[:, :3, 3] *= sc
    bds *= sc
    depths *= sc

    if recenter:
        poses = recenter_poses(poses)

    if spherify:
        poses, radius, bds, depths = spherify_poses(poses, bds, depths)
        if rerotate:
            poses = rerotate_poses(poses)

        # generate spiral poses for rendering fly-through movie
        centroid = poses[:, :3, 3].mean(0)
        radcircle = movie_render_kwargs.get('scale_r', 1) * np.linalg.norm(
            poses[:, :3, 3] - centroid, axis=-1).mean()
        centroid[0] += movie_render_kwargs.get('shift_x', 0)
        centroid[1] += movie_render_kwargs.get('shift_y', 0)
        centroid[2] += movie_render_kwargs.get('shift_z', 0)
        new_up_rad = movie_render_kwargs.get('pitch_deg', 0) * np.pi / 180
        target_y = radcircle * np.tan(new_up_rad)

        # The up vector does not depend on the angle; compute it once.
        if movie_render_kwargs.get('flip_up', False):
            up = np.array([0, 1., 0])
        else:
            up = np.array([0, -1., 0])

        render_poses = []
        for th in np.linspace(0., 2. * np.pi, 200):
            camorigin = np.array(
                [radcircle * np.cos(th), 0, radcircle * np.sin(th)])
            vec2 = normalize(camorigin)
            vec0 = normalize(np.cross(vec2, up))
            pos = camorigin + centroid
            # rotate to align with new pitch rotation
            lookat = -vec2
            lookat[1] = target_y
            lookat = normalize(lookat)
            vec2 = -lookat
            vec1 = normalize(np.cross(vec2, vec0))

            p = np.stack([vec0, vec1, vec2, pos], 1)
            render_poses.append(p)

        render_poses = np.stack(render_poses, 0)
        render_poses = np.concatenate([
            render_poses,
            np.broadcast_to(poses[0, :3, -1:], render_poses[:, :3, -1:].shape)
        ], -1)

    else:
        c2w = poses_avg(poses)
        print('recentered', c2w.shape)
        print(c2w[:3, :4])

        # Get spiral
        # Get average pose
        up = normalize(poses[:, :3, 1].sum(0))

        # Find a reasonable "focus depth" for this dataset
        close_depth, inf_depth = bds.min() * .9, bds.max() * 5.
        dt = .75
        mean_dz = 1. / (((1. - dt) / close_depth + dt / inf_depth))
        focal = mean_dz * movie_render_kwargs.get('scale_f', 1)

        # Get radii for spiral path
        zdelta = movie_render_kwargs.get('zdelta', 0.5)
        zrate = movie_render_kwargs.get('zrate', 1.0)
        tt = poses[:, :3, 3]  # ptstocam(poses[:3,3,:].T, c2w).T
        rads = np.percentile(np.abs(tt), 90, 0) * movie_render_kwargs.get(
            'scale_r', 1)
        c2w_path = c2w
        N_views = 120
        N_rots = movie_render_kwargs.get('N_rots', 1)
        if path_zflat:
            # zloc = np.percentile(tt, 10, 0)[2]
            zloc = -close_depth * .1
            c2w_path[:3, 3] = c2w_path[:3, 3] + zloc * c2w_path[:3, 2]
            rads[2] = 0.
            N_rots = 1
            # Integer division: np.linspace needs an integer sample count,
            # and ``/=`` would turn N_views into a float.
            N_views //= 2

        # Generate poses for spiral path
        render_poses = render_path_spiral(
            c2w_path,
            up,
            rads,
            focal,
            zdelta,
            zrate=zrate,
            rots=N_rots,
            N=N_views)

    render_poses = torch.Tensor(render_poses)

    # Both the world and the camera coordinate systems are 3-D, so one can be
    # transformed into the other by a 3x3 rotation matrix and a 3x1
    # translation vector.
    c2w = poses_avg(poses)
    w2c = w2c_gen(poses)
    print('Data:')
    print(poses.shape, images.shape, bds.shape)

    # Hold out the view whose position is closest to the average pose.
    dists = np.sum(np.square(c2w[:3, 3] - poses[:, :3, 3]), -1)
    i_test = np.argmin(dists)
    print('HOLDOUT view is', i_test)

    images = images.astype(np.float32)
    poses = poses.astype(np.float32)
    if imgs_SRGT is not None:
        imgs_SRGT = np.moveaxis(imgs_SRGT, [-1, -2], [0, 1]).astype(np.float32)

    return images, depths, poses, bds, render_poses, i_test, imgs_SRGT, w2c

View File

@@ -0,0 +1,75 @@
import glob
import os
import imageio
import numpy as np
def normalize(x):
    """Return ``x`` divided by its norm as computed by ``np.linalg.norm``."""
    magnitude = np.linalg.norm(x)
    return x / magnitude
def load_tankstemple_data(basedir, movie_render_kwargs=None):
    """Load a Tanks-and-Temples style scene and build a circular render path.

    Poses are read from ``<basedir>/pose/*txt``, images from
    ``<basedir>/rgb/*png`` and intrinsics from ``<basedir>/intrinsics.txt``.
    The first character of each image file name is parsed as an integer and
    selects the split (0 or 1) the frame belongs to.

    Args:
        basedir: Scene directory.
        movie_render_kwargs: Optional dict tweaking the render path. Keys
            read here: ``scale_r``, ``shift_x``, ``shift_y``, ``shift_z``,
            ``pitch_deg`` and ``flip_up_vec``. ``None`` means no tweaks.

    Returns:
        ``(imgs, poses, render_poses, [H, W, focal], K, i_split)``; the third
        entry of ``i_split`` is the same list object as the second.
    """
    # ``None`` default avoids a shared mutable default dict and tolerates
    # callers that forward an unset (None) option.
    if movie_render_kwargs is None:
        movie_render_kwargs = {}

    pose_paths = sorted(glob.glob(os.path.join(basedir, 'pose', '*txt')))
    rgb_paths = sorted(glob.glob(os.path.join(basedir, 'rgb', '*png')))

    all_poses = []
    all_imgs = []
    i_split = [[], []]
    for i, (pose_path, rgb_path) in enumerate(zip(pose_paths, rgb_paths)):
        i_set = int(os.path.split(rgb_path)[-1][0])
        all_poses.append(np.loadtxt(pose_path).astype(np.float32))
        all_imgs.append((imageio.imread(rgb_path) / 255.).astype(np.float32))
        i_split[i_set].append(i)

    imgs = np.stack(all_imgs, 0)
    poses = np.stack(all_poses, 0)
    # Reuse the second split as the third one.
    i_split.append(i_split[-1])

    path_intrinsics = os.path.join(basedir, 'intrinsics.txt')
    H, W = imgs[0].shape[:2]
    K = np.loadtxt(path_intrinsics)
    focal = float(K[0, 0])

    # generate spiral poses for rendering fly-through movie
    centroid = poses[:, :3, 3].mean(0)
    radcircle = movie_render_kwargs.get('scale_r', 1.0) * np.linalg.norm(
        poses[:, :3, 3] - centroid, axis=-1).mean()
    centroid[0] += movie_render_kwargs.get('shift_x', 0)
    centroid[1] += movie_render_kwargs.get('shift_y', 0)
    centroid[2] += movie_render_kwargs.get('shift_z', 0)
    new_up_rad = movie_render_kwargs.get('pitch_deg', 0) * np.pi / 180
    target_y = radcircle * np.tan(new_up_rad)

    # The up vector does not depend on the angle; compute it once.
    if movie_render_kwargs.get('flip_up_vec', False):
        up = np.array([0, -1., 0])
    else:
        up = np.array([0, 1., 0])

    render_poses = []
    for th in np.linspace(0., 2. * np.pi, 200):
        camorigin = np.array(
            [radcircle * np.cos(th), 0, radcircle * np.sin(th)])
        vec2 = normalize(camorigin)
        vec0 = normalize(np.cross(vec2, up))
        pos = camorigin + centroid
        # rotate to align with new pitch rotation
        lookat = -vec2
        lookat[1] = target_y
        lookat = normalize(lookat)
        lookat *= -1
        vec2 = -lookat
        vec1 = normalize(np.cross(vec2, vec0))

        p = np.stack([vec0, vec1, vec2, pos], 1)
        render_poses.append(p)

    render_poses = np.stack(render_poses, 0)
    render_poses = np.concatenate([
        render_poses,
        np.broadcast_to(poses[0, :3, -1:], render_poses[:, :3, -1:].shape)
    ], -1)

    return imgs, poses, render_poses, [H, W, focal], K, i_split

View File

@@ -0,0 +1,500 @@
# Copyright (c) 2023, ETH Zurich and UNC Chapel Hill.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
# its contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
import argparse
import collections
import os
import struct
import numpy as np
# Lightweight record types used by the readers and writers in this module.

# Description of one camera model: numeric id, name and parameter count.
CameraModel = collections.namedtuple('CameraModel',
['model_id', 'model_name', 'num_params'])
# One camera: id, model name, image size and the model's parameter values.
Camera = collections.namedtuple('Camera',
['id', 'model', 'width', 'height', 'params'])
# Base record for one image; the namedtuple's type name is 'Image'.
BaseImage = collections.namedtuple(
'Image', ['id', 'qvec', 'tvec', 'camera_id', 'name', 'xys', 'point3D_ids'])
# One 3D point together with the ids/indices of the observations it refers to.
Point3D = collections.namedtuple(
'Point3D', ['id', 'xyz', 'rgb', 'error', 'image_ids', 'point2D_idxs'])
class Image(BaseImage):
    """``BaseImage`` record with a convenience accessor for its rotation."""

    def qvec2rotmat(self):
        """Return the module-level ``qvec2rotmat`` applied to ``self.qvec``."""
        rotation = qvec2rotmat(self.qvec)
        return rotation
# Every camera model known to this module, as a set of CameraModel records.
CAMERA_MODELS = {
    CameraModel(model_id=0, model_name='SIMPLE_PINHOLE', num_params=3),
    CameraModel(model_id=1, model_name='PINHOLE', num_params=4),
    CameraModel(model_id=2, model_name='SIMPLE_RADIAL', num_params=4),
    CameraModel(model_id=3, model_name='RADIAL', num_params=5),
    CameraModel(model_id=4, model_name='OPENCV', num_params=8),
    CameraModel(model_id=5, model_name='OPENCV_FISHEYE', num_params=8),
    CameraModel(model_id=6, model_name='FULL_OPENCV', num_params=12),
    CameraModel(model_id=7, model_name='FOV', num_params=5),
    CameraModel(model_id=8, model_name='SIMPLE_RADIAL_FISHEYE', num_params=4),
    CameraModel(model_id=9, model_name='RADIAL_FISHEYE', num_params=5),
    CameraModel(model_id=10, model_name='THIN_PRISM_FISHEYE', num_params=12),
}

# Lookup tables over the same records, keyed by numeric id and by name.
CAMERA_MODEL_IDS = {model.model_id: model for model in CAMERA_MODELS}
CAMERA_MODEL_NAMES = {model.model_name: model for model in CAMERA_MODELS}
def read_next_bytes(fid,
                    num_bytes,
                    format_char_sequence,
                    endian_character='<'):
    """Read ``num_bytes`` from ``fid`` and unpack them with ``struct``.

    :param fid: Binary file object to read from.
    :param num_bytes: Number of bytes to read; must match the size implied
        by ``format_char_sequence``.
    :param format_char_sequence: ``struct`` format characters, e.g. ``'iiQQ'``.
    :param endian_character: Any of {@, =, <, >, !}
    :return: Tuple of read and unpacked values.
    """
    layout = struct.Struct(endian_character + format_char_sequence)
    raw = fid.read(num_bytes)
    return layout.unpack(raw)
def write_next_bytes(fid, data, format_char_sequence, endian_character='<'):
    """Pack ``data`` with ``struct`` and write it to a binary file.

    :param fid: Binary file object to write to.
    :param data: Value to write; several values written at once must be
        wrapped in a list or a tuple.
    :param format_char_sequence: ``struct`` format characters, one per value.
    :param endian_character: Any of {@, =, <, >, !}
    """
    fmt = endian_character + format_char_sequence
    # Lists/tuples are spread over the format; anything else is one value.
    values = data if isinstance(data, (list, tuple)) else (data, )
    payload = struct.pack(fmt, *values)
    fid.write(payload)
def read_cameras_text(path):
    """Parse a text camera list into a dict of ``Camera`` records.

    Blank lines and lines starting with ``#`` are skipped. Every other line
    holds ``CAMERA_ID MODEL WIDTH HEIGHT PARAMS...`` separated by whitespace.

    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)

    :param path: Path of the text file.
    :return: Dict mapping camera id to ``Camera``.
    """
    cameras = {}
    with open(path, 'r') as fid:
        for raw_line in fid:
            line = raw_line.strip()
            if not line or line[0] == '#':
                continue
            elems = line.split()
            camera_id = int(elems[0])
            model = elems[1]
            width = int(elems[2])
            height = int(elems[3])
            params = np.array(tuple(map(float, elems[4:])))
            cameras[camera_id] = Camera(
                id=camera_id,
                model=model,
                width=width,
                height=height,
                params=params)
    return cameras
def read_cameras_binary(path_to_model_file):
    """Parse a binary camera list into a dict of ``Camera`` records.

    The file starts with the camera count (``Q``); each camera follows as
    ``iiQQ`` (id, model id, width, height) plus one double per model
    parameter.

    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasBinary(const std::string& path)
        void Reconstruction::ReadCamerasBinary(const std::string& path)

    :param path_to_model_file: Path of the binary file.
    :return: Dict mapping camera id to ``Camera``.
    """
    cameras = {}
    with open(path_to_model_file, 'rb') as fid:
        (num_cameras, ) = read_next_bytes(fid, 8, 'Q')
        for _ in range(num_cameras):
            camera_id, model_id, width, height = read_next_bytes(
                fid, num_bytes=24, format_char_sequence='iiQQ')
            camera_model = CAMERA_MODEL_IDS[model_id]
            num_params = camera_model.num_params
            params = read_next_bytes(
                fid,
                num_bytes=8 * num_params,
                format_char_sequence='d' * num_params)
            cameras[camera_id] = Camera(
                id=camera_id,
                model=camera_model.model_name,
                width=width,
                height=height,
                params=np.array(params))
        # Duplicate camera ids would make the dict smaller than the count.
        assert len(cameras) == num_cameras
    return cameras
def write_cameras_text(cameras, path):
    """Write cameras as a COLMAP ``cameras.txt`` file.

    A three-line comment header is followed by one space-separated line per
    camera: id, model, width, height and every entry of ``params``.

    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)

    :param cameras: Dict whose values expose ``id``, ``model``, ``width``,
        ``height`` and an iterable ``params``.
    :param path: Destination file path.
    """
    header_lines = [
        '# Camera list with one line of data per camera:\n',
        '# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n',
        '# Number of cameras: {}\n'.format(len(cameras)),
    ]
    with open(path, 'w') as fid:
        fid.write(''.join(header_lines))
        for cam in cameras.values():
            fields = (cam.id, cam.model, cam.width, cam.height, *cam.params)
            fid.write(' '.join(map(str, fields)) + '\n')
def write_cameras_binary(cameras, path_to_model_file):
    """Write cameras as a COLMAP ``cameras.bin`` file.

    Emits a ``Q`` camera count, then per camera an ``iiQQ`` record
    (camera id, model id, width, height) followed by one double per
    parameter. The model id is looked up in ``CAMERA_MODEL_NAMES``.

    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasBinary(const std::string& path)
        void Reconstruction::ReadCamerasBinary(const std::string& path)

    :param cameras: Dict of camera records.
    :param path_to_model_file: Destination file path.
    :return: The ``cameras`` argument, unchanged.
    """
    with open(path_to_model_file, 'wb') as fid:
        write_next_bytes(fid, len(cameras), 'Q')
        for cam in cameras.values():
            record = [
                cam.id, CAMERA_MODEL_NAMES[cam.model].model_id, cam.width,
                cam.height
            ]
            write_next_bytes(fid, record, 'iiQQ')
            for value in cam.params:
                write_next_bytes(fid, float(value), 'd')
    return cameras
def read_images_text(path):
    """Parse a COLMAP ``images.txt`` file.

    Every image occupies two lines: a header line
    (``IMAGE_ID QW QX QY QZ TX TY TZ CAMERA_ID NAME``) and a line of
    ``X Y POINT3D_ID`` triplets. Blank lines and ``#`` comments in header
    position are skipped.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)

    :param path: Path of the text file.
    :return: Dict mapping image id to ``Image``.
    """
    images = {}
    with open(path, 'r') as fid:
        # ``readline`` returns '' only at end of file.
        for raw_line in iter(fid.readline, ''):
            header = raw_line.strip()
            if not header or header.startswith('#'):
                continue
            fields = header.split()
            image_id = int(fields[0])
            qvec = np.array(tuple(map(float, fields[1:5])))
            tvec = np.array(tuple(map(float, fields[5:8])))
            camera_id = int(fields[8])
            image_name = fields[9]
            # The line right after the header carries the 2D observations.
            observations = fid.readline().split()
            xs = tuple(map(float, observations[0::3]))
            ys = tuple(map(float, observations[1::3]))
            xys = np.column_stack([xs, ys])
            point3D_ids = np.array(tuple(map(int, observations[2::3])))
            images[image_id] = Image(
                id=image_id,
                qvec=qvec,
                tvec=tvec,
                camera_id=camera_id,
                name=image_name,
                xys=xys,
                point3D_ids=point3D_ids)
    return images
def read_images_binary(path_to_model_file):
    """Parse a COLMAP ``images.bin`` file.

    Layout read here: a ``Q`` image count, then per image an ``idddddddi``
    record (image id, 4 quaternion values, 3 translation values, camera id),
    a NUL-terminated name, a ``Q`` observation count and that many ``ddq``
    triplets (x, y, point3D id).

    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesBinary(const std::string& path)
        void Reconstruction::WriteImagesBinary(const std::string& path)

    :param path_to_model_file: Path of the binary file.
    :return: Dict mapping image id to ``Image``.
    """
    images = {}
    with open(path_to_model_file, 'rb') as fid:
        num_reg_images = read_next_bytes(fid, 8, 'Q')[0]
        for _ in range(num_reg_images):
            binary_image_properties = read_next_bytes(
                fid, num_bytes=64, format_char_sequence='idddddddi')
            image_id = binary_image_properties[0]
            qvec = np.array(binary_image_properties[1:5])
            tvec = np.array(binary_image_properties[5:8])
            camera_id = binary_image_properties[8]
            # Gather the raw bytes up to the NUL terminator and decode once.
            # Decoding byte by byte raises UnicodeDecodeError as soon as the
            # name contains a multi-byte UTF-8 character.
            name_bytes = b''
            current_char = read_next_bytes(fid, 1, 'c')[0]
            while current_char != b'\x00':
                name_bytes += current_char
                current_char = read_next_bytes(fid, 1, 'c')[0]
            image_name = name_bytes.decode('utf-8')
            num_points2D = read_next_bytes(
                fid, num_bytes=8, format_char_sequence='Q')[0]
            x_y_id_s = read_next_bytes(
                fid,
                num_bytes=24 * num_points2D,
                format_char_sequence='ddq' * num_points2D)
            xys = np.column_stack([
                tuple(map(float, x_y_id_s[0::3])),
                tuple(map(float, x_y_id_s[1::3]))
            ])
            point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
            images[image_id] = Image(
                id=image_id,
                qvec=qvec,
                tvec=tvec,
                camera_id=camera_id,
                name=image_name,
                xys=xys,
                point3D_ids=point3D_ids)
    return images
def write_images_text(images, path):
    """Write images as a COLMAP ``images.txt`` file.

    After a four-line comment header each image produces two lines: the
    header fields (id, qvec, tvec, camera id, name) and the flattened
    ``X Y POINT3D_ID`` observations.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)

    :param images: Dict whose values expose ``id``, ``qvec``, ``tvec``,
        ``camera_id``, ``name``, ``xys`` and ``point3D_ids``.
    :param path: Destination file path.
    """
    if len(images) == 0:
        mean_observations = 0
    else:
        total_observations = sum(
            len(img.point3D_ids) for img in images.values())
        mean_observations = total_observations / len(images)
    header_lines = [
        '# Image list with two lines of data per image:\n',
        '#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n'.replace(
            '#   ', '# '),
        '# POINTS2D[] as (X, Y, POINT3D_ID)\n',
        '# Number of images: {}, mean observations per image: {}\n'.format(
            len(images), mean_observations),
    ]
    with open(path, 'w') as fid:
        fid.write(''.join(header_lines))
        for img in images.values():
            header_fields = [
                img.id, *img.qvec, *img.tvec, img.camera_id, img.name
            ]
            fid.write(' '.join(map(str, header_fields)) + '\n')
            observations = [
                ' '.join(map(str, [*xy, point3D_id]))
                for xy, point3D_id in zip(img.xys, img.point3D_ids)
            ]
            fid.write(' '.join(observations) + '\n')
def write_images_binary(images, path_to_model_file):
    """Write images as a COLMAP ``images.bin`` file.

    Emits a ``Q`` image count, then per image: id (``i``), qvec (``dddd``),
    tvec (``ddd``), camera id (``i``), the UTF-8 encoded name terminated by
    a NUL byte, a ``Q`` observation count and one ``ddq`` triplet per
    observation.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesBinary(const std::string& path)
        void Reconstruction::WriteImagesBinary(const std::string& path)

    :param images: Dict of image records; ``qvec`` and ``tvec`` must provide
        ``tolist()``.
    :param path_to_model_file: Destination file path.
    """
    with open(path_to_model_file, 'wb') as fid:
        write_next_bytes(fid, len(images), 'Q')
        for img in images.values():
            write_next_bytes(fid, img.id, 'i')
            write_next_bytes(fid, img.qvec.tolist(), 'dddd')
            write_next_bytes(fid, img.tvec.tolist(), 'ddd')
            write_next_bytes(fid, img.camera_id, 'i')
            # Encode the whole name once. Packing it character by character
            # with format 'c' fails for any character whose UTF-8 encoding
            # is longer than one byte.
            name_bytes = img.name.encode('utf-8') + b'\x00'
            write_next_bytes(fid, name_bytes, '{}s'.format(len(name_bytes)))
            write_next_bytes(fid, len(img.point3D_ids), 'Q')
            for xy, p3d_id in zip(img.xys, img.point3D_ids):
                write_next_bytes(fid, [*xy, p3d_id], 'ddq')
def read_points3D_text(path):
    """Parse a COLMAP ``points3D.txt`` file.

    Each data line is ``POINT3D_ID X Y Z R G B ERROR`` followed by
    alternating ``IMAGE_ID POINT2D_IDX`` track entries. Blank lines and
    ``#`` comments are skipped.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)

    :param path: Path of the text file.
    :return: Dict mapping point id to ``Point3D``.
    """
    points3D = {}
    with open(path, 'r') as fid:
        # ``readline`` returns '' only at end of file.
        for raw_line in iter(fid.readline, ''):
            entry = raw_line.strip()
            if not entry or entry.startswith('#'):
                continue
            fields = entry.split()
            point3D_id = int(fields[0])
            xyz = np.array(tuple(map(float, fields[1:4])))
            rgb = np.array(tuple(map(int, fields[4:7])))
            error = float(fields[7])
            # The track alternates image id / 2D point index from column 8.
            image_ids = np.array(tuple(map(int, fields[8::2])))
            point2D_idxs = np.array(tuple(map(int, fields[9::2])))
            points3D[point3D_id] = Point3D(
                id=point3D_id,
                xyz=xyz,
                rgb=rgb,
                error=error,
                image_ids=image_ids,
                point2D_idxs=point2D_idxs)
    return points3D
def read_points3D_binary(path_to_model_file):
    """Parse a COLMAP ``points3D.bin`` file.

    Layout read here: a ``Q`` point count, then per point a ``QdddBBBd``
    record (id, xyz, rgb, error), a ``Q`` track length and that many ``ii``
    pairs (image id, 2D point index).

    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DBinary(const std::string& path)
        void Reconstruction::WritePoints3DBinary(const std::string& path)

    :param path_to_model_file: Path of the binary file.
    :return: Dict mapping point id to ``Point3D``.
    """
    points3D = {}
    with open(path_to_model_file, 'rb') as fid:
        (num_points,) = read_next_bytes(fid, 8, 'Q')
        for _ in range(num_points):
            record = read_next_bytes(
                fid, num_bytes=43, format_char_sequence='QdddBBBd')
            point3D_id = record[0]
            (track_length,) = read_next_bytes(
                fid, num_bytes=8, format_char_sequence='Q')
            track = read_next_bytes(
                fid,
                num_bytes=8 * track_length,
                format_char_sequence='ii' * track_length)
            points3D[point3D_id] = Point3D(
                id=point3D_id,
                xyz=np.array(record[1:4]),
                rgb=np.array(record[4:7]),
                # Kept as a 0-d array, exactly like the text-free original.
                error=np.array(record[7]),
                image_ids=np.array(tuple(map(int, track[0::2]))),
                point2D_idxs=np.array(tuple(map(int, track[1::2]))))
    return points3D
def write_points3D_text(points3D, path):
    """Write points as a COLMAP ``points3D.txt`` file.

    After a three-line comment header each point produces one line: id,
    xyz, rgb, error and the flattened ``IMAGE_ID POINT2D_IDX`` track.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)

    :param points3D: Dict whose values expose ``id``, ``xyz``, ``rgb``,
        ``error``, ``image_ids`` and ``point2D_idxs``.
    :param path: Destination file path.
    """
    if len(points3D) == 0:
        mean_track_length = 0
    else:
        total_track_length = sum(
            len(pt.image_ids) for pt in points3D.values())
        mean_track_length = total_track_length / len(points3D)
    header_lines = [
        '# 3D point list with one line of data per point:\n',
        '# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n',
        '# Number of points: {}, mean track length: {}\n'.format(
            len(points3D), mean_track_length),
    ]
    with open(path, 'w') as fid:
        fid.write(''.join(header_lines))
        for pt in points3D.values():
            point_fields = [pt.id, *pt.xyz, *pt.rgb, pt.error]
            fid.write(' '.join(map(str, point_fields)) + ' ')
            track = [
                ' '.join(map(str, [image_id, point2D]))
                for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs)
            ]
            fid.write(' '.join(track) + '\n')
def write_points3D_binary(points3D, path_to_model_file):
    """Write points as a COLMAP ``points3D.bin`` file.

    Emits a ``Q`` point count, then per point: id (``Q``), xyz (``ddd``),
    rgb (``BBB``), error (``d``), the track length (``Q``) and one ``ii``
    pair (image id, 2D point index) per track element.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DBinary(const std::string& path)
        void Reconstruction::WritePoints3DBinary(const std::string& path)

    :param points3D: Dict of point records; ``xyz`` and ``rgb`` must provide
        ``tolist()`` and ``image_ids`` must provide ``shape``.
    :param path_to_model_file: Destination file path.
    """
    with open(path_to_model_file, 'wb') as fid:
        write_next_bytes(fid, len(points3D), 'Q')
        for pt in points3D.values():
            write_next_bytes(fid, pt.id, 'Q')
            write_next_bytes(fid, pt.xyz.tolist(), 'ddd')
            write_next_bytes(fid, pt.rgb.tolist(), 'BBB')
            write_next_bytes(fid, pt.error, 'd')
            write_next_bytes(fid, pt.image_ids.shape[0], 'Q')
            for track_pair in zip(pt.image_ids, pt.point2D_idxs):
                write_next_bytes(fid, list(track_pair), 'ii')
def detect_model_format(path, ext):
    """Tell whether ``path`` holds a complete model with extension ``ext``.

    A model counts as present when ``cameras``, ``images`` and ``points3D``
    files with that extension all exist; a message is printed in that case.

    :param path: Directory to inspect.
    :param ext: File extension including the dot, e.g. ``'.bin'``.
    :return: ``True`` when all three files exist, ``False`` otherwise.
    """
    required_stems = ('cameras', 'images', 'points3D')
    if not all(
            os.path.isfile(os.path.join(path, stem + ext))
            for stem in required_stems):
        return False
    print("Detected model format: '" + ext + "'")
    return True
def read_model(path, ext=''):
    """Load cameras, images and 3D points from a COLMAP model directory.

    :param path: Directory containing the model files.
    :param ext: ``'.txt'`` selects the text readers, any other non-empty
        value the binary readers. With the default ``''`` the format is
        probed, ``'.bin'`` first and then ``'.txt'``.
    :return: Tuple ``(cameras, images, points3D)``; ``None`` (after printing
        a hint) when no format could be detected.
    """
    if ext == '':
        # Probe the directory for a complete set of files.
        for candidate in ('.bin', '.txt'):
            if detect_model_format(path, candidate):
                ext = candidate
                break
        else:
            print("Provide model format: '.bin' or '.txt'")
            return
    cameras_path = os.path.join(path, 'cameras' + ext)
    images_path = os.path.join(path, 'images' + ext)
    points3D_path = os.path.join(path, 'points3D') + ext
    if ext == '.txt':
        cameras = read_cameras_text(cameras_path)
        images = read_images_text(images_path)
        points3D = read_points3D_text(points3D_path)
    else:
        cameras = read_cameras_binary(cameras_path)
        images = read_images_binary(images_path)
        points3D = read_points3D_binary(points3D_path)
    return cameras, images, points3D
def write_model(cameras, images, points3D, path, ext='.bin'):
    """Write cameras, images and 3D points into a COLMAP model directory.

    :param cameras: Camera records to write.
    :param images: Image records to write.
    :param points3D: 3D point records to write.
    :param path: Destination directory.
    :param ext: ``'.txt'`` selects the text writers; any other value the
        binary writers.
    :return: Tuple ``(cameras, images, points3D)`` with the inputs.
    """
    cameras_path = os.path.join(path, 'cameras' + ext)
    images_path = os.path.join(path, 'images' + ext)
    points3D_path = os.path.join(path, 'points3D') + ext
    if ext == '.txt':
        write_cameras_text(cameras, cameras_path)
        write_images_text(images, images_path)
        write_points3D_text(points3D, points3D_path)
    else:
        write_cameras_binary(cameras, cameras_path)
        write_images_binary(images, images_path)
        write_points3D_binary(points3D, points3D_path)
    return cameras, images, points3D
def qvec2rotmat(qvec):
    """Convert a quaternion into a 3x3 rotation matrix.

    :param qvec: Indexable with at least four entries; the formula treats
        ``qvec[0]`` as the scalar part and ``qvec[1:4]`` as the vector part.
    :return: ``np.ndarray`` built from the three matrix rows.
    """
    w, x, y, z = qvec[0], qvec[1], qvec[2], qvec[3]
    row_0 = [
        1 - 2 * y**2 - 2 * z**2,
        2 * x * y - 2 * w * z,
        2 * z * x + 2 * w * y,
    ]
    row_1 = [
        2 * x * y + 2 * w * z,
        1 - 2 * x**2 - 2 * z**2,
        2 * y * z - 2 * w * x,
    ]
    row_2 = [
        2 * z * x - 2 * w * y,
        2 * y * z + 2 * w * x,
        1 - 2 * x**2 - 2 * y**2,
    ]
    return np.array([row_0, row_1, row_2])

View File

@@ -0,0 +1,200 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import glob
import os
import subprocess
from typing import Any, Dict, Union
import cv2
import numpy as np
import tensorflow as tf
from modelscope.metainfo import Preprocessors
from modelscope.preprocessors import Preprocessor
from modelscope.preprocessors.builder import PREPROCESSORS
from modelscope.utils.constant import Fields, ModeKeys
from modelscope.utils.logger import get_logger
logger = get_logger()
@PREPROCESSORS.register_module(
    Fields.cv, module_name=Preprocessors.nerf_recon_acc_preprocessor)
class NeRFReconPreprocessor(Preprocessor):
    """Prepare a data directory for NeRF reconstruction.

    For ``data_type='blender'`` it only verifies that
    ``transforms_train.json`` exists. For ``data_type='colmap'`` it
    optionally splits a video into frames with ffmpeg and runs the COLMAP
    command line tools to produce a sparse model.
    """

    def __init__(self,
                 mode=ModeKeys.INFERENCE,
                 data_type='colmap',
                 use_mask=True,
                 match_type='exhaustive_matcher',
                 frame_count=60,
                 use_distortion=False,
                 *args,
                 **kwargs):
        """Store the preprocessing options.

        Args:
            mode: Forwarded to ``Preprocessor.__init__``.
            data_type: ``'colmap'`` or ``'blender'``; validated in
                ``__call__``.
            use_mask: Stored on the instance; not read inside this class.
            match_type: COLMAP matcher sub-command, either
                ``'exhaustive_matcher'`` or ``'sequential_matcher'``.
            frame_count: Target number of frames extracted from a video.
            use_distortion: When true, ``image_undistorter`` is run as well
                and its output is the one validated.

        Raises:
            Exception: If ``match_type`` is not one of the two matchers.
        """
        super().__init__(mode)

        # set preprocessor info
        self.data_type = data_type
        self.use_mask = use_mask
        self.match_type = match_type
        if match_type != 'exhaustive_matcher' and match_type != 'sequential_matcher':
            raise Exception('matcher type {} is not valid'.format(match_type))
        self.frame_count = frame_count
        self.use_distortion = use_distortion

    def __call__(self, data: Union[str, Dict], **kwargs) -> Dict[str, Any]:
        """Run the preprocessing for one data directory.

        Args:
            data: Mapping with ``'data_dir'``; for ``data_type='colmap'``
                also ``'video_input_path'`` (``''`` skips frame splitting).

        Returns:
            Dict with the single key ``'data_dir'``.

        Raises:
            Exception: On an unsupported data type, a missing blender
                transform file, or missing COLMAP outputs.
        """
        if self.data_type != 'blender' and self.data_type != 'colmap':
            raise Exception('data type {} is not support currently'.format(
                self.data_type))

        data_dir = data['data_dir']
        os.makedirs(data_dir, exist_ok=True)
        if self.data_type == 'blender':
            transform_file = os.path.join(data_dir, 'transforms_train.json')
            if not os.path.exists(transform_file):
                raise Exception('Blender dataset is not found')

        if self.data_type == 'colmap':
            video_path = data['video_input_path']
            if video_path != '':
                self.split_frames(video_path, data_dir, self.frame_count)
            self.gen_poses(data_dir, self.match_type, self.use_distortion)
            files_needed = [
                '{}.bin'.format(f) for f in ['cameras', 'images', 'points3D']
            ]
            if self.use_distortion:
                colmap_dir = os.path.join(data_dir, 'preprocess/sparse')
            else:
                colmap_dir = os.path.join(data_dir, 'sparse/0')
            # A missing output directory is a failed run as well; without
            # this guard os.listdir raised FileNotFoundError instead of the
            # intended error below.
            files_had = os.listdir(colmap_dir) if os.path.isdir(
                colmap_dir) else []
            if not all([f in files_had for f in files_needed]):
                raise Exception('colmap run failed')

        result = {}
        result['data_dir'] = data_dir
        return result

    def split_frames(self, video_path, basedir, frame_count=60):
        """Extract roughly ``frame_count`` frames into ``basedir/images``.

        Nothing is done when ``basedir/images`` already exists.

        Args:
            video_path: Input video file.
            basedir: Data directory receiving the ``images`` folder.
            frame_count: Desired number of extracted frames.

        Raises:
            Exception: If the fps or frame count of the video cannot be read.
        """
        cap = cv2.VideoCapture(video_path)
        try:
            fps = round(cap.get(cv2.CAP_PROP_FPS))
            frame_total = round(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            # The capture handle used to be leaked.
            cap.release()

        image_dir = os.path.join(basedir, 'images')
        if os.path.exists(image_dir):
            logger.info('Don\'t need to run ffmpeg')
            return

        logger.info('Need to run ffmpeg')
        if fps <= 0 or frame_total <= 0:
            # Previously a ZeroDivisionError (or an ffmpeg ``fps=0`` failure)
            # raised after the images directory had already been created.
            raise Exception(
                'can not read fps / frame count from video {}'.format(
                    video_path))
        os.makedirs(image_dir, exist_ok=True)
        rate = frame_count * fps / frame_total
        # Keep the historical whole-number rate when it is usable. Fall back
        # to the fractional rate when truncation would yield ``fps=0``,
        # which ffmpeg rejects (videos longer than ``frame_count`` seconds).
        sample_fps = int(rate) if rate >= 1 else rate
        # Argument list without a shell: paths containing spaces or shell
        # metacharacters are passed to ffmpeg verbatim.
        cmd = [
            'ffmpeg', '-i', video_path, '-qscale:v', '1', '-qmin', '1', '-vf',
            'fps={}'.format(sample_fps), '{}/%04d.png'.format(image_dir)
        ]
        returncode = subprocess.run(cmd).returncode
        if returncode != 0:
            logger.error('ffmpeg exited with code {}'.format(returncode))
        logger.info('split frames done')

    @staticmethod
    def _run_colmap_step(args, logfile):
        """Run one COLMAP command and append its stdout to ``logfile``."""
        logfile.write(subprocess.check_output(args, universal_newlines=True))

    def run_colmap(self, basedir, match_type, use_distortion):
        """Run the COLMAP sparse reconstruction pipeline inside ``basedir``.

        Steps: feature extraction, matching, mapping, bundle adjustment and,
        when ``use_distortion`` is true, ``image_undistorter``. The stdout of
        each step is appended to ``basedir/colmap_output.txt``.

        Args:
            basedir: Data directory containing the ``images`` folder.
            match_type: COLMAP matcher sub-command to run.
            use_distortion: Whether to run ``image_undistorter`` afterwards.

        Raises:
            subprocess.CalledProcessError: If any COLMAP command fails.
        """
        logfile_name = os.path.join(basedir, 'colmap_output.txt')
        database_path = os.path.join(basedir, 'database.db')
        image_path = os.path.join(basedir, 'images')
        sparse_path = os.path.join(basedir, 'sparse')
        # ``with`` closes the log even when a COLMAP step raises.
        with open(logfile_name, 'w') as logfile:
            feature_extractor_args = [
                'colmap', 'feature_extractor', '--database_path',
                database_path, '--image_path', image_path,
                '--ImageReader.single_camera', '1'
            ]
            self._run_colmap_step(feature_extractor_args, logfile)
            logger.info('Features extracted done')

            matcher_args = [
                'colmap',
                match_type,
                '--database_path',
                database_path,
            ]
            self._run_colmap_step(matcher_args, logfile)
            logger.info('Features matched done')

            if not os.path.exists(sparse_path):
                os.makedirs(sparse_path)

            mapper_args = [
                'colmap',
                'mapper',
                '--database_path',
                database_path,
                '--image_path',
                image_path,
                # --export_path changed to --output_path in colmap 3.6
                '--output_path',
                sparse_path,
                '--Mapper.num_threads',
                '16',
                '--Mapper.init_min_tri_angle',
                '4',
                '--Mapper.multiple_models',
                '0',
                '--Mapper.extract_colors',
                '0',
            ]
            self._run_colmap_step(mapper_args, logfile)
            logger.info('Sparse map created done.')

            bundle_adjuster_cmd = [
                'colmap',
                'bundle_adjuster',
                '--input_path',
                os.path.join(basedir, 'sparse/0'),
                '--output_path',
                os.path.join(basedir, 'sparse/0'),
                '--BundleAdjustment.refine_principal_point',
                '1',
            ]
            self._run_colmap_step(bundle_adjuster_cmd, logfile)
            logger.info('Refining intrinsics done.')

            if use_distortion:
                os.makedirs(os.path.join(basedir, 'preprocess'), exist_ok=True)
                distort_cmd = [
                    'colmap', 'image_undistorter', '--image_path', image_path,
                    '--input_path',
                    os.path.join(basedir, 'sparse/0'), '--output_path',
                    os.path.join(basedir, 'preprocess'), '--output_type',
                    'COLMAP'
                ]
                self._run_colmap_step(distort_cmd, logfile)
                logger.info('Image distortion done.')

        logger.info(
            'Finished running COLMAP, see {} for logs'.format(logfile_name))

    def gen_poses(self, basedir, match_type, use_distortion):
        """Run COLMAP unless ``basedir/sparse/0`` already holds a full model.

        Args:
            basedir: Data directory.
            match_type: Forwarded to ``run_colmap``.
            use_distortion: Forwarded to ``run_colmap``.
        """
        # NOTE(review): only ``sparse/0`` is inspected, even when
        # ``use_distortion`` is true; an existing sparse model without the
        # undistorted output therefore skips COLMAP - confirm this is wanted.
        files_needed = [
            '{}.bin'.format(f) for f in ['cameras', 'images', 'points3D']
        ]
        sparse_dir = os.path.join(basedir, 'sparse/0')
        if os.path.exists(sparse_dir):
            files_had = os.listdir(sparse_dir)
        else:
            files_had = []
        if not all([f in files_had for f in files_needed]):
            logger.info('Need to run COLMAP')
            self.run_colmap(basedir, match_type, use_distortion)
        else:
            logger.info('Don\'t need to run COLMAP')

View File

@@ -0,0 +1,289 @@
import argparse
import os
import random
import time
import imageio
import mmcv
import numpy as np
import torch
from tqdm import tqdm, trange
from modelscope.metainfo import Models
from modelscope.models.base import Tensor, TorchModel
from modelscope.models.builder import MODELS
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger
from .dataloader.load_data import load_data
from .network.dvgo import DirectMPIGO, DirectVoxGO, SFTNet, get_rays_of_a_view
logger = get_logger()
def to8b(x):
    """Clip ``x`` to [0, 1], scale by 255 and cast to ``uint8``."""
    clipped = np.clip(x, 0, 1)
    return (255 * clipped).astype(np.uint8)
__all__ = ['NeRFRecon4K']
@MODELS.register_module(Tasks.nerf_recon_4k, module_name=Models.nerf_recon_4k)
class NeRFRecon4K(TorchModel):
    """Two-stage renderer: an encoder plus an ``SFTNet`` decoder.

    The encoder (``DirectMPIGO`` or ``DirectVoxGO``) renders RGB, depth and
    feature maps per view; the decoder turns each feature map, conditioned
    on depth, into the final image. Both are loaded from fixed checkpoint
    file names inside ``model_dir``.
    """

    def __init__(self, model_dir, **kwargs):
        """Load both checkpoints from ``model_dir``.

        Args:
            model_dir: Directory containing ``fine_100000.tar`` and
                ``sresrnet_100000.pth``.
            **kwargs: Must provide ``data_type``, ``test_ray_chunk``,
                ``factor``, ``load_sr``, ``test_tile`` and ``stepsize``.

        Raises:
            Exception: If CUDA is unavailable or a checkpoint is missing.
        """
        super().__init__(model_dir, **kwargs)

        if not torch.cuda.is_available():
            raise Exception('GPU is required')

        self.device = torch.device('cuda')
        logger.info('model params:{}'.format(kwargs))
        self.data_type = kwargs['data_type']
        self.test_ray_chunk = kwargs['test_ray_chunk']

        self.enc_ckpt_path = os.path.join(model_dir, 'fine_100000.tar')
        if not os.path.exists(self.enc_ckpt_path):
            raise Exception('encoder ckpt path not found')
        self.dec_ckpt_path = os.path.join(model_dir, 'sresrnet_100000.pth')
        if not os.path.exists(self.dec_ckpt_path):
            raise Exception('decoder ckpt path not found')

        # Checkpoint file name without its 4-character extension.
        self.ckpt_name = self.dec_ckpt_path.split('/')[-1][:-4]
        self.ndc = self.data_type == 'llff'
        self.sr_ratio = int(kwargs['factor'] / kwargs['load_sr'])
        self.load_existed_model()

        self.test_tile = kwargs['test_tile']
        self.stepsize = kwargs['stepsize']

    def load_existed_model(self):
        """Instantiate encoder and decoder and load their weights."""
        if self.ndc:
            model_class = DirectMPIGO
            ckpt = torch.load(self.enc_ckpt_path, map_location='cpu')
        else:
            model_class = DirectVoxGO
            ckpt = torch.load(self.enc_ckpt_path, map_location='cpu')
            # Point the mask cache at the local checkpoint rather than the
            # path that was stored in the checkpoint.
            ckpt['model_kwargs']['mask_cache_path'] = self.enc_ckpt_path
        self.encoder = model_class(**ckpt['model_kwargs'])
        self.encoder.load_state_dict(ckpt['model_state_dict'])
        self.encoder = self.encoder.to(self.device)

        self.decoder = SFTNet(
            n_in_colors=3,
            scale=self.sr_ratio,
            num_feat=64,
            num_block=5,
            num_grow_ch=32,
            num_cond=1,
            dswise=False).to(self.device)
        self.decoder.load_network(
            load_path=self.dec_ckpt_path, device=self.device)
        self.decoder.eval()

    def nerf_reconstruction(self, data_cfg, render_dir):
        """Render the test views and save decoded frames plus a video.

        Writes ``{idx:03d}_dec.png`` per view and ``result_dec.mp4`` into
        ``render_dir``.

        Args:
            data_cfg: Data configuration passed to ``load_everything``.
            render_dir: Output directory; created when missing.
        """
        data_dict = load_everything(cfg_data=data_cfg)

        self.render_viewpoints_kwargs = {
            'render_kwargs': {
                'near': data_dict['near'],
                'far': data_dict['far'],
                'bg': 1 if data_dict['white_bkgd'] else 0,
                'stepsize': self.stepsize,
                'inverse_y': False,
                'flip_x': False,
                'flip_y': False,
                'render_depth': True,
            },
        }

        os.makedirs(render_dir, exist_ok=True)
        print('All results are dumped into', render_dir)
        rgbs, depths, bgmaps, _, _, rgb_features = self.render_viewpoints(
            render_poses=data_dict['poses'][data_dict['i_test']],
            HW=data_dict['HW'][data_dict['i_test']],
            Ks=data_dict['Ks'][data_dict['i_test']],
            gt_imgs=[
                data_dict['images'][i].cpu().numpy()
                for i in data_dict['i_test']
            ],
            savedir=render_dir,
            dump_images=False,
            **self.render_viewpoints_kwargs)

        # Loop-invariant: wrap the depth stack once, not once per frame.
        depth_cond = torch.from_numpy(depths).movedim(-1, 1)
        rgbsr = []
        for idx, rgbsave in enumerate(tqdm(rgb_features)):
            rgbtest = torch.from_numpy(rgbsave).movedim(-1, 0).unsqueeze(0).to(
                self.device)
            input_cond = depth_cond[idx, :, :, :].to(self.device)
            # Inference only: skip building an autograd graph.
            with torch.no_grad():
                if self.test_tile:
                    rgb_srtest = self.decoder.tile_process(
                        rgbtest, input_cond, tile_size=self.test_tile)
                else:
                    rgb_srtest = self.decoder(rgbtest,
                                              input_cond).detach().to('cpu')
            # ``.cpu()`` is a no-op for CPU tensors and keeps ``.numpy()``
            # valid should the tiled path return a CUDA tensor.
            rgb_srsave = rgb_srtest.squeeze().movedim(
                0, -1).detach().cpu().clamp(0, 1).numpy()
            rgbsr.append(rgb_srsave)
        print(
            'all inference process has done, saving images... because our images are\n'
            '4K (4032x3024), the saving process may be time-consuming.')
        rgbsr = np.array(rgbsr)
        for i in trange(len(rgbsr)):
            rgb8 = to8b(rgbsr[i])
            filename = os.path.join(render_dir, '{:03d}_dec.png'.format(i))
            imageio.imwrite(filename, rgb8)

        imageio.mimwrite(
            os.path.join(render_dir, 'result_dec.mp4'),
            to8b(rgbsr),
            fps=25,
            codec='libx264',
            quality=8)

    @torch.no_grad()
    def render_viewpoints(self,
                          render_poses,
                          HW,
                          Ks,
                          render_kwargs,
                          gt_imgs=None,
                          savedir=None,
                          dump_images=False,
                          render_factor=0,
                          eval_ssim=False,
                          eval_lpips_alex=False,
                          eval_lpips_vgg=False):
        '''Render images for the given viewpoints; run evaluation if gt given.

        Args:
            render_poses: One camera-to-world pose per view.
            HW: Per-view ``(H, W)``.
            Ks: Per-view intrinsics.
            render_kwargs: Extra keyword arguments for the encoder call.
            gt_imgs: Optional ground truth images; enables PSNR when
                ``render_factor == 0``.
            savedir: Directory for ``{idx:03d}_enc.png`` when
                ``dump_images`` is true.
            dump_images: Whether to write the encoder RGB renders.
            render_factor: When non-zero, ``HW`` and ``Ks`` are divided by it.
            eval_ssim, eval_lpips_alex, eval_lpips_vgg: Only control extra
                prints; see the note in the body.

        Returns:
            Tuple ``(rgbs, depths, bgmaps, psnrs, viewdirs_all,
            rgb_features)``; the image-like entries are stacked numpy arrays.
        '''
        assert len(render_poses) == len(HW) and len(HW) == len(Ks)

        if render_factor != 0:
            HW = np.copy(HW)
            Ks = np.copy(Ks)
            HW = (HW / render_factor).astype(int)
            Ks[:, :2, :3] /= render_factor

        rgbs = []
        rgb_features = []
        depths = []
        bgmaps = []
        psnrs = []
        viewdirs_all = []
        # NOTE(review): the three lists below are never filled in this
        # method, so the matching prints report the mean of an empty list.
        ssims = []
        lpips_alex = []
        lpips_vgg = []

        keys = ['rgb_marched', 'depth', 'alphainv_last', 'rgb_feature']
        for i, c2w in enumerate(tqdm(render_poses)):

            H, W = HW[i]
            K = Ks[i]
            c2w = torch.Tensor(c2w)
            rays_o, rays_d, viewdirs = get_rays_of_a_view(
                H,
                W,
                K,
                c2w,
                self.ndc,
                inverse_y=False,
                flip_x=False,
                flip_y=False)
            # ``self.device`` is the CUDA device chosen in ``__init__``.
            rays_o = rays_o.flatten(0, -2).to(self.device)
            rays_d = rays_d.flatten(0, -2).to(self.device)
            viewdirs = viewdirs.flatten(0, -2).to(self.device)
            time_rdstart = time.time()
            # Render in ray chunks of ``test_ray_chunk`` and keep only the
            # outputs that are needed afterwards.
            render_result_chunks = [{
                k: v
                for k, v in self.encoder(ro, rd, vd, **render_kwargs).items()
                if k in keys
            } for ro, rd, vd in zip(
                rays_o.split(self.test_ray_chunk, 0),
                rays_d.split(self.test_ray_chunk, 0),
                viewdirs.split(self.test_ray_chunk, 0))]
            render_result = {
                k:
                torch.cat([ret[k]
                           for ret in render_result_chunks]).reshape(H, W, -1)
                for k in render_result_chunks[0].keys()
            }
            print(f'render time is: {time.time() - time_rdstart}')

            rgb = render_result['rgb_marched'].clamp(0, 1).cpu().numpy()
            rgb_feature = render_result['rgb_feature'].cpu().numpy()
            depth = render_result['depth'].cpu().numpy()
            bgmap = render_result['alphainv_last'].cpu().numpy()

            rgbs.append(rgb)
            rgb_features.append(rgb_feature)
            depths.append(depth)
            bgmaps.append(bgmap)
            viewdirs_all.append(viewdirs)
            if i == 0:
                print('Testing', rgb.shape)

            if gt_imgs is not None and render_factor == 0:
                p = -10. * np.log10(np.mean(np.square(rgb - gt_imgs[i])))
                psnrs.append(p)

        if len(psnrs):
            print('Testing psnr', np.mean(psnrs), '(avg)')
            if eval_ssim:
                print('Testing ssim', np.mean(ssims), '(avg)')
            if eval_lpips_vgg:
                print('Testing lpips (vgg)', np.mean(lpips_vgg), '(avg)')
            if eval_lpips_alex:
                print('Testing lpips (alex)', np.mean(lpips_alex), '(avg)')

        if savedir is not None and dump_images:
            for i in trange(len(rgbs)):
                rgb8 = to8b(rgbs[i])
                filename = os.path.join(savedir, '{:03d}_enc.png'.format(i))
                imageio.imwrite(filename, rgb8)

        rgbs = np.array(rgbs)
        rgb_features = np.array(rgb_features)
        depths = np.array(depths)
        bgmaps = np.array(bgmaps)

        return rgbs, depths, bgmaps, psnrs, viewdirs_all, rgb_features
def load_everything(cfg_data):
    '''Load images / poses / camera settings / data split.

    Wraps ``cfg_data`` in ``mmcv.Config``, calls ``load_data`` and keeps
    only the fields used downstream, converting ``srgt``, ``w2c``,
    ``images`` and ``poses`` to tensors.
    '''
    cfg_data = mmcv.Config(cfg_data)
    data_dict = load_data(cfg_data)

    kept_keys = {
        'hwf', 'HW', 'Ks', 'near', 'far', 'near_clip', 'i_train', 'i_val',
        'i_test', 'irregular_shape', 'poses', 'render_poses', 'images',
        'white_bkgd', 'srgt', 'w2c'
    }
    data_dict['srgt'] = torch.FloatTensor(data_dict['srgt'], device='cpu')
    data_dict['w2c'] = torch.FloatTensor(data_dict['w2c'], device='cpu')

    # Drop everything else in place, on the dict returned by ``load_data``.
    for unused_key in [k for k in data_dict.keys() if k not in kept_keys]:
        data_dict.pop(unused_key)

    # Images of irregular shape stay a list of per-image tensors.
    raw_images = data_dict['images']
    if data_dict['irregular_shape']:
        data_dict['images'] = [
            torch.FloatTensor(im, device='cpu') for im in raw_images
        ]
    else:
        data_dict['images'] = torch.FloatTensor(raw_images, device='cpu')
    data_dict['poses'] = torch.Tensor(data_dict['poses'])
    return data_dict

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,176 @@
# The implementation is partly adopted from nerfacc, made publicly available under the MIT License
# at https://github.com/KAIR-BAIR/nerfacc/blob/master/examples/radiance_fields/ngp.py
import gc
from collections import defaultdict
import mcubes
import numpy as np
import tinycudann as tcnn
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.cuda.amp import custom_bwd, custom_fwd
class PSNR(nn.Module):
    """Peak signal-to-noise ratio, ``-10 * log10(MSE)``."""

    def __init__(self):
        super().__init__()

    def forward(self, inputs, targets, valid_mask=None, reduction='mean'):
        """Compute the PSNR between ``inputs`` and ``targets``.

        Args:
            inputs: Predicted tensor.
            targets: Reference tensor.
            valid_mask: Optional index applied to the squared error.
            reduction: ``'mean'`` averages over every element; ``'none'``
                averages over all dimensions except the first.
        """
        assert reduction in ['mean', 'none']
        squared_error = (inputs - targets)**2
        if valid_mask is not None:
            squared_error = squared_error[valid_mask]
        if reduction == 'mean':
            mse = torch.mean(squared_error)
        elif reduction == 'none':
            trailing_dims = tuple(range(squared_error.ndim)[1:])
            mse = torch.mean(squared_error, dim=trailing_dims)
        else:
            # Only reachable when assertions are disabled.
            return None
        return -10 * torch.log10(mse)
def extract_fields(bound_min,
                   bound_max,
                   resolution,
                   query_func,
                   chunk_size=64):
    """Sample ``query_func`` on a regular 3D grid.

    The grid is evaluated in cubes of at most ``chunk_size`` samples per
    axis, which bounds the number of points per ``query_func`` call.

    Args:
        bound_min: Indexable with the lower x/y/z bounds.
        bound_max: Indexable with the upper x/y/z bounds.
        resolution: Number of samples along each axis.
        query_func: Callable receiving a ``(P, 3)`` tensor of points; its
            result is reshaped to the shape of the current cube.
        chunk_size: Samples per axis in one cube (64 matches the previous
            hard-coded value).

    Returns:
        ``np.float32`` array of shape ``(resolution, resolution, resolution)``.
    """
    N = chunk_size
    X = torch.linspace(bound_min[0], bound_max[0], resolution).split(N)
    Y = torch.linspace(bound_min[1], bound_max[1], resolution).split(N)
    Z = torch.linspace(bound_min[2], bound_max[2], resolution).split(N)

    # Points go to the GPU as before when one is present; on CPU-only hosts
    # they stay on the CPU rather than failing inside ``.cuda()``.
    use_cuda = torch.cuda.is_available()

    u = np.zeros([resolution, resolution, resolution], dtype=np.float32)
    with torch.no_grad():
        for xi, xs in enumerate(X):
            for yi, ys in enumerate(Y):
                for zi, zs in enumerate(Z):
                    xx, yy, zz = torch.meshgrid(xs, ys, zs)
                    xx = xx.reshape(-1, 1)
                    yy = yy.reshape(-1, 1)
                    zz = zz.reshape(-1, 1)
                    pts = torch.cat([xx, yy, zz], dim=-1)
                    if use_cuda:
                        pts = pts.cuda()
                    val = query_func(pts).reshape(
                        len(xs), len(ys), len(zs)).detach().cpu().numpy()
                    u[xi * N:xi * N + len(xs), yi * N:yi * N + len(ys),
                      zi * N:zi * N + len(zs)] = val
    return u
def extract_geometry(bound_min, bound_max, resolution, threshold, query_func):
    """Extract an iso-surface of ``query_func`` with marching cubes.

    The field is sampled with ``extract_fields``; the vertices returned by
    ``mcubes.marching_cubes`` are then rescaled from grid indices to the
    ``[bound_min, bound_max]`` box.

    Args:
        bound_min: Tensor with the lower bounds.
        bound_max: Tensor with the upper bounds.
        resolution: Number of grid samples per axis.
        threshold: Iso-value passed to marching cubes.
        query_func: Field callable forwarded to ``extract_fields``.

    Returns:
        Tuple ``(vertices, triangles)``.
    """
    field = extract_fields(bound_min, bound_max, resolution, query_func)
    vertices, triangles = mcubes.marching_cubes(field, threshold)
    upper = bound_max.detach().cpu().numpy()
    lower = bound_min.detach().cpu().numpy()
    extent = (upper - lower)[None, :]
    vertices = vertices / (resolution - 1.0) * extent + lower[None, :]
    return vertices, triangles
def chunk_batch(func, chunk_size, *args, **kwargs):
    """Apply ``func`` to slices of the tensor arguments and merge the results.

    The batch size is the first dimension of the first tensor in ``args``.
    Every tensor argument is sliced to ``[i:i + chunk_size]``; non-tensor
    arguments and ``kwargs`` are passed through unchanged. Chunk outputs are
    concatenated along dimension 0 and, when gradients are disabled,
    detached first.

    Args:
        func: Callable returning a tensor, a tuple/list of tensors, a dict
            of tensors, or ``None`` (that chunk is skipped).
        chunk_size: Maximum number of rows per call.

    Returns:
        The merged result in the container type of the last chunk output,
        or ``None`` when no chunk produced an output.

    Raises:
        TypeError: If ``args`` holds no tensor, or ``func`` returns an
            unsupported type.
    """
    B = None
    for arg in args:
        if isinstance(arg, torch.Tensor):
            B = arg.shape[0]
            break
    if B is None:
        # Used to surface as an opaque ``range(0, None, ...)`` TypeError.
        raise TypeError(
            'chunk_batch needs at least one torch.Tensor positional argument '
            'to infer the batch size')
    out = defaultdict(list)
    out_type = None
    chunk_length = 0
    for start in range(0, B, chunk_size):
        out_chunk = func(
            *[
                arg[start:start + chunk_size]
                if isinstance(arg, torch.Tensor) else arg for arg in args
            ], **kwargs)
        if out_chunk is None:
            continue
        out_type = type(out_chunk)
        if isinstance(out_chunk, torch.Tensor):
            out_chunk = {0: out_chunk}
        elif isinstance(out_chunk, (tuple, list)):
            chunk_length = len(out_chunk)
            out_chunk = dict(enumerate(out_chunk))
        elif isinstance(out_chunk, dict):
            pass
        else:
            # ``exit(1)`` used to terminate the whole interpreter here
            # without any diagnostic.
            raise TypeError(
                'chunk_batch does not support a return value of type {}'.
                format(out_type.__name__))
        for k, v in out_chunk.items():
            out[k].append(v if torch.is_grad_enabled() else v.detach())

    if out_type is None:
        return

    out = {k: torch.cat(v, dim=0) for k, v in out.items()}
    if out_type is torch.Tensor:
        return out[0]
    elif out_type in [tuple, list]:
        return out_type([out[i] for i in range(chunk_length)])
    elif out_type is dict:
        return out
def get_activation(name):
    """Return the activation callable selected by ``name``.

    Matching is case-insensitive. Supported forms:

    * ``None`` / ``'none'``: ``nn.Identity()``
    * ``'scale<v>'``: clamp to ``[0, v]`` and divide by ``v``
    * ``'clamp<v>'``: clamp to ``[0, v]``
    * ``'mul<v>'``: multiply by ``v``
    * ``'trunc_exp'``: the module-level ``trunc_exp``
    * ``'+<v>'`` / ``'-<v>'``: add the signed constant
    * ``'sigmoid'`` / ``'tanh'``: the ``torch`` functions
    * anything else: attribute of that name on ``torch.nn.functional``

    Raises:
        AttributeError: If the name matches nothing above and is not an
            attribute of ``torch.nn.functional``.
    """
    # Test for None before lower-casing: ``None.lower()`` used to raise
    # AttributeError, which made the ``None`` branch unreachable.
    if name is None:
        return nn.Identity()
    name = name.lower()
    if name == 'none':
        return nn.Identity()
    elif name.startswith('scale'):
        scale_factor = float(name[5:])
        return lambda x: x.clamp(0., scale_factor) / scale_factor
    elif name.startswith('clamp'):
        clamp_max = float(name[5:])
        return lambda x: x.clamp(0., clamp_max)
    elif name.startswith('mul'):
        mul_factor = float(name[3:])
        return lambda x: x * mul_factor
    elif name == 'trunc_exp':
        return trunc_exp
    elif name.startswith('+') or name.startswith('-'):
        return lambda x: x + float(name)
    elif name == 'sigmoid':
        return lambda x: torch.sigmoid(x)
    elif name == 'tanh':
        return lambda x: torch.tanh(x)
    else:
        return getattr(F, name)
class _TruncExp(Function):
    """``exp`` whose backward pass clamps the input at 15.

    Implementation from torch-ngp:
    https://github.com/ashawkey/torch-ngp/blob/93b08a0d4ec1cc6e69d85df7f0acdfb99603b628/activation.py
    """

    @staticmethod
    @custom_fwd(cast_inputs=torch.float32)
    def forward(ctx, x):  # pylint: disable=arguments-differ
        # The input is kept so backward can recompute a clamped exp.
        ctx.save_for_backward(x)
        return torch.exp(x)

    @staticmethod
    @custom_bwd
    def backward(ctx, g):
        (saved_input, ) = ctx.saved_tensors
        clamped = torch.clamp(saved_input, max=15)
        return g * torch.exp(clamped)


trunc_exp = _TruncExp.apply
def dot(x, y):
    """Dot product over the last dimension, keeping that dimension."""
    elementwise = x * y
    return torch.sum(elementwise, -1, keepdim=True)
def reflect(x, n):
    """Return ``2 * (x . n) * n - x`` with the dot product over the last dim."""
    projection = torch.sum(x * n, -1, keepdim=True)
    return 2 * projection * n - x
def normalize(dat, inp_scale, tgt_scale):
    """Linearly map ``dat`` from ``inp_scale`` to ``tgt_scale``.

    Args:
        dat: Values to rescale.
        inp_scale: ``[low, high]`` of the input range; ``None`` uses
            ``dat.min()`` and ``dat.max()``.
        tgt_scale: ``[low, high]`` of the output range.
    """
    if inp_scale is None:
        inp_scale = [dat.min(), dat.max()]
    src_low, src_high = inp_scale[0], inp_scale[1]
    dst_low, dst_high = tgt_scale[0], tgt_scale[1]
    unit = (dat - src_low) / (src_high - src_low)
    return unit * (dst_high - dst_low) + dst_low
def cleanup():
    """Release cached memory held by Python, PyTorch and tiny-cuda-nn.

    Runs the Python garbage collector, empties the PyTorch CUDA cache and
    then calls ``tcnn.free_temporary_memory()``.
    """
    gc.collect()
    torch.cuda.empty_cache()
    tcnn.free_temporary_memory()
def update_module_step(m, epoch, global_step):
    """Call ``m.update_step(epoch, global_step)`` when ``m`` defines it."""
    if not hasattr(m, 'update_step'):
        return
    m.update_step(epoch, global_step)

View File

View File

@@ -0,0 +1,85 @@
#include <torch/extension.h>
#include <vector>
// Forward declarations of the CUDA launchers called by the wrappers below.
// They are not defined in this translation unit (presumably in the
// companion .cu file - confirm against the build script).

// Adam update over every element of `param`.
void adam_upd_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    int step, float beta1, float beta2, float lr, float eps);

// Variant exposed to Python as "Adam update ignoring zero grad".
void masked_adam_upd_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    int step, float beta1, float beta2, float lr, float eps);

// Variant taking an additional per-element learning-rate tensor `perlr`.
void adam_upd_with_perlr_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    torch::Tensor perlr,
    int step, float beta1, float beta2, float lr, float eps);
// C++ interface
// Argument validation helpers: every tensor handed to a CUDA launcher must
// live on a CUDA device and be contiguous.
// `x.is_cuda()` replaces the deprecated `x.type().is_cuda()` spelling.
#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
// Wrapped in do/while(0) so the macro expands to exactly one statement and
// stays correct inside an unbraced if/else.
#define CHECK_INPUT(x) do { CHECK_CUDA(x); CHECK_CONTIGUOUS(x); } while (0)
// Python-facing entry point for the plain Adam update.
// Checks that all four tensors are contiguous CUDA tensors, then forwards
// every argument unchanged to adam_upd_cuda.
void adam_upd(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    int step, float beta1, float beta2, float lr, float eps) {
  CHECK_INPUT(param);
  CHECK_INPUT(grad);
  CHECK_INPUT(exp_avg);
  CHECK_INPUT(exp_avg_sq);
  adam_upd_cuda(param, grad, exp_avg, exp_avg_sq,
      step, beta1, beta2, lr, eps);
}
// Python-facing entry point for the masked Adam update.
// Checks that all four tensors are contiguous CUDA tensors, then forwards
// every argument unchanged to masked_adam_upd_cuda.
void masked_adam_upd(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    int step, float beta1, float beta2, float lr, float eps) {
  CHECK_INPUT(param);
  CHECK_INPUT(grad);
  CHECK_INPUT(exp_avg);
  CHECK_INPUT(exp_avg_sq);
  masked_adam_upd_cuda(param, grad, exp_avg, exp_avg_sq,
      step, beta1, beta2, lr, eps);
}
// Python-facing entry point for the Adam update with a per-element
// learning-rate tensor.
// Checks that every tensor, including `perlr`, is a contiguous CUDA tensor,
// then forwards all arguments unchanged to adam_upd_with_perlr_cuda.
void adam_upd_with_perlr(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    torch::Tensor perlr,
    int step, float beta1, float beta2, float lr, float eps) {
  CHECK_INPUT(param);
  CHECK_INPUT(grad);
  CHECK_INPUT(exp_avg);
  CHECK_INPUT(exp_avg_sq);
  // `perlr` was the only tensor passed on without validation.
  CHECK_INPUT(perlr);
  adam_upd_with_perlr_cuda(param, grad, exp_avg, exp_avg_sq, perlr,
      step, beta1, beta2, lr, eps);
}
// Python bindings: exposes the three wrappers under their C++ names.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("adam_upd", &adam_upd,
        "Adam update");
  m.def("masked_adam_upd", &masked_adam_upd,
        "Adam update ignoring zero grad");
  // NOTE(review): the description below says zero gradients are ignored;
  // confirm against adam_upd_with_perlr_cuda_kernel.
  m.def("adam_upd_with_perlr", &adam_upd_with_perlr,
        "Adam update ignoring zero grad with per-voxel lr");
}

View File

@@ -0,0 +1,132 @@
#include <torch/extension.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
// Element-wise Adam step; each thread handles one entry.
// Both moment buffers are refreshed in place and the parameter is then moved by
// step_size * exp_avg / (sqrt(exp_avg_sq) + eps), where step_size is supplied by the host.
template <typename scalar_t>
__global__ void adam_upd_cuda_kernel(
    scalar_t* __restrict__ param,
    const scalar_t* __restrict__ grad,
    scalar_t* __restrict__ exp_avg,
    scalar_t* __restrict__ exp_avg_sq,
    const size_t N,
    const float step_size, const float beta1, const float beta2, const float eps) {
  const size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid >= N) {
    return;  // surplus thread of the last block
  }
  const scalar_t g = grad[tid];
  // Exponential moving averages of the gradient and of its square.
  const scalar_t first_moment = beta1 * exp_avg[tid] + (1-beta1) * g;
  const scalar_t second_moment = beta2 * exp_avg_sq[tid] + (1-beta2) * g * g;
  exp_avg[tid] = first_moment;
  exp_avg_sq[tid] = second_moment;
  param[tid] -= step_size * first_moment / (sqrt(second_moment) + eps);
}
// Element-wise Adam step that leaves an entry completely untouched (parameter and both
// moment buffers) when its gradient is exactly zero. Each thread handles one entry.
template <typename scalar_t>
__global__ void masked_adam_upd_cuda_kernel(
    scalar_t* __restrict__ param,
    const scalar_t* __restrict__ grad,
    scalar_t* __restrict__ exp_avg,
    scalar_t* __restrict__ exp_avg_sq,
    const size_t N,
    const float step_size, const float beta1, const float beta2, const float eps) {
  const size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid >= N) {
    return;  // surplus thread of the last block
  }
  const scalar_t g = grad[tid];
  if (g == 0) {
    return;  // masked entry: no update at all
  }
  const scalar_t first_moment = beta1 * exp_avg[tid] + (1-beta1) * g;
  const scalar_t second_moment = beta2 * exp_avg_sq[tid] + (1-beta2) * g * g;
  exp_avg[tid] = first_moment;
  exp_avg_sq[tid] = second_moment;
  param[tid] -= step_size * first_moment / (sqrt(second_moment) + eps);
}
// Element-wise Adam step whose update is additionally scaled by perlr[i].
// Each thread handles one entry; every entry below N is updated.
template <typename scalar_t>
__global__ void adam_upd_with_perlr_cuda_kernel(
    scalar_t* __restrict__ param,
    const scalar_t* __restrict__ grad,
    scalar_t* __restrict__ exp_avg,
    scalar_t* __restrict__ exp_avg_sq,
    scalar_t* __restrict__ perlr,
    const size_t N,
    const float step_size, const float beta1, const float beta2, const float eps) {
  const size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid >= N) {
    return;  // surplus thread of the last block
  }
  const scalar_t g = grad[tid];
  const scalar_t first_moment = beta1 * exp_avg[tid] + (1-beta1) * g;
  const scalar_t second_moment = beta2 * exp_avg_sq[tid] + (1-beta2) * g * g;
  exp_avg[tid] = first_moment;
  exp_avg_sq[tid] = second_moment;
  param[tid] -= step_size * perlr[tid] * first_moment / (sqrt(second_moment) + eps);
}
// Host launcher for adam_upd_cuda_kernel.
// Computes the bias-corrected step size once on the host and launches one thread per
// element of `param`. All tensors are modified in place; nothing is returned.
void adam_upd_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    const int step, const float beta1, const float beta2, const float lr, const float eps) {
  const size_t N = param.numel();
  if (N == 0) {
    // A zero-sized grid is an invalid launch configuration; nothing to update anyway.
    return;
  }
  const int threads = 256;
  const int blocks = (N + threads - 1) / threads;
  // lr * sqrt(1 - beta2^step) / (1 - beta1^step): Adam bias correction folded into the step.
  const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step));
  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(param.scalar_type(), "adam_upd_cuda", ([&] {
    adam_upd_cuda_kernel<scalar_t><<<blocks, threads>>>(
        param.data_ptr<scalar_t>(),
        grad.data_ptr<scalar_t>(),
        exp_avg.data_ptr<scalar_t>(),
        exp_avg_sq.data_ptr<scalar_t>(),
        N, step_size, beta1, beta2, eps);
  }));
}
// Host launcher for masked_adam_upd_cuda_kernel.
// Computes the bias-corrected step size once on the host and launches one thread per
// element of `param`. All tensors are modified in place; nothing is returned.
void masked_adam_upd_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    const int step, const float beta1, const float beta2, const float lr, const float eps) {
  const size_t N = param.numel();
  if (N == 0) {
    // A zero-sized grid is an invalid launch configuration; nothing to update anyway.
    return;
  }
  const int threads = 256;
  const int blocks = (N + threads - 1) / threads;
  // lr * sqrt(1 - beta2^step) / (1 - beta1^step): Adam bias correction folded into the step.
  const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step));
  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(param.scalar_type(), "masked_adam_upd_cuda", ([&] {
    masked_adam_upd_cuda_kernel<scalar_t><<<blocks, threads>>>(
        param.data_ptr<scalar_t>(),
        grad.data_ptr<scalar_t>(),
        exp_avg.data_ptr<scalar_t>(),
        exp_avg_sq.data_ptr<scalar_t>(),
        N, step_size, beta1, beta2, eps);
  }));
}
// Host launcher for adam_upd_with_perlr_cuda_kernel.
// Computes the bias-corrected step size once on the host and launches one thread per
// element of `param`; `perlr` supplies a per-element multiplier for the update.
// All tensors are modified in place; nothing is returned.
void adam_upd_with_perlr_cuda(
    torch::Tensor param,
    torch::Tensor grad,
    torch::Tensor exp_avg,
    torch::Tensor exp_avg_sq,
    torch::Tensor perlr,
    const int step, const float beta1, const float beta2, const float lr, const float eps) {
  const size_t N = param.numel();
  if (N == 0) {
    // A zero-sized grid is an invalid launch configuration; nothing to update anyway.
    return;
  }
  const int threads = 256;
  const int blocks = (N + threads - 1) / threads;
  // lr * sqrt(1 - beta2^step) / (1 - beta1^step): Adam bias correction folded into the step.
  const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step));
  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(param.scalar_type(), "adam_upd_with_perlr_cuda", ([&] {
    adam_upd_with_perlr_cuda_kernel<scalar_t><<<blocks, threads>>>(
        param.data_ptr<scalar_t>(),
        grad.data_ptr<scalar_t>(),
        exp_avg.data_ptr<scalar_t>(),
        exp_avg_sq.data_ptr<scalar_t>(),
        perlr.data_ptr<scalar_t>(),
        N, step_size, beta1, beta2, eps);
  }));
}

View File

@@ -0,0 +1,182 @@
#include <torch/extension.h>
#include <vector>
// Forward declarations of the CUDA launchers that the wrappers in this file call.

// Ray / bounding-box helpers and point sampling.
std::vector<torch::Tensor> infer_t_minmax_cuda(
torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor xyz_min, torch::Tensor xyz_max,
const float near, const float far);
torch::Tensor infer_n_samples_cuda(torch::Tensor rays_d, torch::Tensor t_min, torch::Tensor t_max, const float stepdist);
std::vector<torch::Tensor> infer_ray_start_dir_cuda(torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_min);
std::vector<torch::Tensor> sample_pts_on_rays_cuda(
torch::Tensor rays_o, torch::Tensor rays_d,
torch::Tensor xyz_min, torch::Tensor xyz_max,
const float near, const float far, const float stepdist);
std::vector<torch::Tensor> sample_ndc_pts_on_rays_cuda(
torch::Tensor rays_o, torch::Tensor rays_d,
torch::Tensor xyz_min, torch::Tensor xyz_max,
const int N_samples);
torch::Tensor sample_bg_pts_on_rays_cuda(
torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_max,
const float bg_preserve, const int N_samples);
// Boolean mask-grid lookup.
torch::Tensor maskcache_lookup_cuda(torch::Tensor world, torch::Tensor xyz, torch::Tensor xyz2ijk_scale, torch::Tensor xyz2ijk_shift);
// Density -> alpha conversion (uniform and per-point interval) and its backward pass.
std::vector<torch::Tensor> raw2alpha_cuda(torch::Tensor density, const float shift, const float interval);
std::vector<torch::Tensor> raw2alpha_nonuni_cuda(torch::Tensor density, const float shift, torch::Tensor interval);
torch::Tensor raw2alpha_backward_cuda(torch::Tensor exp, torch::Tensor grad_back, const float interval);
torch::Tensor raw2alpha_nonuni_backward_cuda(torch::Tensor exp, torch::Tensor grad_back, torch::Tensor interval);
// Alpha -> per-point blending weight and its backward pass.
std::vector<torch::Tensor> alpha2weight_cuda(torch::Tensor alpha, torch::Tensor ray_id, const int n_rays);
torch::Tensor alpha2weight_backward_cuda(
torch::Tensor alpha, torch::Tensor weight, torch::Tensor T, torch::Tensor alphainv_last,
torch::Tensor i_start, torch::Tensor i_end, const int n_rays,
torch::Tensor grad_weights, torch::Tensor grad_last);
// Input validation helpers for tensors that are handed to the CUDA launchers.
// CHECK_CUDA: the tensor must live on a CUDA device (Tensor::is_cuda() replaces the
// deprecated Tensor::type().is_cuda()).
#define CHECK_CUDA(x) TORCH_CHECK((x).is_cuda(), #x " must be a CUDA tensor")
// CHECK_CONTIGUOUS: the tensor memory must be contiguous.
#define CHECK_CONTIGUOUS(x) TORCH_CHECK((x).is_contiguous(), #x " must be contiguous")
// CHECK_INPUT: both checks, wrapped in do/while(0) so the macro behaves as a single
// statement even under an unbraced if/else.
#define CHECK_INPUT(x) do { CHECK_CUDA(x); CHECK_CONTIGUOUS(x); } while (0)
// Python-facing wrapper for the ray / bounding-box intersection.
// Checks that all tensor arguments are contiguous CUDA tensors, then returns the result
// of infer_t_minmax_cuda unchanged (the launcher returns {t_min, t_max}).
std::vector<torch::Tensor> infer_t_minmax(
torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor xyz_min, torch::Tensor xyz_max,
const float near, const float far) {
CHECK_INPUT(rays_o);
CHECK_INPUT(rays_d);
CHECK_INPUT(xyz_min);
CHECK_INPUT(xyz_max);
return infer_t_minmax_cuda(rays_o, rays_d, xyz_min, xyz_max, near, far);
}
// Python-facing wrapper that returns the per-ray sample count.
// Checks that all tensor arguments are contiguous CUDA tensors, then returns the result
// of infer_n_samples_cuda unchanged.
torch::Tensor infer_n_samples(torch::Tensor rays_d, torch::Tensor t_min, torch::Tensor t_max, const float stepdist) {
CHECK_INPUT(rays_d);
CHECK_INPUT(t_min);
CHECK_INPUT(t_max);
return infer_n_samples_cuda(rays_d, t_min, t_max, stepdist);
}
// Python-facing wrapper that returns each ray's start point and direction.
// Checks that all tensor arguments are contiguous CUDA tensors, then returns the result
// of infer_ray_start_dir_cuda unchanged (the launcher returns {rays_start, rays_dir}).
std::vector<torch::Tensor> infer_ray_start_dir(torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_min) {
CHECK_INPUT(rays_o);
CHECK_INPUT(rays_d);
CHECK_INPUT(t_min);
return infer_ray_start_dir_cuda(rays_o, rays_d, t_min);
}
// Python-facing wrapper for sampling points along rays.
// Checks that all tensor arguments are contiguous CUDA tensors and that rays_o has shape
// (n_rays, 3), then returns the result of sample_pts_on_rays_cuda unchanged.
// Raises a c10::Error (a Python exception on the caller side) when a check fails.
std::vector<torch::Tensor> sample_pts_on_rays(
    torch::Tensor rays_o, torch::Tensor rays_d,
    torch::Tensor xyz_min, torch::Tensor xyz_max,
    const float near, const float far, const float stepdist) {
  CHECK_INPUT(rays_o);
  CHECK_INPUT(rays_d);
  CHECK_INPUT(xyz_min);
  CHECK_INPUT(xyz_max);
  // TORCH_CHECK rather than assert(): assert is compiled out when NDEBUG is defined and
  // aborts the whole process otherwise, while the kernels rely on this layout.
  TORCH_CHECK(rays_o.dim()==2, "rays_o must be a 2-D tensor");
  TORCH_CHECK(rays_o.size(1)==3, "rays_o must have shape (n_rays, 3)");
  return sample_pts_on_rays_cuda(rays_o, rays_d, xyz_min, xyz_max, near, far, stepdist);
}
// Python-facing wrapper for sampling a fixed number of points per ray.
// Checks that all tensor arguments are contiguous CUDA tensors and that rays_o has shape
// (n_rays, 3), then returns the result of sample_ndc_pts_on_rays_cuda unchanged.
// Raises a c10::Error (a Python exception on the caller side) when a check fails.
std::vector<torch::Tensor> sample_ndc_pts_on_rays(
    torch::Tensor rays_o, torch::Tensor rays_d,
    torch::Tensor xyz_min, torch::Tensor xyz_max,
    const int N_samples) {
  CHECK_INPUT(rays_o);
  CHECK_INPUT(rays_d);
  CHECK_INPUT(xyz_min);
  CHECK_INPUT(xyz_max);
  // TORCH_CHECK rather than assert(): assert is compiled out when NDEBUG is defined and
  // aborts the whole process otherwise, while the kernels rely on this layout.
  TORCH_CHECK(rays_o.dim()==2, "rays_o must be a 2-D tensor");
  TORCH_CHECK(rays_o.size(1)==3, "rays_o must have shape (n_rays, 3)");
  return sample_ndc_pts_on_rays_cuda(rays_o, rays_d, xyz_min, xyz_max, N_samples);
}
// Python-facing wrapper for sampling background points along rays.
// Checks that all tensor arguments are contiguous CUDA tensors, then returns the result
// of sample_bg_pts_on_rays_cuda unchanged.
torch::Tensor sample_bg_pts_on_rays(
torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_max,
const float bg_preserve, const int N_samples) {
CHECK_INPUT(rays_o);
CHECK_INPUT(rays_d);
CHECK_INPUT(t_max);
return sample_bg_pts_on_rays_cuda(rays_o, rays_d, t_max, bg_preserve, N_samples);
}
// Python-facing wrapper for the mask-grid lookup.
// Checks that all tensor arguments are contiguous CUDA tensors, that `world` is a 3-D grid
// and that `xyz` has shape (n_pts, 3), then returns the result of maskcache_lookup_cuda.
// Raises a c10::Error (a Python exception on the caller side) when a check fails.
torch::Tensor maskcache_lookup(torch::Tensor world, torch::Tensor xyz, torch::Tensor xyz2ijk_scale, torch::Tensor xyz2ijk_shift) {
  CHECK_INPUT(world);
  CHECK_INPUT(xyz);
  CHECK_INPUT(xyz2ijk_scale);
  CHECK_INPUT(xyz2ijk_shift);
  // TORCH_CHECK rather than assert(): assert is compiled out when NDEBUG is defined and
  // aborts the whole process otherwise, while the launcher reads world.size(0..2).
  TORCH_CHECK(world.dim()==3, "world must be a 3-D tensor");
  TORCH_CHECK(xyz.dim()==2, "xyz must be a 2-D tensor");
  TORCH_CHECK(xyz.size(1)==3, "xyz must have shape (n_pts, 3)");
  return maskcache_lookup_cuda(world, xyz, xyz2ijk_scale, xyz2ijk_shift);
}
// Python-facing wrapper converting raw density to alpha with one shared interval.
// Checks that `density` is a contiguous 1-D CUDA tensor, then returns the result of
// raw2alpha_cuda unchanged (the launcher returns {exp_d, alpha}).
// Raises a c10::Error (a Python exception on the caller side) when a check fails.
std::vector<torch::Tensor> raw2alpha(torch::Tensor density, const float shift, const float interval) {
  CHECK_INPUT(density);
  // TORCH_CHECK rather than assert(): assert is compiled out when NDEBUG is defined.
  TORCH_CHECK(density.dim()==1, "density must be a 1-D tensor");
  return raw2alpha_cuda(density, shift, interval);
}
// Python-facing wrapper converting raw density to alpha with a per-point interval.
// Checks that `density` and `interval` are contiguous CUDA tensors and that `density` is
// 1-D, then returns the result of raw2alpha_nonuni_cuda unchanged.
// Raises a c10::Error (a Python exception on the caller side) when a check fails.
std::vector<torch::Tensor> raw2alpha_nonuni(torch::Tensor density, const float shift, torch::Tensor interval) {
  CHECK_INPUT(density);
  // interval is read through a raw device pointer by the kernel, so it needs the same
  // validation as density.
  CHECK_INPUT(interval);
  // TORCH_CHECK rather than assert(): assert is compiled out when NDEBUG is defined.
  TORCH_CHECK(density.dim()==1, "density must be a 1-D tensor");
  return raw2alpha_nonuni_cuda(density, shift, interval);
}
// Python-facing wrapper for the backward pass of raw2alpha (shared interval).
// Checks that both tensor arguments are contiguous CUDA tensors, then returns the result
// of raw2alpha_backward_cuda unchanged.
torch::Tensor raw2alpha_backward(torch::Tensor exp, torch::Tensor grad_back, const float interval) {
CHECK_INPUT(exp);
CHECK_INPUT(grad_back);
return raw2alpha_backward_cuda(exp, grad_back, interval);
}
// Python-facing wrapper for the backward pass of raw2alpha_nonuni (per-point interval).
// Checks that all three tensor arguments are contiguous CUDA tensors, then returns the
// result of raw2alpha_nonuni_backward_cuda unchanged.
// Raises a c10::Error (a Python exception on the caller side) when a check fails.
torch::Tensor raw2alpha_nonuni_backward(torch::Tensor exp, torch::Tensor grad_back, torch::Tensor interval) {
  CHECK_INPUT(exp);
  CHECK_INPUT(grad_back);
  // interval is read through a raw device pointer by the kernel, so it needs the same
  // validation as the other tensors.
  CHECK_INPUT(interval);
  return raw2alpha_nonuni_backward_cuda(exp, grad_back, interval);
}
// Python-facing wrapper turning per-point alpha into per-point blending weights.
// Checks that `alpha` and `ray_id` are contiguous 1-D CUDA tensors of identical size, then
// returns the result of alpha2weight_cuda unchanged.
// Raises a c10::Error (a Python exception on the caller side) when a check fails.
std::vector<torch::Tensor> alpha2weight(torch::Tensor alpha, torch::Tensor ray_id, const int n_rays) {
  CHECK_INPUT(alpha);
  CHECK_INPUT(ray_id);
  // TORCH_CHECK rather than assert(): assert is compiled out when NDEBUG is defined and
  // aborts the whole process otherwise, while the kernels index both tensors in lockstep.
  TORCH_CHECK(alpha.dim()==1, "alpha must be a 1-D tensor");
  TORCH_CHECK(ray_id.dim()==1, "ray_id must be a 1-D tensor");
  TORCH_CHECK(alpha.sizes()==ray_id.sizes(), "alpha and ray_id must have the same size");
  return alpha2weight_cuda(alpha, ray_id, n_rays);
}
// Python-facing wrapper for the backward pass of alpha2weight.
// Checks that every tensor argument is a contiguous CUDA tensor, then returns the result
// of alpha2weight_backward_cuda unchanged. n_rays is passed through without validation.
torch::Tensor alpha2weight_backward(
torch::Tensor alpha, torch::Tensor weight, torch::Tensor T, torch::Tensor alphainv_last,
torch::Tensor i_start, torch::Tensor i_end, const int n_rays,
torch::Tensor grad_weights, torch::Tensor grad_last) {
CHECK_INPUT(alpha);
CHECK_INPUT(weight);
CHECK_INPUT(T);
CHECK_INPUT(alphainv_last);
CHECK_INPUT(i_start);
CHECK_INPUT(i_end);
CHECK_INPUT(grad_weights);
CHECK_INPUT(grad_last);
return alpha2weight_backward_cuda(
alpha, weight, T, alphainv_last,
i_start, i_end, n_rays,
grad_weights, grad_last);
}
// Python bindings: expose every validated wrapper of this file as a module function.
// The third argument of each m.def is the docstring shown on the Python side.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("infer_t_minmax", &infer_t_minmax, "Inference t_min and t_max of ray-bbox intersection");
m.def("infer_n_samples", &infer_n_samples, "Inference the number of points to sample on each ray");
m.def("infer_ray_start_dir", &infer_ray_start_dir, "Inference the starting point and shooting direction of each ray");
m.def("sample_pts_on_rays", &sample_pts_on_rays, "Sample points on rays");
// NOTE(review): same docstring as sample_pts_on_rays although this variant takes a fixed
// N_samples instead of a step distance.
m.def("sample_ndc_pts_on_rays", &sample_ndc_pts_on_rays, "Sample points on rays");
m.def("sample_bg_pts_on_rays", &sample_bg_pts_on_rays, "Sample points on bg");
m.def("maskcache_lookup", &maskcache_lookup, "Lookup to skip know freespace.");
m.def("raw2alpha", &raw2alpha, "Raw values [-inf, inf] to alpha [0, 1].");
m.def("raw2alpha_backward", &raw2alpha_backward, "Backward pass of the raw to alpha");
m.def("raw2alpha_nonuni", &raw2alpha_nonuni, "Raw values [-inf, inf] to alpha [0, 1].");
m.def("raw2alpha_nonuni_backward", &raw2alpha_nonuni_backward, "Backward pass of the raw to alpha");
m.def("alpha2weight", &alpha2weight, "Per-point alpha to accumulated blending weight");
m.def("alpha2weight_backward", &alpha2weight_backward, "Backward pass of alpha2weight");
}

View File

@@ -0,0 +1,707 @@
#include <torch/extension.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
/*
Points sampling helper functions.
*/
// Per-ray slab test against the axis-aligned box [xyz_min, xyz_max]; one thread per ray.
// Writes the entry and exit ray parameters, each clamped into [near, far].
// Rays are read as consecutive (x, y, z) triples.
template <typename scalar_t>
__global__ void infer_t_minmax_cuda_kernel(
    scalar_t* __restrict__ rays_o,
    scalar_t* __restrict__ rays_d,
    scalar_t* __restrict__ xyz_min,
    scalar_t* __restrict__ xyz_max,
    const float near, const float far, const int n_rays,
    scalar_t* __restrict__ t_min,
    scalar_t* __restrict__ t_max) {
  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if (i_ray >= n_rays) {
    return;  // surplus thread of the last block
  }
  const int base = i_ray * 3;
  // Exactly-zero direction components are replaced by 1e-6 to avoid dividing by zero.
  const float vx = ((rays_d[base  ]==0) ? 1e-6 : rays_d[base  ]);
  const float vy = ((rays_d[base+1]==0) ? 1e-6 : rays_d[base+1]);
  const float vz = ((rays_d[base+2]==0) ? 1e-6 : rays_d[base+2]);
  // Ray parameter at the max-face (a*) and min-face (b*) of the box along each axis.
  const float ax = (xyz_max[0] - rays_o[base  ]) / vx;
  const float ay = (xyz_max[1] - rays_o[base+1]) / vy;
  const float az = (xyz_max[2] - rays_o[base+2]) / vz;
  const float bx = (xyz_min[0] - rays_o[base  ]) / vx;
  const float by = (xyz_min[1] - rays_o[base+1]) / vy;
  const float bz = (xyz_min[2] - rays_o[base+2]) / vz;
  // Latest per-axis entry and earliest per-axis exit.
  const float t_enter = max(max(min(ax, bx), min(ay, by)), min(az, bz));
  const float t_exit = min(min(max(ax, bx), max(ay, by)), max(az, bz));
  t_min[i_ray] = max(min(t_enter, far), near);
  t_max[i_ray] = max(min(t_exit, far), near);
}
// Per-ray sample count; one thread per ray.
// The count is the segment length (t_max - t_min) * |rays_d| divided by stepdist,
// rounded up and never smaller than one.
template <typename scalar_t>
__global__ void infer_n_samples_cuda_kernel(
    scalar_t* __restrict__ rays_d,
    scalar_t* __restrict__ t_min,
    scalar_t* __restrict__ t_max,
    const float stepdist,
    const int n_rays,
    int64_t* __restrict__ n_samples) {
  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if (i_ray >= n_rays) {
    return;  // surplus thread of the last block
  }
  const int base = i_ray * 3;
  const scalar_t dx = rays_d[base  ];
  const scalar_t dy = rays_d[base+1];
  const scalar_t dz = rays_d[base+2];
  const float rnorm = sqrt(dx*dx + dy*dy + dz*dz);
  // at least 1 point for easier implementation in the later sample_pts_on_rays_cuda
  n_samples[i_ray] = max(ceil((t_max[i_ray]-t_min[i_ray]) * rnorm / stepdist), 1.);
}
// Per-ray start point and unit direction; one thread per ray.
// rays_start = rays_o + rays_d * t_min, rays_dir = rays_d / |rays_d|.
template <typename scalar_t>
__global__ void infer_ray_start_dir_cuda_kernel(
    scalar_t* __restrict__ rays_o,
    scalar_t* __restrict__ rays_d,
    scalar_t* __restrict__ t_min,
    const int n_rays,
    scalar_t* __restrict__ rays_start,
    scalar_t* __restrict__ rays_dir) {
  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if (i_ray >= n_rays) {
    return;  // surplus thread of the last block
  }
  const int base = i_ray * 3;
  const scalar_t dx = rays_d[base  ];
  const scalar_t dy = rays_d[base+1];
  const scalar_t dz = rays_d[base+2];
  const float rnorm = sqrt(dx*dx + dy*dy + dz*dz);
  const scalar_t t0 = t_min[i_ray];
  // Start point: origin advanced by t_min along the (unnormalized) direction.
  rays_start[base  ] = rays_o[base  ] + dx * t0;
  rays_start[base+1] = rays_o[base+1] + dy * t0;
  rays_start[base+2] = rays_o[base+2] + dz * t0;
  // Direction normalized to unit length.
  rays_dir[base  ] = dx / rnorm;
  rays_dir[base+1] = dy / rnorm;
  rays_dir[base+2] = dz / rnorm;
}
// Host launcher for infer_t_minmax_cuda_kernel.
// Allocates t_min and t_max (one value per ray, same dtype/device as rays_o), fills them
// with one thread per ray and returns {t_min, t_max}.
std::vector<torch::Tensor> infer_t_minmax_cuda(
    torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor xyz_min, torch::Tensor xyz_max,
    const float near, const float far) {
  const int n_rays = rays_o.size(0);
  auto t_min = torch::empty({n_rays}, rays_o.options());
  auto t_max = torch::empty({n_rays}, rays_o.options());
  if (n_rays == 0) {
    // A zero-sized grid is an invalid launch configuration; return the empty outputs.
    return {t_min, t_max};
  }

  const int threads = 256;
  const int blocks = (n_rays + threads - 1) / threads;

  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(rays_o.scalar_type(), "infer_t_minmax_cuda", ([&] {
    infer_t_minmax_cuda_kernel<scalar_t><<<blocks, threads>>>(
        rays_o.data_ptr<scalar_t>(),
        rays_d.data_ptr<scalar_t>(),
        xyz_min.data_ptr<scalar_t>(),
        xyz_max.data_ptr<scalar_t>(),
        near, far, n_rays,
        t_min.data_ptr<scalar_t>(),
        t_max.data_ptr<scalar_t>());
  }));
  return {t_min, t_max};
}
// Host launcher for infer_n_samples_cuda_kernel.
// Returns an int64 tensor with one sample count per ray (length taken from t_min).
torch::Tensor infer_n_samples_cuda(torch::Tensor rays_d, torch::Tensor t_min, torch::Tensor t_max, const float stepdist) {
  const int n_rays = t_min.size(0);
  // Allocate on the device of the inputs instead of the current default CUDA device.
  auto n_samples = torch::empty({n_rays}, torch::dtype(torch::kInt64).device(t_min.device()));
  if (n_rays == 0) {
    // A zero-sized grid is an invalid launch configuration; return the empty output.
    return n_samples;
  }

  const int threads = 256;
  const int blocks = (n_rays + threads - 1) / threads;
  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(t_min.scalar_type(), "infer_n_samples_cuda", ([&] {
    infer_n_samples_cuda_kernel<scalar_t><<<blocks, threads>>>(
        rays_d.data_ptr<scalar_t>(),
        t_min.data_ptr<scalar_t>(),
        t_max.data_ptr<scalar_t>(),
        stepdist,
        n_rays,
        n_samples.data_ptr<int64_t>());
  }));
  return n_samples;
}
// Host launcher for infer_ray_start_dir_cuda_kernel.
// Returns {rays_start, rays_dir}, both shaped like rays_o.
std::vector<torch::Tensor> infer_ray_start_dir_cuda(torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_min) {
  const int n_rays = rays_o.size(0);
  auto rays_start = torch::empty_like(rays_o);
  auto rays_dir = torch::empty_like(rays_o);
  if (n_rays == 0) {
    // A zero-sized grid is an invalid launch configuration; return the empty outputs.
    return {rays_start, rays_dir};
  }

  const int threads = 256;
  const int blocks = (n_rays + threads - 1) / threads;
  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(rays_o.scalar_type(), "infer_ray_start_dir_cuda", ([&] {
    infer_ray_start_dir_cuda_kernel<scalar_t><<<blocks, threads>>>(
        rays_o.data_ptr<scalar_t>(),
        rays_d.data_ptr<scalar_t>(),
        t_min.data_ptr<scalar_t>(),
        n_rays,
        rays_start.data_ptr<scalar_t>(),
        rays_dir.data_ptr<scalar_t>());
  }));
  return {rays_start, rays_dir};
}
/*
Sampling query points on rays.
*/
// Marks the first point of every ray segment except ray 0 with a 1.
// The segment of ray r starts at N_steps_cumsum[r-1]; a later cumulative sum over ray_id
// (done by the caller) turns these marks into per-point ray indices.
__global__ void __set_1_at_ray_seg_start(
    int64_t* __restrict__ ray_id,
    int64_t* __restrict__ N_steps_cumsum,
    const int n_rays) {
  const int r = blockIdx.x * blockDim.x + threadIdx.x;
  // Ray 0 needs no mark (its points keep id 0); threads beyond n_rays have no work.
  if (r <= 0 || r >= n_rays) {
    return;
  }
  const int64_t seg_begin = N_steps_cumsum[r-1];
  ray_id[seg_begin] = 1;
}
// Computes, for every sampled point, its position within its own ray:
// step_id = global point index - first point index of that ray.
__global__ void __set_step_id(
    int64_t* __restrict__ step_id,
    int64_t* __restrict__ ray_id,
    int64_t* __restrict__ N_steps_cumsum,
    const int total_len) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= total_len) {
    return;  // surplus thread of the last block
  }
  const int rid = ray_id[idx];
  // Ray 0 starts at point 0; every other ray starts where the previous one ended.
  int64_t seg_begin = 0;
  if (rid != 0) {
    seg_begin = N_steps_cumsum[rid-1];
  }
  step_id[idx] = idx - seg_begin;
}
// Places one sample point per thread: rays_start + rays_dir * (stepdist * step index).
// Also records whether the point lies strictly outside the box [xyz_min, xyz_max].
template <typename scalar_t>
__global__ void sample_pts_on_rays_cuda_kernel(
    scalar_t* __restrict__ rays_start,
    scalar_t* __restrict__ rays_dir,
    scalar_t* __restrict__ xyz_min,
    scalar_t* __restrict__ xyz_max,
    int64_t* __restrict__ ray_id,
    int64_t* __restrict__ step_id,
    const float stepdist, const int total_len,
    scalar_t* __restrict__ rays_pts,
    bool* __restrict__ mask_outbbox) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= total_len) {
    return;  // surplus thread of the last block
  }
  const int i_ray = ray_id[idx];
  const int i_step = step_id[idx];
  const int out_base = idx * 3;
  const int ray_base = i_ray * 3;
  const float dist = stepdist * i_step;

  const float px = rays_start[ray_base  ] + rays_dir[ray_base  ] * dist;
  const float py = rays_start[ray_base+1] + rays_dir[ray_base+1] * dist;
  const float pz = rays_start[ray_base+2] + rays_dir[ray_base+2] * dist;
  rays_pts[out_base  ] = px;
  rays_pts[out_base+1] = py;
  rays_pts[out_base+2] = pz;

  // Outside when below the minimum or above the maximum on any axis.
  const bool below_min = (xyz_min[0]>px) || (xyz_min[1]>py) || (xyz_min[2]>pz);
  const bool above_max = (xyz_max[0]<px) || (xyz_max[1]<py) || (xyz_max[2]<pz);
  mask_outbbox[idx] = below_min || above_max;
}
// Samples points along every ray inside its clamped ray/box segment at spacing `stepdist`.
// Returns {rays_pts, mask_outbbox, ray_id, step_id, N_steps, t_min, t_max} where
//   rays_pts     (total_len, 3) sampled positions,
//   mask_outbbox (total_len)    true for points outside [xyz_min, xyz_max],
//   ray_id       (total_len)    index of the ray each point belongs to,
//   step_id      (total_len)    index of the point within its ray,
//   N_steps      (n_rays)       number of points per ray,
//   t_min/t_max  (n_rays)       clamped entry/exit parameters of each ray.
std::vector<torch::Tensor> sample_pts_on_rays_cuda(
    torch::Tensor rays_o, torch::Tensor rays_d,
    torch::Tensor xyz_min, torch::Tensor xyz_max,
    const float near, const float far, const float stepdist) {
  const int threads = 256;
  const int n_rays = rays_o.size(0);
  // Allocate helper tensors on the device of the inputs, not the current default device.
  const auto index_opts = torch::dtype(torch::kInt64).device(rays_o.device());
  const auto mask_opts = torch::dtype(torch::kBool).device(rays_o.device());

  if (n_rays == 0) {
    // No rays: every kernel below would be launched with a zero-sized grid, which is an
    // invalid configuration. Return correctly typed empty outputs instead.
    return {torch::empty({0, 3}, rays_o.options()),
            torch::empty({0}, mask_opts),
            torch::empty({0}, index_opts),
            torch::empty({0}, index_opts),
            torch::empty({0}, index_opts),
            torch::empty({0}, rays_o.options()),
            torch::empty({0}, rays_o.options())};
  }

  // Compute ray-bbox intersection
  auto t_minmax = infer_t_minmax_cuda(rays_o, rays_d, xyz_min, xyz_max, near, far);
  auto t_min = t_minmax[0];
  auto t_max = t_minmax[1];

  // Compute the number of points required.
  // Assign ray index and step index to each.
  auto N_steps = infer_n_samples_cuda(rays_d, t_min, t_max, stepdist);
  auto N_steps_cumsum = N_steps.cumsum(0);
  const int total_len = N_steps.sum().item<int>();
  // ray_id: put a 1 at the first point of every ray but the first, then prefix-sum.
  auto ray_id = torch::zeros({total_len}, index_opts);
  __set_1_at_ray_seg_start<<<(n_rays+threads-1)/threads, threads>>>(
      ray_id.data_ptr<int64_t>(), N_steps_cumsum.data_ptr<int64_t>(), n_rays);
  ray_id.cumsum_(0);
  auto step_id = torch::empty({total_len}, ray_id.options());
  __set_step_id<<<(total_len+threads-1)/threads, threads>>>(
      step_id.data_ptr<int64_t>(), ray_id.data_ptr<int64_t>(), N_steps_cumsum.data_ptr<int64_t>(), total_len);

  // Compute the global xyz of each point
  auto rays_start_dir = infer_ray_start_dir_cuda(rays_o, rays_d, t_min);
  auto rays_start = rays_start_dir[0];
  auto rays_dir = rays_start_dir[1];

  auto rays_pts = torch::empty({total_len, 3}, rays_o.options());
  auto mask_outbbox = torch::empty({total_len}, mask_opts);

  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(rays_o.scalar_type(), "sample_pts_on_rays_cuda", ([&] {
    sample_pts_on_rays_cuda_kernel<scalar_t><<<(total_len+threads-1)/threads, threads>>>(
        rays_start.data_ptr<scalar_t>(),
        rays_dir.data_ptr<scalar_t>(),
        xyz_min.data_ptr<scalar_t>(),
        xyz_max.data_ptr<scalar_t>(),
        ray_id.data_ptr<int64_t>(),
        step_id.data_ptr<int64_t>(),
        stepdist, total_len,
        rays_pts.data_ptr<scalar_t>(),
        mask_outbbox.data_ptr<bool>());
  }));
  return {rays_pts, mask_outbbox, ray_id, step_id, N_steps, t_min, t_max};
}
// Places N_samples points per ray at rays_o + rays_d * s, with s evenly spaced over
// [0, 1] (both ends included). One thread per (ray, sample) pair.
// Also records whether each point lies strictly outside the box [xyz_min, xyz_max].
template <typename scalar_t>
__global__ void sample_ndc_pts_on_rays_cuda_kernel(
    const scalar_t* __restrict__ rays_o,
    const scalar_t* __restrict__ rays_d,
    const scalar_t* __restrict__ xyz_min,
    const scalar_t* __restrict__ xyz_max,
    const int N_samples, const int n_rays,
    scalar_t* __restrict__ rays_pts,
    bool* __restrict__ mask_outbbox) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if(idx<N_samples*n_rays) {
    const int i_ray = idx / N_samples;
    const int i_step = idx % N_samples;

    const int offset_p = idx * 3;
    const int offset_r = i_ray * 3;
    // With a single sample the spacing 1/(N_samples-1) is undefined (0/0 -> NaN);
    // place that lone sample at the ray origin (s = 0) instead.
    const float dist = (N_samples > 1) ? ((float)i_step) / (N_samples-1) : 0.f;

    const float px = rays_o[offset_r  ] + rays_d[offset_r  ] * dist;
    const float py = rays_o[offset_r+1] + rays_d[offset_r+1] * dist;
    const float pz = rays_o[offset_r+2] + rays_d[offset_r+2] * dist;
    rays_pts[offset_p  ] = px;
    rays_pts[offset_p+1] = py;
    rays_pts[offset_p+2] = pz;
    mask_outbbox[idx] = (xyz_min[0]>px) | (xyz_min[1]>py) | (xyz_min[2]>pz) | \
                        (xyz_max[0]<px) | (xyz_max[1]<py) | (xyz_max[2]<pz);
  }
}
// Host launcher for sample_ndc_pts_on_rays_cuda_kernel.
// Returns {rays_pts (n_rays, N_samples, 3), mask_outbbox (n_rays, N_samples)}.
std::vector<torch::Tensor> sample_ndc_pts_on_rays_cuda(
    torch::Tensor rays_o, torch::Tensor rays_d,
    torch::Tensor xyz_min, torch::Tensor xyz_max,
    const int N_samples) {
  const int threads = 256;
  const int n_rays = rays_o.size(0);

  // Allocate on the device of the inputs instead of the current default CUDA device.
  auto rays_pts = torch::empty({n_rays, N_samples, 3}, rays_o.options());
  auto mask_outbbox = torch::empty({n_rays, N_samples}, torch::dtype(torch::kBool).device(rays_o.device()));
  if (mask_outbbox.numel() == 0) {
    // A zero-sized grid is an invalid launch configuration; return the empty outputs.
    return {rays_pts, mask_outbbox};
  }

  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(rays_o.scalar_type(), "sample_ndc_pts_on_rays_cuda", ([&] {
    sample_ndc_pts_on_rays_cuda_kernel<scalar_t><<<(n_rays*N_samples+threads-1)/threads, threads>>>(
        rays_o.data_ptr<scalar_t>(),
        rays_d.data_ptr<scalar_t>(),
        xyz_min.data_ptr<scalar_t>(),
        xyz_max.data_ptr<scalar_t>(),
        N_samples, n_rays,
        rays_pts.data_ptr<scalar_t>(),
        mask_outbbox.data_ptr<bool>());
  }));
  return {rays_pts, mask_outbbox};
}
// Euclidean length of the 3-vector (x, y, z).
template <typename scalar_t>
__device__ __forceinline__ scalar_t norm3(const scalar_t x, const scalar_t y, const scalar_t z) {
  const scalar_t squared_length = x*x + y*y + z*z;
  return sqrt(squared_length);
}
// Samples N_samples background points per ray beyond t_max and rescales each of them by a
// factor blended from two terms with weight bg_preserve. One thread per (ray, sample).
template <typename scalar_t>
__global__ void sample_bg_pts_on_rays_cuda_kernel(
    const scalar_t* __restrict__ rays_o,
    const scalar_t* __restrict__ rays_d,
    const scalar_t* __restrict__ t_max,
    const float bg_preserve,
    const int N_samples, const int n_rays,
    scalar_t* __restrict__ rays_pts) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= N_samples*n_rays) {
    return;  // surplus thread of the last block
  }
  const int i_ray = idx / N_samples;
  const int i_step = idx % N_samples;

  const int out_base = idx * 3;
  const int ray_base = i_ray * 3;

  /* Original pytorch implementation
  ori_t_outer = t_max[:,None] - 1 + 1 / torch.linspace(1, 0, N_outer+1)[:-1]
  ori_ray_pts_outer = (rays_o[:,None,:] + rays_d[:,None,:] * ori_t_outer[:,:,None]).reshape(-1,3)
  t_outer = ori_ray_pts_outer.norm(dim=-1)
  R_outer = t_outer / ori_ray_pts_outer.abs().amax(1)
  # r = R * R / t
  o2i_p = R_outer.pow(2) / t_outer.pow(2) * (1-self.bg_preserve) + R_outer / t_outer * self.bg_preserve
  ray_pts_outer = (ori_ray_pts_outer * o2i_p[:,None]).reshape(len(rays_o), -1, 3)
  */
  // Ray parameter of the unscaled outer point.
  const float t_inner = t_max[i_ray];
  const float ori_t_outer = t_inner - 1. + 1. / (1. - ((float)i_step) / N_samples);
  // Unscaled outer point.
  const float ox = rays_o[ray_base  ] + rays_d[ray_base  ] * ori_t_outer;
  const float oy = rays_o[ray_base+1] + rays_d[ray_base+1] * ori_t_outer;
  const float oz = rays_o[ray_base+2] + rays_d[ray_base+2] * ori_t_outer;
  // Its Euclidean norm and its largest absolute coordinate.
  const float t_outer = norm3(ox, oy, oz);
  const float max_abs = max(abs(ox), max(abs(oy), abs(oz)));
  const float R_outer = t_outer / max_abs;
  // Scale factor applied to the unscaled point.
  const float o2i_p = R_outer*R_outer / (t_outer*t_outer) * (1.-bg_preserve) + R_outer / t_outer * bg_preserve;
  rays_pts[out_base  ] = ox * o2i_p;
  rays_pts[out_base+1] = oy * o2i_p;
  rays_pts[out_base+2] = oz * o2i_p;
}
// Host launcher for sample_bg_pts_on_rays_cuda_kernel.
// Returns rays_pts with shape (n_rays, N_samples, 3), dtype and device taken from rays_o.
torch::Tensor sample_bg_pts_on_rays_cuda(
    torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_max,
    const float bg_preserve, const int N_samples) {
  const int threads = 256;
  const int n_rays = rays_o.size(0);

  // Allocate on the device of the inputs instead of the current default CUDA device.
  auto rays_pts = torch::empty({n_rays, N_samples, 3}, rays_o.options());
  if (rays_pts.numel() == 0) {
    // A zero-sized grid is an invalid launch configuration; return the empty output.
    return rays_pts;
  }

  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(rays_o.scalar_type(), "sample_bg_pts_on_rays_cuda", ([&] {
    sample_bg_pts_on_rays_cuda_kernel<scalar_t><<<(n_rays*N_samples+threads-1)/threads, threads>>>(
        rays_o.data_ptr<scalar_t>(),
        rays_d.data_ptr<scalar_t>(),
        t_max.data_ptr<scalar_t>(),
        bg_preserve,
        N_samples, n_rays,
        rays_pts.data_ptr<scalar_t>());
  }));
  return rays_pts;
}
/*
MaskCache lookup to skip known freespace.
*/
// True when (i, j, k) is a valid index into a grid of size (sz_i, sz_j, sz_k).
static __forceinline__ __device__
bool check_xyz(int i, int j, int k, int sz_i, int sz_j, int sz_k) {
  const bool i_ok = (0 <= i) && (i < sz_i);
  const bool j_ok = (0 <= j) && (j < sz_j);
  const bool k_ok = (0 <= k) && (k < sz_k);
  return i_ok && j_ok && k_ok;
}
// Looks up one boolean grid cell per point; one thread per point.
// The point is mapped to grid indices by round(xyz * scale + shift) per axis. Points that
// land outside the grid leave out[i_pt] untouched.
template <typename scalar_t>
__global__ void maskcache_lookup_cuda_kernel(
    bool* __restrict__ world,
    scalar_t* __restrict__ xyz,
    bool* __restrict__ out,
    scalar_t* __restrict__ xyz2ijk_scale,
    scalar_t* __restrict__ xyz2ijk_shift,
    const int sz_i, const int sz_j, const int sz_k, const int n_pts) {
  const int i_pt = blockIdx.x * blockDim.x + threadIdx.x;
  if (i_pt >= n_pts) {
    return;  // surplus thread of the last block
  }
  const int base = i_pt * 3;
  const int i = round(xyz[base  ] * xyz2ijk_scale[0] + xyz2ijk_shift[0]);
  const int j = round(xyz[base+1] * xyz2ijk_scale[1] + xyz2ijk_shift[1]);
  const int k = round(xyz[base+2] * xyz2ijk_scale[2] + xyz2ijk_shift[2]);
  if (!check_xyz(i, j, k, sz_i, sz_j, sz_k)) {
    return;  // outside the grid
  }
  // Row-major flattening of (i, j, k).
  out[i_pt] = world[i*sz_j*sz_k + j*sz_k + k];
}
// Host launcher for maskcache_lookup_cuda_kernel.
// Returns a bool tensor with one entry per point of `xyz`; entries start as false and are
// overwritten with the grid value only for points that fall inside `world`.
torch::Tensor maskcache_lookup_cuda(
    torch::Tensor world,
    torch::Tensor xyz,
    torch::Tensor xyz2ijk_scale,
    torch::Tensor xyz2ijk_shift) {

  const int sz_i = world.size(0);
  const int sz_j = world.size(1);
  const int sz_k = world.size(2);
  const int n_pts = xyz.size(0);

  // Allocate on the device of the inputs instead of the current default CUDA device.
  auto out = torch::zeros({n_pts}, torch::dtype(torch::kBool).device(xyz.device()));
  if(n_pts==0) {
    return out;
  }

  const int threads = 256;
  const int blocks = (n_pts + threads - 1) / threads;

  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(xyz.scalar_type(), "maskcache_lookup_cuda", ([&] {
    maskcache_lookup_cuda_kernel<scalar_t><<<blocks, threads>>>(
        world.data_ptr<bool>(),
        xyz.data_ptr<scalar_t>(),
        out.data_ptr<bool>(),
        xyz2ijk_scale.data_ptr<scalar_t>(),
        xyz2ijk_shift.data_ptr<scalar_t>(),
        sz_i, sz_j, sz_k, n_pts);
  }));

  return out;
}
/*
Ray marching helper function.
*/
// Converts raw density to alpha with one shared interval; one thread per point.
// Stores e = exp(density + shift) for the backward pass and alpha = 1 - (1 + e)^(-interval).
template <typename scalar_t>
__global__ void raw2alpha_cuda_kernel(
    scalar_t* __restrict__ density,
    const float shift, const float interval, const int n_pts,
    scalar_t* __restrict__ exp_d,
    scalar_t* __restrict__ alpha) {
  const int i_pt = blockIdx.x * blockDim.x + threadIdx.x;
  if (i_pt >= n_pts) {
    return;  // surplus thread of the last block
  }
  const scalar_t e = exp(density[i_pt] + shift); // can be inf
  exp_d[i_pt] = e;
  alpha[i_pt] = 1 - pow(1 + e, -interval);
}
// Converts raw density to alpha with a per-point interval; one thread per point.
// Stores e = exp(density + shift) for the backward pass and
// alpha = 1 - (1 + e)^(-interval[i]).
template <typename scalar_t>
__global__ void raw2alpha_nonuni_cuda_kernel(
    scalar_t* __restrict__ density,
    const float shift, scalar_t* __restrict__ interval, const int n_pts,
    scalar_t* __restrict__ exp_d,
    scalar_t* __restrict__ alpha) {
  const int i_pt = blockIdx.x * blockDim.x + threadIdx.x;
  if (i_pt >= n_pts) {
    return;  // surplus thread of the last block
  }
  const scalar_t e = exp(density[i_pt] + shift); // can be inf
  exp_d[i_pt] = e;
  alpha[i_pt] = 1 - pow(1 + e, -interval[i_pt]);
}
// Host launcher for raw2alpha_cuda_kernel.
// Returns {exp_d, alpha}, both shaped like `density`.
std::vector<torch::Tensor> raw2alpha_cuda(torch::Tensor density, const float shift, const float interval) {

  const int n_pts = density.size(0);
  auto exp_d = torch::empty_like(density);
  auto alpha = torch::empty_like(density);
  if(n_pts==0) {
    return {exp_d, alpha};
  }

  const int threads = 256;
  const int blocks = (n_pts + threads - 1) / threads;

  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(density.scalar_type(), "raw2alpha_cuda", ([&] {
    raw2alpha_cuda_kernel<scalar_t><<<blocks, threads>>>(
        density.data_ptr<scalar_t>(),
        shift, interval, n_pts,
        exp_d.data_ptr<scalar_t>(),
        alpha.data_ptr<scalar_t>());
  }));

  return {exp_d, alpha};
}
// Host launcher for raw2alpha_nonuni_cuda_kernel.
// Returns {exp_d, alpha}, both shaped like `density`; `interval` supplies one value per point.
std::vector<torch::Tensor> raw2alpha_nonuni_cuda(torch::Tensor density, const float shift, torch::Tensor interval) {

  const int n_pts = density.size(0);
  auto exp_d = torch::empty_like(density);
  auto alpha = torch::empty_like(density);
  if(n_pts==0) {
    return {exp_d, alpha};
  }

  const int threads = 256;
  const int blocks = (n_pts + threads - 1) / threads;

  // The dispatch label shows up in dtype error messages, so it names this function
  // (it used to carry the label of raw2alpha_cuda).
  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(density.scalar_type(), "raw2alpha_nonuni_cuda", ([&] {
    raw2alpha_nonuni_cuda_kernel<scalar_t><<<blocks, threads>>>(
        density.data_ptr<scalar_t>(),
        shift, interval.data_ptr<scalar_t>(), n_pts,
        exp_d.data_ptr<scalar_t>(),
        alpha.data_ptr<scalar_t>());
  }));

  return {exp_d, alpha};
}
// Backward pass of raw2alpha (shared interval); one thread per point.
// grad = min(e, 1e10) * (1 + e)^(-interval - 1) * interval * grad_back, with e = exp_d[i].
template <typename scalar_t>
__global__ void raw2alpha_backward_cuda_kernel(
    scalar_t* __restrict__ exp_d,
    scalar_t* __restrict__ grad_back,
    const float interval, const int n_pts,
    scalar_t* __restrict__ grad) {
  const int i_pt = blockIdx.x * blockDim.x + threadIdx.x;
  if (i_pt >= n_pts) {
    return;  // surplus thread of the last block
  }
  const scalar_t e = exp_d[i_pt];
  // e is capped at 1e10 before being multiplied with the power term.
  grad[i_pt] = min(e, 1e10) * pow(1+e, -interval-1) * interval * grad_back[i_pt];
}
// Backward pass of raw2alpha with a per-point interval; one thread per point.
// grad = min(e, 1e10) * (1 + e)^(-interval[i] - 1) * interval[i] * grad_back, e = exp_d[i].
template <typename scalar_t>
__global__ void raw2alpha_nonuni_backward_cuda_kernel(
    scalar_t* __restrict__ exp_d,
    scalar_t* __restrict__ grad_back,
    scalar_t* __restrict__ interval, const int n_pts,
    scalar_t* __restrict__ grad) {
  const int i_pt = blockIdx.x * blockDim.x + threadIdx.x;
  if (i_pt >= n_pts) {
    return;  // surplus thread of the last block
  }
  const scalar_t e = exp_d[i_pt];
  const scalar_t dt = interval[i_pt];
  // e is capped at 1e10 before being multiplied with the power term.
  grad[i_pt] = min(e, 1e10) * pow(1+e, -dt-1) * dt * grad_back[i_pt];
}
// Host launcher for raw2alpha_backward_cuda_kernel.
// Returns the gradient with respect to the raw density, shaped like `exp_d`.
torch::Tensor raw2alpha_backward_cuda(torch::Tensor exp_d, torch::Tensor grad_back, const float interval) {

  const int n_pts = exp_d.size(0);
  auto grad = torch::empty_like(exp_d);
  if(n_pts==0) {
    return grad;
  }

  const int threads = 256;
  const int blocks = (n_pts + threads - 1) / threads;

  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(exp_d.scalar_type(), "raw2alpha_backward_cuda", ([&] {
    raw2alpha_backward_cuda_kernel<scalar_t><<<blocks, threads>>>(
        exp_d.data_ptr<scalar_t>(),
        grad_back.data_ptr<scalar_t>(),
        interval, n_pts,
        grad.data_ptr<scalar_t>());
  }));

  return grad;
}
// Host launcher for raw2alpha_nonuni_backward_cuda_kernel.
// Returns the gradient with respect to the raw density, shaped like `exp_d`;
// `interval` supplies one value per point.
torch::Tensor raw2alpha_nonuni_backward_cuda(torch::Tensor exp_d, torch::Tensor grad_back, torch::Tensor interval) {

  const int n_pts = exp_d.size(0);
  auto grad = torch::empty_like(exp_d);
  if(n_pts==0) {
    return grad;
  }

  const int threads = 256;
  const int blocks = (n_pts + threads - 1) / threads;

  // The dispatch label shows up in dtype error messages, so it names this function
  // (it used to carry the label of raw2alpha_backward_cuda).
  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(exp_d.scalar_type(), "raw2alpha_nonuni_backward_cuda", ([&] {
    raw2alpha_nonuni_backward_cuda_kernel<scalar_t><<<blocks, threads>>>(
        exp_d.data_ptr<scalar_t>(),
        grad_back.data_ptr<scalar_t>(),
        interval.data_ptr<scalar_t>(), n_pts,
        grad.data_ptr<scalar_t>());
  }));

  return grad;
}
// Front-to-back compositing along one ray per thread.
// For the points [i_start, i_end) of the ray it stores the transmittance before each point
// (T) and the point's weight T * alpha. Marching stops early once the remaining
// transmittance drops below 1e-3; i_end is overwritten with the index one past the last
// processed point and alphainv_last receives the remaining transmittance.
template <typename scalar_t>
__global__ void alpha2weight_cuda_kernel(
    scalar_t* __restrict__ alpha,
    const int n_rays,
    scalar_t* __restrict__ weight,
    scalar_t* __restrict__ T,
    scalar_t* __restrict__ alphainv_last,
    int64_t* __restrict__ i_start,
    int64_t* __restrict__ i_end) {
  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if (i_ray >= n_rays) {
    return;  // surplus thread of the last block
  }
  const int seg_begin = i_start[i_ray];
  const int seg_end = i_end[i_ray];

  float T_cum = 1.;
  int i = seg_begin;
  while (i < seg_end) {
    T[i] = T_cum;
    weight[i] = T_cum * alpha[i];
    T_cum *= (1. - alpha[i]);
    ++i;  // the current point counts as processed even when marching stops here
    if (T_cum<1e-3) {
      break;
    }
  }
  i_end[i_ray] = i;
  alphainv_last[i_ray] = T_cum;
}
// Finds ray boundaries in the per-point ray_id array; one thread per point.
// Wherever ray_id changes between point index-1 and point index, `index` is both the
// start of the new ray's segment and the (exclusive) end of the previous ray's segment.
__global__ void __set_i_for_segment_start_end(
    int64_t* __restrict__ ray_id,
    const int n_pts,
    int64_t* __restrict__ i_start,
    int64_t* __restrict__ i_end) {
  const int index = blockIdx.x * blockDim.x + threadIdx.x;
  // Point 0 has no predecessor; threads beyond n_pts have no work.
  if (index <= 0 || index >= n_pts) {
    return;
  }
  const int64_t cur_ray = ray_id[index];
  const int64_t prev_ray = ray_id[index-1];
  if (cur_ray != prev_ray) {
    i_start[cur_ray] = index;
    i_end[prev_ray] = index;
  }
}
// Converts per-point alpha into per-point blending weights, ray by ray.
// Returns {weight, T, alphainv_last, i_start, i_end}:
//   weight, T      shaped like alpha (weight defaults to 0, T to 1 for unprocessed points),
//   alphainv_last  (n_rays) remaining transmittance per ray (1 for rays without points),
//   i_start, i_end (n_rays) int64 point range [i_start, i_end) processed for each ray.
std::vector<torch::Tensor> alpha2weight_cuda(torch::Tensor alpha, torch::Tensor ray_id, const int n_rays) {

  const int n_pts = alpha.size(0);
  const int threads = 256;

  // Allocate index buffers on the device of the inputs, not the current default device.
  const auto index_opts = torch::dtype(torch::kInt64).device(alpha.device());
  auto weight = torch::zeros_like(alpha);
  auto T = torch::ones_like(alpha);
  auto alphainv_last = torch::ones({n_rays}, alpha.options());
  auto i_start = torch::zeros({n_rays}, index_opts);
  auto i_end = torch::zeros({n_rays}, index_opts);
  if(n_pts==0) {
    return {weight, T, alphainv_last, i_start, i_end};
  }
  // With points present the kernels write i_start/i_end at the ray ids found in ray_id,
  // so empty per-ray buffers would be written out of bounds.
  TORCH_CHECK(n_rays > 0, "n_rays must be positive when alpha is not empty");

  // Segment boundaries: interior boundaries come from the kernel, the end of the last
  // segment is set from the host.
  __set_i_for_segment_start_end<<<(n_pts+threads-1)/threads, threads>>>(
      ray_id.data_ptr<int64_t>(), n_pts, i_start.data_ptr<int64_t>(), i_end.data_ptr<int64_t>());
  i_end[ray_id[n_pts-1]] = n_pts;

  const int blocks = (n_rays + threads - 1) / threads;

  // scalar_type()/data_ptr<T>() replace the deprecated type()/data<T>() accessors.
  AT_DISPATCH_FLOATING_TYPES(alpha.scalar_type(), "alpha2weight_cuda", ([&] {
    alpha2weight_cuda_kernel<scalar_t><<<blocks, threads>>>(
        alpha.data_ptr<scalar_t>(),
        n_rays,
        weight.data_ptr<scalar_t>(),
        T.data_ptr<scalar_t>(),
        alphainv_last.data_ptr<scalar_t>(),
        i_start.data_ptr<int64_t>(),
        i_end.data_ptr<int64_t>());
  }));

  return {weight, T, alphainv_last, i_start, i_end};
}
// Backward pass of alpha2weight: one thread handles one ray.
//
// The ray's samples [i_start, i_end) are visited from last to first while a
// running term (seeded with grad_last * alphainv_last) is carried along.
// For each sample the gradient w.r.t. alpha is written to grad, then the
// running term is increased by grad_weights * weight for that sample.
// The 1e-10 in the denominator guards the division when alpha is 1.
template <typename scalar_t>
__global__ void alpha2weight_backward_cuda_kernel(
    scalar_t* __restrict__ alpha,
    scalar_t* __restrict__ weight,
    scalar_t* __restrict__ T,
    scalar_t* __restrict__ alphainv_last,
    int64_t* __restrict__ i_start,
    int64_t* __restrict__ i_end,
    const int n_rays,
    scalar_t* __restrict__ grad_weights,
    scalar_t* __restrict__ grad_last,
    scalar_t* __restrict__ grad) {
  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_ray>=n_rays) {
    return;
  }

  const int seg_first = i_start[i_ray];
  const int seg_stop = i_end[i_ray];
  float back_cum = grad_last[i_ray] * alphainv_last[i_ray];

  int i = seg_stop - 1;
  while(i>=seg_first) {
    grad[i] = grad_weights[i] * T[i] - back_cum / (1-alpha[i] + 1e-10);
    back_cum += grad_weights[i] * weight[i];
    --i;
  }
}
// Host launcher for the alpha2weight backward kernel.
//
// Takes the forward inputs/outputs (alpha, weight, T, alphainv_last, i_start,
// i_end), the ray count and the incoming gradients (grad_weights, grad_last),
// and returns a tensor shaped like alpha. The result stays all-zero when
// n_rays is 0, because no kernel is launched in that case.
torch::Tensor alpha2weight_backward_cuda(
    torch::Tensor alpha, torch::Tensor weight, torch::Tensor T, torch::Tensor alphainv_last,
    torch::Tensor i_start, torch::Tensor i_end, const int n_rays,
    torch::Tensor grad_weights, torch::Tensor grad_last) {
  auto grad = torch::zeros_like(alpha);

  if(n_rays!=0) {
    // One thread per ray.
    const int block_size = 256;
    const int grid_size = (n_rays + block_size - 1) / block_size;

    AT_DISPATCH_FLOATING_TYPES(alpha.type(), "alpha2weight_backward_cuda", ([&] {
      alpha2weight_backward_cuda_kernel<scalar_t><<<grid_size, block_size>>>(
          alpha.data<scalar_t>(),
          weight.data<scalar_t>(),
          T.data<scalar_t>(),
          alphainv_last.data<scalar_t>(),
          i_start.data<int64_t>(),
          i_end.data<int64_t>(),
          n_rays,
          grad_weights.data<scalar_t>(),
          grad_last.data<scalar_t>(),
          grad.data<scalar_t>());
    }));
  }

  return grad;
}

View File

@@ -0,0 +1,22 @@
#include <torch/extension.h>
#include <vector>
// Implemented in the accompanying CUDA source.
void total_variation_add_grad_cuda(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode);

// Argument validation helpers. Each raises through TORCH_CHECK with a message
// that names the offending argument.
//  - The macro argument is parenthesised so any expression can be passed.
//  - CHECK_INPUT is wrapped in do { } while(0) so that it expands to a single
//    statement and both checks stay guarded when used under an unbraced `if`.
#define CHECK_CUDA(x) TORCH_CHECK((x).is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK((x).is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) do { CHECK_CUDA(x); CHECK_CONTIGUOUS(x); } while(0)
// Python-facing entry point: validates the tensors and forwards every
// argument unchanged to the CUDA implementation. Returns nothing; the CUDA
// routine receives `grad` and is expected to update it in place.
void total_variation_add_grad(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode) {
// Both tensors must be CUDA tensors with contiguous storage.
CHECK_INPUT(param);
CHECK_INPUT(grad);
total_variation_add_grad_cuda(param, grad, wx, wy, wz, dense_mode);
}
// Python bindings: exposes total_variation_add_grad under the same name in
// the extension module named by TORCH_EXTENSION_NAME.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("total_variation_add_grad", &total_variation_add_grad, "Add total variation grad");
}

View File

@@ -0,0 +1,67 @@
#include <torch/extension.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
// Device helper: limits v to the range [lo, hi] and returns it as scalar_t.
// The bounds may have a different type than the value (bound_t vs scalar_t);
// the comparison itself is delegated to the device min/max overloads.
template <typename scalar_t, typename bound_t>
__device__ __forceinline__ scalar_t clamp(const scalar_t v, const bound_t lo, const bound_t hi) {
return min(max(v, lo), hi);
}
// Adds a total-variation gradient term to grad, one thread per element.
//
// The flat element index is decomposed into (i, j, k) with k varying fastest.
// For each of the up-to-six axis neighbours, the difference between the
// element and that neighbour is clamped to [-1, 1], scaled by the weight of
// the axis (wx for k, wy for j, wz for i) and accumulated; neighbours that
// would fall outside the grid contribute 0. When dense_mode is false,
// elements whose current grad is exactly 0 are left untouched.
template <typename scalar_t, bool dense_mode>
__global__ void total_variation_add_grad_cuda_kernel(
    const scalar_t* __restrict__ param,
    scalar_t* __restrict__ grad,
    float wx, float wy, float wz,
    const size_t sz_i, const size_t sz_j, const size_t sz_k, const size_t N) {
  const size_t index = blockIdx.x * blockDim.x + threadIdx.x;
  if(index>=N) {
    return;
  }
  if(!dense_mode && grad[index]==0) {
    return;
  }

  // Position along each axis.
  const size_t k = index % sz_k;
  const size_t j = index / sz_k % sz_j;
  const size_t i = index / sz_k / sz_j % sz_i;

  // Flat-index distance to the neighbour along j and along i.
  const size_t step_j = sz_k;
  const size_t step_i = sz_k*sz_j;

  const scalar_t center = param[index];
  float tv_grad = 0;
  tv_grad += (k==0 ? 0 : wx * clamp(center-param[index-1], -1.f, 1.f));
  tv_grad += (k==sz_k-1 ? 0 : wx * clamp(center-param[index+1], -1.f, 1.f));
  tv_grad += (j==0 ? 0 : wy * clamp(center-param[index-step_j], -1.f, 1.f));
  tv_grad += (j==sz_j-1 ? 0 : wy * clamp(center-param[index+step_j], -1.f, 1.f));
  tv_grad += (i==0 ? 0 : wz * clamp(center-param[index-step_i], -1.f, 1.f));
  tv_grad += (i==sz_i-1 ? 0 : wz * clamp(center-param[index+step_i], -1.f, 1.f));
  grad[index] += tv_grad;
}
// Host launcher that adds the total-variation gradient of `param` to `grad`.
//
// Parameters:
//   param       tensor whose dimensions 2, 3 and 4 give the grid extents.
//   grad        tensor updated in place; must hold as many elements as param.
//   wx, wy, wz  per-axis weights; each is divided by 6 before use.
//   dense_mode  true: update every element; false: only elements whose grad
//               is currently non-zero.
void total_variation_add_grad_cuda(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode) {
  // The kernel indexes grad with param's flat indices, so a smaller grad
  // would be read and written out of bounds.
  TORCH_CHECK(grad.numel() == param.numel(),
              "grad must have the same number of elements as param");

  const size_t N = param.numel();
  const size_t sz_i = param.size(2);
  const size_t sz_j = param.size(3);
  const size_t sz_k = param.size(4);

  // An empty tensor would request a zero-block grid, which is not a valid
  // launch configuration; there is nothing to update anyway.
  if(N==0) {
    return;
  }

  const int threads = 256;
  const int blocks = (N + threads - 1) / threads;

  wx /= 6;
  wy /= 6;
  wz /= 6;

  // dense_mode is a template parameter of the kernel, hence the two launches.
  if(dense_mode) {
    AT_DISPATCH_FLOATING_TYPES(param.type(), "total_variation_add_grad_cuda", ([&] {
      total_variation_add_grad_cuda_kernel<scalar_t,true><<<blocks, threads>>>(
          param.data<scalar_t>(),
          grad.data<scalar_t>(),
          wx, wy, wz,
          sz_i, sz_j, sz_k, N);
    }));
  }
  else {
    AT_DISPATCH_FLOATING_TYPES(param.type(), "total_variation_add_grad_cuda", ([&] {
      total_variation_add_grad_cuda_kernel<scalar_t,false><<<blocks, threads>>>(
          param.data<scalar_t>(),
          grad.data<scalar_t>(),
          wx, wy, wz,
          sz_i, sz_j, sz_k, N);
    }));
  }
}

View File

@@ -0,0 +1,20 @@
#include <torch/extension.h>
#include <vector>
// Implemented in the accompanying CUDA source.
torch::Tensor cumdist_thres_cuda(torch::Tensor dist, float thres);

// Argument validation helpers. Each raises through TORCH_CHECK with a message
// that names the offending argument.
//  - The macro argument is parenthesised so any expression can be passed.
//  - CHECK_INPUT is wrapped in do { } while(0) so that it expands to a single
//    statement and both checks stay guarded when used under an unbraced `if`.
#define CHECK_CUDA(x) TORCH_CHECK((x).is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK((x).is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) do { CHECK_CUDA(x); CHECK_CONTIGUOUS(x); } while(0)
// Python-facing entry point: checks that `dist` is a contiguous CUDA tensor
// and returns whatever the CUDA implementation produces for (dist, thres).
torch::Tensor cumdist_thres(torch::Tensor dist, float thres) {
CHECK_INPUT(dist);
return cumdist_thres_cuda(dist, thres);
}
// Python bindings: exposes cumdist_thres under the same name in the
// extension module named by TORCH_EXTENSION_NAME.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("cumdist_thres", &cumdist_thres, "Generate mask for cumulative dist.");
}

View File

@@ -0,0 +1,47 @@
#include <torch/extension.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
/*
  Helper used to skip oversampled points, especially near the foreground
  scene bbox boundary.

  One thread processes one ray, i.e. n_pts consecutive entries of dist. It
  keeps a running sum of the distances; whenever the sum exceeds thres the
  sample is flagged in mask and the running sum is reset by multiplying it
  with 0, otherwise the sample's mask entry is false and the sum carries on.
*/
template <typename scalar_t>
__global__ void cumdist_thres_cuda_kernel(
    scalar_t* __restrict__ dist,
    const float thres,
    const int n_rays,
    const int n_pts,
    bool* __restrict__ mask) {
  const int i_ray = blockIdx.x * blockDim.x + threadIdx.x;
  if(i_ray>=n_rays) {
    return;
  }

  const int row_begin = i_ray * n_pts;
  const int row_end = row_begin + n_pts;
  float running = 0;

  int p = row_begin;
  while(p<row_end) {
    running += dist[p];
    const bool exceeded = (running > thres);
    mask[p] = exceeded;
    // Multiply by 0 after a flagged sample, by 1 otherwise.
    running *= float(!exceeded);
    ++p;
  }
}
// Host launcher for cumdist_thres_cuda_kernel.
//
// Parameters:
//   dist   tensor read as (n_rays, n_pts) via its first two sizes.
//   thres  threshold applied to the running distance sum of each ray.
//
// Returns a bool tensor of shape (n_rays, n_pts), allocated on the same
// device as dist; entries stay false unless the kernel flags them.
torch::Tensor cumdist_thres_cuda(torch::Tensor dist, float thres) {
  const int n_rays = dist.size(0);
  const int n_pts = dist.size(1);

  // Follow dist's device rather than the default CUDA device.
  auto mask = torch::zeros({n_rays, n_pts}, dist.options().dtype(torch::kBool));

  // No rays would mean a zero-block grid, which is not a valid launch
  // configuration; with no samples there is nothing to flag either.
  if(n_rays==0 || n_pts==0) {
    return mask;
  }

  // One thread per ray.
  const int threads = 256;
  const int blocks = (n_rays + threads - 1) / threads;
  AT_DISPATCH_FLOATING_TYPES(dist.type(), "cumdist_thres_cuda", ([&] {
    cumdist_thres_cuda_kernel<scalar_t><<<blocks, threads>>>(
        dist.data<scalar_t>(), thres,
        n_rays, n_pts,
        mask.data<bool>());
  }));
  return mask;
}

View File

@@ -103,6 +103,7 @@ if TYPE_CHECKING:
from .mobile_image_super_resolution_pipeline import MobileImageSuperResolutionPipeline
from .image_human_parsing_pipeline import ImageHumanParsingPipeline
from .nerf_recon_acc_pipeline import NeRFReconAccPipeline
from .nerf_recon_4k_pipeline import NeRFRecon4KPipeline
from .controllable_image_generation_pipeline import ControllableImageGenerationPipeline
from .image_bts_depth_estimation_pipeline import ImageBTSDepthEstimationPipeline
from .pedestrian_attribute_recognition_pipeline import PedestrainAttributeRecognitionPipeline
@@ -254,6 +255,7 @@ else:
'bad_image_detecting_pipeline': ['BadImageDetecingPipeline'],
'image_human_parsing_pipeline': ['ImageHumanParsingPipeline'],
'nerf_recon_acc_pipeline': ['NeRFReconAccPipeline'],
'nerf_recon_4k_pipeline': ['NeRFRecon4KPipeline'],
'controllable_image_generation_pipeline': [
'ControllableImageGenerationPipeline'
],

View File

@@ -0,0 +1,87 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict
from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input, Model, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.pipelines.util import is_model, is_official_hub_path
from modelscope.utils.constant import Invoke, Tasks
from modelscope.utils.logger import get_logger
logger = get_logger()
@PIPELINES.register_module(
    Tasks.nerf_recon_4k, module_name=Pipelines.nerf_recon_4k)
class NeRFRecon4KPipeline(Pipeline):
    """Pipeline registered for the ``nerf_recon_4k`` task.

    The pipeline delegates to the model's ``nerf_reconstruction`` method,
    passing the dataset configuration and the render directory taken from
    the input dict.

    Example:

    ```python
    >>> from modelscope.pipelines import pipeline
    >>> from modelscope.utils.constant import Tasks
    >>> nerf_recon_4k = pipeline(
    ...     Tasks.nerf_recon_4k,
    ...     model='DAMOXR/cv_nerf-3d-reconstruction-4k-nerf_damo',
    ...     data_type='llff')
    >>> nerf_recon_4k({
    ...     'data_cfg': dict(
    ...         datadir='/data/nerf_llff_data/fern',  # scene dir (str)
    ...         dataset_type='llff',
    ...         load_sr=1,
    ...         factor=4,
    ...         ndc=True,
    ...         white_bkgd=False),
    ...     'render_dir': 'exp',  # save dir path (str)
    ... })
    ```
    """

    # Values of ``data_type`` accepted by ``__init__``.
    _SUPPORTED_DATA_TYPES = ('blender', 'llff')

    def __init__(self,
                 model,
                 data_type='blender',
                 test_ray_chunk=8192,
                 test_tile=510,
                 stepsize=1.0,
                 factor=4,
                 load_sr=1,
                 device='gpu',
                 **kwargs):
        """Create the pipeline and, if needed, load the model.

        Args:
            model (str or Model): a model id / model directory, or an
                already constructed ``Model`` instance.
            data_type (str): dataset layout, either 'blender' or 'llff'.
            test_ray_chunk (int): forwarded to the model; per the original
                documentation, the ray chunk size used at test time to
                avoid GPU OOM.
            test_tile (int): forwarded unchanged to the model.
            stepsize (float): forwarded unchanged to the model.
            factor (int): forwarded unchanged to the model.
            load_sr (int): forwarded unchanged to the model.
            device (str): forwarded to ``Model.from_pretrained``; the
                original documentation states that only 'gpu' is supported.
            **kwargs: passed on to ``Pipeline.__init__``.

        Raises:
            ValueError: if ``data_type`` is not supported.
            Exception: if no ``Model`` instance is available after
                initialization.
        """
        # Fail fast: reject an unsupported data type before the (possibly
        # expensive) model load instead of after it.
        if data_type not in self._SUPPORTED_DATA_TYPES:
            raise ValueError(
                'data type {} is not support currently'.format(data_type))

        if is_model(model):
            model = Model.from_pretrained(
                model,
                device=device,
                model_prefetched=True,
                invoked_by=Invoke.PIPELINE,
                data_type=data_type,
                test_ray_chunk=test_ray_chunk,
                test_tile=test_tile,
                stepsize=stepsize,
                factor=factor,
                load_sr=load_sr)

        super().__init__(model=model, **kwargs)
        if not isinstance(self.model, Model):
            logger.error('model object is not initialized.')
            raise Exception('model object is not initialized.')

        self.data_type = data_type
        logger.info('load model done')

    def preprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Return the inputs unchanged; no preprocessing is required."""
        return inputs

    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
        """Run the reconstruction.

        Args:
            input: dict with the keys 'data_cfg' (dataset configuration
                handed to the model) and 'render_dir' (output directory
                handed to the model).

        Returns:
            A dict whose ``OutputKeys.OUTPUT`` entry is the string 'Done'.

        Raises:
            KeyError: if 'data_cfg' or 'render_dir' is missing.
        """
        data_cfg = input['data_cfg']
        render_dir = input['render_dir']
        self.model.nerf_reconstruction(data_cfg, render_dir)
        return {OutputKeys.OUTPUT: 'Done'}

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Return the forward result unchanged."""
        return inputs

View File

@@ -15,6 +15,7 @@ from torch.utils.data import DataLoader, Dataset, Sampler
from torch.utils.data.dataloader import default_collate
from torch.utils.data.distributed import DistributedSampler
from modelscope.hub.check_model import check_local_model_is_latest
from modelscope.metainfo import Trainers
from modelscope.metrics import build_metric, task_default_metrics
from modelscope.metrics.prediction_saving_wrapper import \
@@ -27,6 +28,7 @@ from modelscope.msdatasets.dataset_cls.custom_datasets.builder import \
from modelscope.msdatasets.ms_dataset import MsDataset
from modelscope.outputs import ModelOutputBase
from modelscope.preprocessors.base import Preprocessor
from modelscope.swift import Swift
from modelscope.trainers.hooks.builder import HOOKS
from modelscope.trainers.hooks.priority import Priority, get_priority
from modelscope.trainers.lrscheduler.builder import build_lr_scheduler
@@ -34,7 +36,7 @@ from modelscope.trainers.optimizer.builder import build_optimizer
from modelscope.utils.config import Config, ConfigDict, JSONIteratorEncoder
from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigFields,
ConfigKeys, DistributedParallelType,
ModeKeys, ModelFile, ThirdParty,
Invoke, ModeKeys, ModelFile, ThirdParty,
TrainerStages)
from modelscope.utils.data_utils import to_device
from modelscope.utils.device import create_device
@@ -45,7 +47,6 @@ from modelscope.utils.torch_utils import (compile_model, get_dist_info,
get_local_rank, init_dist, is_dist,
is_master, is_on_same_device,
set_random_seed)
from ..swift import Swift
from .base import BaseTrainer
from .builder import TRAINERS
from .default_config import merge_cfg, merge_hooks, update_cfg
@@ -152,6 +153,10 @@ class EpochBasedTrainer(BaseTrainer):
assert cfg_file is not None, 'Config file should not be None if model is not from pretrained!'
self.model_dir = os.path.dirname(cfg_file)
self.input_model_id = None
if hasattr(model, 'model_dir'):
check_local_model_is_latest(
model.model_dir,
user_agent={Invoke.KEY: Invoke.LOCAL_TRAINER})
super().__init__(cfg_file, arg_parse_fn)
self.cfg_modify_fn = cfg_modify_fn

View File

@@ -154,6 +154,7 @@ class CVTasks(object):
motion_generation = 'motion-generation'
# 3d reconstruction
nerf_recon_acc = 'nerf-recon-acc'
nerf_recon_4k = 'nerf-recon-4k'
nerf_recon_vq_compression = 'nerf-recon-vq-compression'
# vision efficient tuning

View File

@@ -95,8 +95,10 @@ def get_wrapped_class(module_class, ignore_file_pattern=[], **kwargs):
else:
model_dir = pretrained_model_name_or_path
return module_class.from_pretrained(model_dir, *model_args,
model = module_class.from_pretrained(model_dir, *model_args,
**kwargs)
model.model_dir = model_dir
return model
return ClassWrapper

View File

@@ -62,6 +62,7 @@ tensorflow-estimator>=1.15.1
tf_slim
thop
timm>=0.4.9
torch-scatter
torchmetrics>=0.6.2
torchsummary>=1.5.1
torchvision

View File

@@ -0,0 +1,67 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import unittest
import torch
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.msdatasets import MsDataset
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import DownloadMode, Tasks
from modelscope.utils.test_utils import test_level
class NeRFRecon4KTest(unittest.TestCase):
    """Runs the nerf_recon_4k pipeline on the 'fern' LLFF scene."""

    def setUp(self) -> None:
        self.model_id = 'DAMOXR/cv_nerf-3d-reconstruction-4k-nerf_damo'

        # The dataset is fetched again on every run (FORCE_REDOWNLOAD).
        dataset = MsDataset.load(
            'DAMOXR/nerf_llff_data',
            subset_name='default',
            split='test',
            download_mode=DownloadMode.FORCE_REDOWNLOAD)
        dataset_root = dataset.config_kwargs['split_config']['test']
        scene_dir = os.path.join(dataset_root, 'nerf_llff_data', 'fern')

        self.render_dir = 'exp'
        self.data_dic = {
            'datadir': scene_dir,
            'dataset_type': 'llff',
            'load_sr': 1,
            'factor': 4,
            'ndc': True,
            'white_bkgd': False,
        }

    # Disabled variant that loads the model from a local snapshot.
    # @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    # @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only')
    # def test_run_by_direct_model_download(self):
    #     snapshot_path = snapshot_download(self.model_id)
    #     print('snapshot_path: {}'.format(snapshot_path))
    #     nerf_recon_4k = pipeline(
    #         Tasks.nerf_recon_4k,
    #         model=snapshot_path,
    #         data_type='llff',
    #     )
    #     nerf_recon_4k(
    #         dict(data_cfg=self.data_dic, render_dir=self.render_dir))

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    @unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest only')
    def test_run_modelhub(self):
        """Build the pipeline from the hub model id and run it once."""
        recon_pipeline = pipeline(
            Tasks.nerf_recon_4k, model=self.model_id, data_type='llff')
        request = {'data_cfg': self.data_dic, 'render_dir': self.render_dir}
        recon_pipeline(request)
        print('4k-nerf_damo.test_run_modelhub done')
if __name__ == '__main__':
unittest.main()