From 2ee65141e6b1153f899f45316a7b5fdc70d3131d Mon Sep 17 00:00:00 2001 From: myf272609 Date: Mon, 25 Sep 2023 21:09:18 +0800 Subject: [PATCH] [to #42322933] add 3dhuman render and animation models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增3D人物模型渲染pipeline 新增3D角色自动驱动pipeline Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14146042 * upload 3dhuman render and animation code * remove chumpy dependence * feat: Fix conflict, auto commit by WebIDE * modify code structure, add user inputs, etc. * add output path --- data/test | 2 +- modelscope/metainfo.py | 6 + modelscope/models/cv/__init__.py | 8 +- .../models/cv/human3d_animation/__init__.py | 28 ++ .../models/cv/human3d_animation/bvh_writer.py | 184 +++++++++ .../cv/human3d_animation/generate_skeleton.py | 167 ++++++++ .../models/cv/human3d_animation/transforms.py | 316 +++++++++++++++ .../models/cv/human3d_animation/utils.py | 375 ++++++++++++++++++ modelscope/outputs/outputs.py | 2 + modelscope/pipeline_inputs.py | 10 + modelscope/pipelines/cv/__init__.py | 4 + .../cv/human3d_animation_pipeline.py | 135 +++++++ .../pipelines/cv/human3d_render_pipeline.py | 169 ++++++++ modelscope/utils/constant.py | 2 + tests/pipelines/test_human3d_animation.py | 32 ++ tests/pipelines/test_human3d_render.py | 56 +++ 16 files changed, 1491 insertions(+), 5 deletions(-) create mode 100644 modelscope/models/cv/human3d_animation/__init__.py create mode 100644 modelscope/models/cv/human3d_animation/bvh_writer.py create mode 100644 modelscope/models/cv/human3d_animation/generate_skeleton.py create mode 100644 modelscope/models/cv/human3d_animation/transforms.py create mode 100644 modelscope/models/cv/human3d_animation/utils.py create mode 100644 modelscope/pipelines/cv/human3d_animation_pipeline.py create mode 100644 modelscope/pipelines/cv/human3d_render_pipeline.py create mode 100644 tests/pipelines/test_human3d_animation.py create mode 100644 
tests/pipelines/test_human3d_render.py diff --git a/data/test b/data/test index 85694c76..77a9ad7f 160000 --- a/data/test +++ b/data/test @@ -1 +1 @@ -Subproject commit 85694c76a6c270fcaadeac2cd86503c5e358b028 +Subproject commit 77a9ad7fb3cc4bcc99f4a33822c813e7ab473ba0 diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index 6cdfaeaa..f9dad32f 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -449,6 +449,8 @@ class Pipelines(object): text_to_360panorama_image = 'text-to-360panorama-image' image_try_on = 'image-try-on' human_image_generation = 'human-image-generation' + human3d_render = 'human3d-render' + human3d_animation = 'human3d-animation' image_view_transform = 'image-view-transform' image_control_3d_portrait = 'image-control-3d-portrait' @@ -923,6 +925,10 @@ DEFAULT_MODEL_FOR_PIPELINE = { 'damo/cv_SAL-VTON_virtual-try-on'), Tasks.human_image_generation: (Pipelines.human_image_generation, 'damo/cv_FreqHPT_human-image-generation'), + Tasks.human3d_render: (Pipelines.human3d_render, + 'damo/cv_3d-human-synthesis-library'), + Tasks.human3d_animation: (Pipelines.human3d_animation, + 'damo/cv_3d-human-animation'), Tasks.image_view_transform: (Pipelines.image_view_transform, 'damo/cv_image-view-transform'), Tasks.image_control_3d_portrait: ( diff --git a/modelscope/models/cv/__init__.py b/modelscope/models/cv/__init__.py index 5cbee709..3fc455c5 100644 --- a/modelscope/models/cv/__init__.py +++ b/modelscope/models/cv/__init__.py @@ -5,10 +5,10 @@ from . 
import (action_recognition, animal_recognition, bad_image_detecting, body_2d_keypoints, body_3d_keypoints, cartoon, cmdssl_video_embedding, controllable_image_generation, crowd_counting, face_detection, face_generation, - face_reconstruction, human_reconstruction, image_classification, - image_color_enhance, image_colorization, image_defrcn_fewshot, - image_denoise, image_editing, image_inpainting, - image_instance_segmentation, image_matching, + face_reconstruction, human3d_animation, human_reconstruction, + image_classification, image_color_enhance, image_colorization, + image_defrcn_fewshot, image_denoise, image_editing, + image_inpainting, image_instance_segmentation, image_matching, image_mvs_depth_estimation, image_panoptic_segmentation, image_portrait_enhancement, image_probing_model, image_quality_assessment_degradation, diff --git a/modelscope/models/cv/human3d_animation/__init__.py b/modelscope/models/cv/human3d_animation/__init__.py new file mode 100644 index 00000000..07f94b10 --- /dev/null +++ b/modelscope/models/cv/human3d_animation/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .generate_skeleton import gen_skeleton_bvh + from .utils import (read_obj, write_obj, render, rotate_x, rotate_y, + translate, projection) + +else: + _import_structure = { + 'generate_skeleton': ['gen_skeleton_bvh'], + 'utils': [ + 'read_obj', 'write_obj', 'render', 'rotate_x', 'rotate_y', + 'translate', 'projection' + ], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/cv/human3d_animation/bvh_writer.py b/modelscope/models/cv/human3d_animation/bvh_writer.py new file mode 100644 index 00000000..beacdffe --- /dev/null +++ b/modelscope/models/cv/human3d_animation/bvh_writer.py @@ -0,0 +1,184 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import numpy as np +import torch + +from .transforms import aa2quat, batch_rodrigues, mat2aa, quat2euler + + +def write_bvh(parent, + offset, + rotation, + position, + names, + frametime, + order, + path, + endsite=None): + file = open(path, 'w') + frame = rotation.shape[0] + joint_num = rotation.shape[1] + order = order.upper() + + file_string = 'HIERARCHY\n' + + seq = [] + + def write_static(idx, prefix): + nonlocal parent, offset, rotation, names + nonlocal order, endsite, file_string, seq + seq.append(idx) + if idx == 0: + name_label = 'ROOT ' + names[idx] + channel_label = 'CHANNELS 6 Xposition Yposition Zposition \ + {}rotation {}rotation {}rotation'.format(*order) + else: + name_label = 'JOINT ' + names[idx] + channel_label = 'CHANNELS 3 {}rotation {}rotation \ + {}rotation'.format(*order) + offset_label = 'OFFSET %.6f %.6f %.6f' % ( + offset[idx][0], offset[idx][1], offset[idx][2]) + + file_string += prefix + name_label + '\n' + file_string += prefix + '{\n' + file_string += prefix + '\t' + offset_label + '\n' + file_string += prefix + '\t' + 
channel_label + '\n' + + has_child = False + for y in range(idx + 1, rotation.shape[1]): + if parent[y] == idx: + has_child = True + write_static(y, prefix + '\t') + if not has_child: + file_string += prefix + '\t' + 'End Site\n' + file_string += prefix + '\t' + '{\n' + file_string += prefix + '\t\t' + 'OFFSET 0 0 0\n' + file_string += prefix + '\t' + '}\n' + + file_string += prefix + '}\n' + + write_static(0, '') + + file_string += 'MOTION\n' + 'Frames: {}\n'.format( + frame) + 'Frame Time: %.8f\n' % frametime + for i in range(frame): + file_string += '%.6f %.6f %.6f ' % (position[i][0], position[i][1], + position[i][2]) + + for j in range(joint_num): + idx = seq[j] + file_string += '%.6f %.6f %.6f ' % ( + rotation[i][idx][0], rotation[i][idx][1], rotation[i][idx][2]) + + file_string += '\n' + + file.write(file_string) + return file_string + + +class WriterWrapper: + + def __init__(self, parents): + self.parents = parents + + def axis2euler(self, rot): + rot = rot.reshape(rot.shape[0], -1, 3) # 45, 24, 3 + quat = aa2quat(rot) + euler = quat2euler(quat, order='xyz') + rot = euler + return rot + + def mapper_rot_mixamo(self, rot, n_bone): + rot = rot.reshape(rot.shape[0], -1, 3) + + smpl_mapper = [ + 0, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14, 17, 21, 15, 18, 22, 19, + 23, 20, 24 + ] + + if n_bone > 24: + hand_mapper = list(range(25, 65)) + smpl_mapper += hand_mapper + + new_rot = torch.zeros((rot.shape[0], n_bone, 3)) # n, 24, 3 + new_rot[:, :len(smpl_mapper), :] = rot[:, smpl_mapper, :] + + return new_rot + + def transform_rot_with_restpose(self, rot, rest_pose, node_list, n_bone): + + rest_pose = batch_rodrigues(rest_pose.reshape(-1, 3)).reshape( + 1, n_bone, 3, 3) # N*3-> N*3*3 + + frame_num = rot.shape[0] + rot = rot.reshape(rot.shape[0], -1, 3) + new_rot = rot.clone() + for k in range(frame_num): + action_rot = batch_rodrigues(rot[k].reshape(-1, 3)).reshape( + 1, n_bone, 3, 3) + for i in node_list: + rot1 = rest_pose[0, i, :, :] + rot2 = action_rot[0, i, :, :] 
+ nrot = torch.matmul(rot2, torch.inverse(rot1)) + nvec = mat2aa(nrot) + new_rot[k, i, :] = nvec + + new_rot = self.axis2euler(new_rot) # =# 45,24,3 + return new_rot + + def transform_rot_with_stdApose(self, rot, rest_pose): + print('transform_rot_with_stdApose') + rot = rot.reshape(rot.shape[0], -1, 3) + rest_pose = self.axis2euler(rest_pose) + print(rot.shape) + print(rest_pose.shape) + smpl_left_arm_idx = 18 + smpl_right_arm_idx = 19 + std_arm_rot = torch.tensor([[21.7184, -4.8148, 16.3985], + [-20.1108, 10.7190, -8.9279]]) + x = rest_pose[:, smpl_left_arm_idx:smpl_right_arm_idx + 1, :] + delta = (x - std_arm_rot) + rot[:, smpl_left_arm_idx:smpl_right_arm_idx + 1, :] -= delta + return rot + + def write(self, + filename, + offset, + rot=None, + action_loc=None, + rest_pose=None, + correct_arm=0): # offset: [24,3], rot:[45,72] + if not isinstance(offset, torch.Tensor): + offset = torch.tensor(offset) + n_bone = offset.shape[0] # 24 + pos = offset[0].unsqueeze(0) # 1,3 + + if rot is None: + rot = np.zeros((1, n_bone, 3)) + else: # rot: 45, 72 + if rest_pose is None: + rot = self.mapper_rot_mixamo(rot, n_bone) + else: + if correct_arm == 1: + rot = self.mapper_rot_mixamo(rot, n_bone) + print(rot.shape) + node_list_chage = [16, 17] + n_bone = rot.shape[1] + print(rot[0, 19, :]) + else: + node_list_chage = [1, 2, 3, 6, 9, 12, 13, 14, 15, 16, 17] + rot = self.transform_rot_with_restpose( + rot, rest_pose, node_list_chage, n_bone) + + rest = torch.zeros((1, n_bone * 3)) + rest = self.axis2euler(rest) + frames_add = 1 + rest = rest.repeat(frames_add, 1, 1) + rot = torch.cat((rest, rot), 0) + + pos = pos.repeat(rot.shape[0], 1) + action_len = action_loc.shape[0] + pos[-action_len:, :] = action_loc[..., :] + + names = ['%02d' % i for i in range(n_bone)] + write_bvh(self.parents, offset, rot, pos, names, 0.0333, 'xyz', + filename) diff --git a/modelscope/models/cv/human3d_animation/generate_skeleton.py b/modelscope/models/cv/human3d_animation/generate_skeleton.py new file 
mode 100644 index 00000000..556cdbd3 --- /dev/null +++ b/modelscope/models/cv/human3d_animation/generate_skeleton.py @@ -0,0 +1,167 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import pickle + +import numpy as np +import torch + +from .bvh_writer import WriterWrapper +from .utils import matrix_to_axis_angle, rotation_6d_to_matrix + + +def laod_smpl_params(pose_fname): + with open(pose_fname, 'rb') as f: + data = pickle.load(f) + pose = torch.from_numpy(data['pose']) + beta = torch.from_numpy(data['betas']) + trans = torch.from_numpy(data['trans']) + if 'joints' in data: + joints = torch.from_numpy(data['joints']) + joints = joints.reshape(1, -1, 3) + else: + joints = None + trans = trans.reshape(1, 3) + beta = beta.reshape(1, -1)[:, :10] + pose = pose.reshape(-1, 24 * 3) + return pose, beta, trans, joints + + +def set_pose_param(pose, start, end): + pose[:, start * 3:(end + 1) * 3] = 0 + return pose + + +def load_test_anim(filename, device, mode='move'): + anim = np.load(filename) + anim = torch.tensor(anim, device=device, dtype=torch.float) + poses = anim[:, :-3] + loc = anim[:, -3:] + if os.path.basename(filename)[:5] == 'comb_': + loc = loc / 100 + repeat = 0 + idx = -1 + for i in range(poses.shape[0]): + if i == 0: + continue + if repeat >= 5: + idx = i + break + if poses[i].equal(poses[i - 1]): + repeat += 1 + else: + repeat = 0 + poses = poses[:idx - 5, :] + loc = loc[:idx - 5, :] + + if mode == 'inplace': + loc[1:, :] = loc[0, :] + + return poses, loc + + +def load_syn_motion(filename, device, mode='move'): + data = np.load(filename, allow_pickle=True).item() + anim = data['thetas'] + n_joint, c, t = anim.shape + + anim = torch.tensor(anim, device=device, dtype=torch.float) + anim = anim.permute(2, 0, 1) # 180, 24, 6 + poses = anim.reshape(-1, 6) + poses = rotation_6d_to_matrix(poses) + poses = matrix_to_axis_angle(poses) + poses = poses.reshape(-1, 24, 3) + + loc = data['root_translation'] + loc = torch.tensor(loc, device=device, 
dtype=torch.float) + loc = loc.permute(1, 0) + + if mode == 'inplace': + loc = torch.zeros((t, 3)) + + print('load %s' % filename) + + return poses, loc + + +def load_action(action_name, + model_dir, + action_dir, + mode='move', + device=torch.device('cpu')): + action_path = os.path.join(action_dir, action_name + '.npy') + if not os.path.exists(action_path): + print('can not find action %s, use default action instead' % + (action_name)) + action_path = os.path.join(model_dir, '3D-assets', 'SwingDancing.npy') + print('load action %s' % action_path) + test_pose, test_loc = load_test_anim( + action_path, device, mode=mode) # pose:[45,72], loc:[45,1,3] + + return test_pose, test_loc + + +def load_action_list(action, + model_dir, + action_dir, + mode='move', + device=torch.device('cpu')): + action_list = action.split(',') + test_pose, test_loc = load_action( + action_list[0], model_dir, action_dir, mode=mode, device=device) + final_loc = test_loc[-1, :] + idx = 0 + if len(action_list) > 1: + for action in action_list: + if idx == 0: + idx += 1 + continue + print('load action %s' % action) + pose, loc = load_action( + action, model_dir, action_dir, mode=mode, device=device) + delta_loc = final_loc - loc[0, :] + loc += delta_loc + final_loc = loc[-1, :] + test_pose = torch.cat([test_pose, pose], 0) + test_loc = torch.cat([test_loc, loc], 0) + idx += 1 + return test_pose, test_loc + + +def gen_skeleton_bvh(model_dir, action_dir, case_dir, action, mode='move'): + outpath_a = os.path.join(case_dir, 'skeleton_a.bvh') + device = torch.device('cpu') + assets_dir = os.path.join(model_dir, '3D-assets') + pkl_path = os.path.join(assets_dir, 'smpl.pkl') + poses, shapes, trans, joints = laod_smpl_params(pkl_path) + if action.endswith('.npy'): + skeleton_path = os.path.join(assets_dir, 'skeleton_nohand.npy') + else: + skeleton_path = os.path.join(assets_dir, 'skeleton.npy') + data = np.load(skeleton_path, allow_pickle=True).item() + skeleton = data['skeleton'] + parent = 
data['parent'] + skeleton = skeleton.squeeze(0) + bvh_writer = WriterWrapper(parent) + + if action.endswith('.npy'): + action_path = action + print('load action %s' % action_path) + test_pose, test_loc = load_syn_motion(action_path, device, mode=mode) + bvh_writer.write( + outpath_a, + skeleton, + test_pose, + action_loc=test_loc, + rest_pose=poses) + + else: + print('load action %s' % action) + test_pose, test_loc = load_action_list( + action, model_dir, action_dir, mode='move', device=device) + std_y = torch.tensor(0.99) + test_loc = test_loc + (skeleton[0, 1] - std_y) + bvh_writer.write(outpath_a, skeleton, test_pose, action_loc=test_loc) + + print('save %s' % outpath_a) + + return 0 diff --git a/modelscope/models/cv/human3d_animation/transforms.py b/modelscope/models/cv/human3d_animation/transforms.py new file mode 100644 index 00000000..388c34ad --- /dev/null +++ b/modelscope/models/cv/human3d_animation/transforms.py @@ -0,0 +1,316 @@ +# ------------------------------------------------------------------------ +# Modified from https://github.com/facebookresearch/pytorch3d +# All Rights Reserved. +# ------------------------------------------------------------------------ +import numpy as np +import torch +import torch.nn.functional as F +from torch import Tensor + + +def batch_mm(matrix, matrix_batch): + """ + https://github.com/pytorch/pytorch/issues/14489#issuecomment-607730242 + :param matrix: Sparse or dense matrix, size (m, n). + :param matrix_batch: Batched dense matrices, size (b, n, k). + :return: The batched matrix-matrix product, + size (m, n) x (b, n, k) = (b, m, k). + """ + batch_size = matrix_batch.shape[0] + # Stack the vector batch into columns. (b, n, k) -> (n, b, k) -> (n, b*k) + vectors = matrix_batch.transpose(0, 1).reshape(matrix.shape[1], -1) + + # A matrix-matrix product is a batched matrix-vector + # product of the columns. + # And then reverse the reshaping. 
+ # (m, n) x (n, b*k) = (m, b*k) -> (m, b, k) -> (b, m, k) + return matrix.mm(vectors).reshape(matrix.shape[0], batch_size, + -1).transpose(1, 0) + + +def aa2quat(rots, form='wxyz', unified_orient=True): + """ + Convert angle-axis representation to wxyz quaternion + and to the half plan (w >= 0) + @param rots: angle-axis rotations, (*, 3) + @param form: quaternion format, either 'wxyz' or 'xyzw' + @param unified_orient: Use unified orientation for quaternion + (quaternion is dual cover of SO3) + :return: + """ + angles = rots.norm(dim=-1, keepdim=True) + norm = angles.clone() + norm[norm < 1e-8] = 1 + axis = rots / norm + quats = torch.empty( + rots.shape[:-1] + (4, ), device=rots.device, dtype=rots.dtype) + angles = angles * 0.5 + if form == 'wxyz': + quats[..., 0] = torch.cos(angles.squeeze(-1)) + quats[..., 1:] = torch.sin(angles) * axis + elif form == 'xyzw': + quats[..., :3] = torch.sin(angles) * axis + quats[..., 3] = torch.cos(angles.squeeze(-1)) + + if unified_orient: + idx = quats[..., 0] < 0 + quats[idx, :] *= -1 + + return quats + + +def quat2aa(quats): + """ + Convert wxyz quaternions to angle-axis representation + :param quats: + :return: + """ + _cos = quats[..., 0] + xyz = quats[..., 1:] + _sin = xyz.norm(dim=-1) + norm = _sin.clone() + norm[norm < 1e-7] = 1 + axis = xyz / norm.unsqueeze(-1) + angle = torch.atan2(_sin, _cos) * 2 + return axis * angle.unsqueeze(-1) + + +def quat2mat(quats: torch.Tensor): + """ + Convert (w, x, y, z) quaternions to 3x3 rotation matrix + :param quats: quaternions of shape (..., 4) + :return: rotation matrices of shape (..., 3, 3) + """ + qw = quats[..., 0] + qx = quats[..., 1] + qy = quats[..., 2] + qz = quats[..., 3] + + x2 = qx + qx + y2 = qy + qy + z2 = qz + qz + xx = qx * x2 + yy = qy * y2 + wx = qw * x2 + xy = qx * y2 + yz = qy * z2 + wy = qw * y2 + xz = qx * z2 + zz = qz * z2 + wz = qw * z2 + + m = torch.empty( + quats.shape[:-1] + (3, 3), device=quats.device, dtype=quats.dtype) + m[..., 0, 0] = 1.0 - (yy + zz) + 
m[..., 0, 1] = xy - wz + m[..., 0, 2] = xz + wy + m[..., 1, 0] = xy + wz + m[..., 1, 1] = 1.0 - (xx + zz) + m[..., 1, 2] = yz - wx + m[..., 2, 0] = xz - wy + m[..., 2, 1] = yz + wx + m[..., 2, 2] = 1.0 - (xx + yy) + + return m + + +def quat2euler(q, order='xyz', degrees=True): + """ + Convert (w, x, y, z) quaternions to xyz euler angles. + This is used for bvh output. + """ + q0 = q[..., 0] + q1 = q[..., 1] + q2 = q[..., 2] + q3 = q[..., 3] + es = torch.empty(q0.shape + (3, ), device=q.device, dtype=q.dtype) + + if order == 'xyz': + es[..., 2] = torch.atan2(2 * (q0 * q3 - q1 * q2), + q0 * q0 + q1 * q1 - q2 * q2 - q3 * q3) + es[..., 1] = torch.asin((2 * (q1 * q3 + q0 * q2)).clip(-1, 1)) + es[..., 0] = torch.atan2(2 * (q0 * q1 - q2 * q3), + q0 * q0 - q1 * q1 - q2 * q2 + q3 * q3) + else: + raise NotImplementedError('Cannot convert to ordering %s' % order) + + if degrees: + es = es * 180 / np.pi + + return es + + +def aa2mat(rots): + """ + Convert angle-axis representation to rotation matrix + :param rots: angle-axis representation + :return: + """ + quat = aa2quat(rots) + mat = quat2mat(quat) + return mat + + +def inv_affine(mat): + """ + Calculate the inverse of any affine transformation + """ + affine = torch.zeros((mat.shape[:2] + (1, 4))) + affine[..., 3] = 1 + vert_mat = torch.cat((mat, affine), dim=2) + vert_mat_inv = torch.inverse(vert_mat) + return vert_mat_inv[..., :3, :] + + +def inv_rigid_affine(mat): + """ + Calculate the inverse of a rigid affine transformation + """ + res = mat.clone() + res[..., :3] = mat[..., :3].transpose(-2, -1) + res[..., + 3] = -torch.matmul(res[..., :3], mat[..., 3].unsqueeze(-1)).squeeze(-1) + return res + + +def _sqrt_positive_part(x: torch.Tensor) -> torch.Tensor: + """ + Returns torch.sqrt(torch.max(0, x)) + but with a zero subgradient where x is 0. 
+ """ + ret = torch.zeros_like(x) + positive_mask = x > 0 + ret[positive_mask] = torch.sqrt(x[positive_mask]) + return ret + + +def matrix_to_quaternion(matrix: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as rotation matrices to quaternions. + + Args: + matrix: Rotation matrices as tensor of shape (..., 3, 3). + + Returns: + quaternions with real part first, as tensor of shape (..., 4). + """ + if matrix.size(-1) != 3 or matrix.size(-2) != 3: + raise ValueError(f'Invalid rotation matrix shape {matrix.shape}.') + + batch_dim = matrix.shape[:-2] + m00, m01, m02, m10, m11, m12, m20, m21, m22 = torch.unbind( + matrix.reshape(batch_dim + (9, )), dim=-1) + + q_abs = _sqrt_positive_part( + torch.stack( + [ + 1.0 + m00 + m11 + m22, + 1.0 + m00 - m11 - m22, + 1.0 - m00 + m11 - m22, + 1.0 - m00 - m11 + m22, + ], + dim=-1, + )) + + # we produce the desired quaternion multiplied by each of r, i, j, k + quat_by_rijk = torch.stack( + [ + torch.stack([q_abs[..., 0]**2, m21 - m12, m02 - m20, m10 - m01], + dim=-1), + torch.stack([m21 - m12, q_abs[..., 1]**2, m10 + m01, m02 + m20], + dim=-1), + torch.stack([m02 - m20, m10 + m01, q_abs[..., 2]**2, m12 + m21], + dim=-1), + torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3]**2], + dim=-1), + ], + dim=-2, + ) + + flr = torch.tensor(0.1).to(dtype=q_abs.dtype, device=q_abs.device) + quat_candidates = quat_by_rijk / (2.0 * q_abs[..., None].max(flr)) + + return quat_candidates[F.one_hot(q_abs.argmax( + dim=-1), num_classes=4) > 0.5, :].reshape(batch_dim + (4, )) + + +def quaternion_to_axis_angle(quaternions: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as quaternions to axis/angle. + + Args: + quaternions: quaternions with real part first, + as tensor of shape (..., 4). + + Returns: + Rotations given as a vector in axis angle form, as a tensor + of shape (..., 3), where the magnitude is the angle + turned anticlockwise in radians around the vector's + direction. 
+ """ + norms = torch.norm(quaternions[..., 1:], p=2, dim=-1, keepdim=True) + half_angles = torch.atan2(norms, quaternions[..., :1]) + angles = 2 * half_angles + eps = 1e-6 + small_angles = angles.abs() < eps + sin_half_angles_over_angles = torch.empty_like(angles) + sin_half_angles_over_angles[~small_angles] = ( + torch.sin(half_angles[~small_angles]) / angles[~small_angles]) + # for x small, sin(x/2) is about x/2 - (x/2)^3/6 + # so sin(x/2)/x is about 1/2 - (x*x)/48 + sin_half_angles_over_angles[small_angles] = ( + 0.5 - (angles[small_angles] * angles[small_angles]) / 48) + return quaternions[..., 1:] / sin_half_angles_over_angles + + +def mat2aa(matrix: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as rotation matrices to axis/angle. + + Args: + matrix: Rotation matrices as tensor of shape (..., 3, 3). + + Returns: + Rotations given as a vector in axis angle form, as a tensor + of shape (..., 3), where the magnitude is the angle + turned anticlockwise in radians around the vector's + direction. 
+ """ + return quaternion_to_axis_angle(matrix_to_quaternion(matrix)) + + +def batch_rodrigues(rot_vecs: Tensor, epsilon: float = 1e-8) -> Tensor: + ''' Calculates the rotation matrices for a batch of rotation vectors + Parameters + ---------- + rot_vecs: torch.tensor Nx3 + array of N axis-angle vectors + Returns + ------- + R: torch.tensor Nx3x3 + The rotation matrices for the given axis-angle parameters + ''' + assert len(rot_vecs.shape) == 2, ( + f'Expects an array of size Bx3, but received {rot_vecs.shape}') + + batch_size = rot_vecs.shape[0] + device = rot_vecs.device + dtype = rot_vecs.dtype + + angle = torch.norm(rot_vecs + epsilon, dim=1, keepdim=True, p=2) + rot_dir = rot_vecs / angle + + cos = torch.unsqueeze(torch.cos(angle), dim=1) + sin = torch.unsqueeze(torch.sin(angle), dim=1) + + # Bx1 arrays + rx, ry, rz = torch.split(rot_dir, 1, dim=1) + K = torch.zeros((batch_size, 3, 3), dtype=dtype, device=device) + + zeros = torch.zeros((batch_size, 1), dtype=dtype, device=device) + K = torch.cat([zeros, -rz, ry, rz, zeros, -rx, -ry, rx, zeros], dim=1) \ + .view((batch_size, 3, 3)) + + ident = torch.eye(3, dtype=dtype, device=device).unsqueeze(dim=0) + rot_mat = ident + sin * K + (1 - cos) * torch.bmm(K, K) + return rot_mat diff --git a/modelscope/models/cv/human3d_animation/utils.py b/modelscope/models/cv/human3d_animation/utils.py new file mode 100644 index 00000000..6be9fb25 --- /dev/null +++ b/modelscope/models/cv/human3d_animation/utils.py @@ -0,0 +1,375 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+ +import os + +import cv2 +import numpy as np +import nvdiffrast.torch as dr +import torch +import torch.nn.functional as F + + +def read_obj(obj_path, print_shape=False): + with open(obj_path, 'r') as f: + bfm_lines = f.readlines() + + vertices = [] + faces = [] + uvs = [] + vns = [] + faces_uv = [] + faces_normal = [] + max_face_length = 0 + for line in bfm_lines: + if line[:2] == 'v ': + vertex = [ + float(a) for a in line.strip().split(' ')[1:] if len(a) > 0 + ] + vertices.append(vertex) + + if line[:2] == 'f ': + items = line.strip().split(' ')[1:] + face = [int(a.split('/')[0]) for a in items if len(a) > 0] + max_face_length = max(max_face_length, len(face)) + faces.append(face) + + if '/' in items[0] and len(items[0].split('/')[1]) > 0: + face_uv = [int(a.split('/')[1]) for a in items if len(a) > 0] + faces_uv.append(face_uv) + + if '/' in items[0] and len(items[0].split('/')) >= 3 and len( + items[0].split('/')[2]) > 0: + face_normal = [ + int(a.split('/')[2]) for a in items if len(a) > 0 + ] + faces_normal.append(face_normal) + + if line[:3] == 'vt ': + items = line.strip().split(' ')[1:] + uv = [float(a) for a in items if len(a) > 0] + uvs.append(uv) + + if line[:3] == 'vn ': + items = line.strip().split(' ')[1:] + vn = [float(a) for a in items if len(a) > 0] + vns.append(vn) + + vertices = np.array(vertices).astype(np.float32) + if max_face_length <= 3: + faces = np.array(faces).astype(np.int32) + else: + print('not a triangle face mesh!') + + if vertices.shape[1] == 3: + mesh = { + 'vertices': vertices, + 'faces': faces, + } + else: + mesh = { + 'vertices': vertices[:, :3], + 'colors': vertices[:, 3:], + 'faces': faces, + } + + if len(uvs) > 0: + uvs = np.array(uvs).astype(np.float32) + mesh['uvs'] = uvs + + if len(vns) > 0: + vns = np.array(vns).astype(np.float32) + mesh['normals'] = vns + + if len(faces_uv) > 0: + if max_face_length <= 3: + faces_uv = np.array(faces_uv).astype(np.int32) + mesh['faces_uv'] = faces_uv + + if len(faces_normal) > 0: + if 
max_face_length <= 3: + faces_normal = np.array(faces_normal).astype(np.int32) + mesh['faces_normal'] = faces_normal + + if print_shape: + print('num of vertices', len(vertices)) + print('num of faces', len(faces)) + return mesh + + +def write_obj(save_path, mesh): + save_dir = os.path.dirname(save_path) + save_name = os.path.splitext(os.path.basename(save_path))[0] + + if 'texture_map' in mesh: + cv2.imwrite( + os.path.join(save_dir, save_name + '.png'), mesh['texture_map']) + + with open(os.path.join(save_dir, save_name + '.mtl'), 'w') as wf: + wf.write('newmtl material_0\n') + wf.write('Ka 1.000000 0.000000 0.000000\n') + wf.write('Kd 1.000000 1.000000 1.000000\n') + wf.write('Ks 0.000000 0.000000 0.000000\n') + wf.write('Tr 0.000000\n') + wf.write('illum 0\n') + wf.write('Ns 0.000000\n') + wf.write('map_Kd {}\n'.format(save_name + '.png')) + + with open(save_path, 'w') as wf: + if 'texture_map' in mesh: + wf.write('# Create by ModelScope\n') + wf.write('mtllib ./{}.mtl\n'.format(save_name)) + + if 'colors' in mesh: + for i, v in enumerate(mesh['vertices']): + wf.write('v {} {} {} {} {} {}\n'.format( + v[0], v[1], v[2], mesh['colors'][i][0], + mesh['colors'][i][1], mesh['colors'][i][2])) + else: + for v in mesh['vertices']: + wf.write('v {} {} {}\n'.format(v[0], v[1], v[2])) + + if 'uvs' in mesh: + for uv in mesh['uvs']: + wf.write('vt {} {}\n'.format(uv[0], uv[1])) + + if 'normals' in mesh: + for vn in mesh['normals']: + wf.write('vn {} {} {}\n'.format(vn[0], vn[1], vn[2])) + + if 'faces' in mesh: + for ind, face in enumerate(mesh['faces']): + if 'faces_uv' in mesh or 'faces_normal' in mesh: + if 'faces_uv' in mesh: + face_uv = mesh['faces_uv'][ind] + else: + face_uv = face + if 'faces_normal' in mesh: + face_normal = mesh['faces_normal'][ind] + else: + face_normal = face + row = 'f ' + ' '.join([ + '{}/{}/{}'.format(face[i], face_uv[i], face_normal[i]) + for i in range(len(face)) + ]) + '\n' + else: + row = 'f ' + ' '.join( + ['{}'.format(face[i]) + for i in 
range(len(face))]) + '\n' + wf.write(row) + + +def projection(x=0.1, n=1.0, f=50.0): + return np.array([[n / x, 0, 0, 0], [0, n / x, 0, 0], + [0, 0, -(f + n) / (f - n), -(2 * f * n) / (f - n)], + [0, 0, -1, 0]]).astype(np.float32) + + +def translate(x, y, z): + return np.array([[1, 0, 0, x], [0, 1, 0, y], [0, 0, 1, z], + [0, 0, 0, 1]]).astype(np.float32) + + +def rotate_x(a): + s, c = np.sin(a), np.cos(a) + return np.array([[1, 0, 0, 0], [0, c, s, 0], [0, -s, c, 0], + [0, 0, 0, 1]]).astype(np.float32) + + +def rotate_y(a): + s, c = np.sin(a), np.cos(a) + return np.array([[c, 0, s, 0], [0, 1, 0, 0], [-s, 0, c, 0], + [0, 0, 0, 1]]).astype(np.float32) + + +def dot(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + return torch.sum(x * y, -1, keepdim=True) + + +def reflect(x: torch.Tensor, n: torch.Tensor) -> torch.Tensor: + return 2 * dot(x, n) * n - x + + +def length(x: torch.Tensor, eps: float = 1e-20) -> torch.Tensor: + return torch.sqrt(torch.clamp( + dot(x, x), + min=eps)) # Clamp to avoid nan gradients because grad(sqrt(0)) = NaN + + +def safe_normalize(x: torch.Tensor, eps: float = 1e-20) -> torch.Tensor: + return x / length(x, eps) + + +def transform_pos(mtx, pos): + t_mtx = torch.from_numpy(mtx).cuda() if isinstance(mtx, + np.ndarray) else mtx + posw = torch.cat([pos, torch.ones([pos.shape[0], 1]).cuda()], axis=1) + return torch.matmul(posw, t_mtx.t())[None, ...] 
def render(glctx, mtx, pos, pos_idx, uv, uv_idx, tex, resolution, enable_mip,
           max_mip_level):
    """Rasterize a textured mesh with nvdiffrast.

    Args:
        glctx: nvdiffrast rasterizer context.
        mtx: (4, 4) model-view-projection matrix (numpy array or torch tensor).
        pos: (N, 3) vertex positions (CUDA tensor).
        pos_idx: (F, 3) int32 triangle indices.
        uv: (M, 2) vertex UV coordinates.
        uv_idx: (F, 3) int32 per-face UV indices.
        tex: (H, W, C) float texture in [0, 1].
        resolution: side length of the square output image in pixels.
        enable_mip: if True, sample the texture with mip-mapping.
        max_mip_level: highest mip level used when enable_mip is True.

    Returns:
        Tuple (color, mask, normal); background pixels are filled with white,
        normals are remapped from [-1, 1] to [0, 1].
    """
    pos_clip = transform_pos(mtx, pos)
    rast_out, rast_out_db = dr.rasterize(
        glctx, pos_clip, pos_idx, resolution=[resolution, resolution])

    if enable_mip:
        texc, texd = dr.interpolate(
            uv[None, ...],
            rast_out,
            uv_idx,
            rast_db=rast_out_db,
            diff_attrs='all')
        color = dr.texture(
            tex[None, ...],
            texc,
            texd,
            filter_mode='linear-mipmap-linear',
            max_mip_level=max_mip_level)
    else:
        texc, _ = dr.interpolate(uv[None, ...], rast_out, uv_idx)
        color = dr.texture(tex[None, ...], texc, filter_mode='linear')

    # Flat per-face geometric normals, interpolated constant across faces.
    pos_idx = pos_idx.type(torch.long)
    v0 = pos[pos_idx[:, 0], :]
    v1 = pos[pos_idx[:, 1], :]
    v2 = pos[pos_idx[:, 2], :]
    # dim=-1 is explicit: torch.cross defaults to the *first* dimension of
    # size 3, which would silently pick the face axis for a 3-face mesh.
    face_normals = safe_normalize(torch.cross(v1 - v0, v2 - v0, dim=-1))
    face_normal_indices = (torch.arange(
        0, face_normals.shape[0], dtype=torch.int64,
        device='cuda')[:, None]).repeat(1, 3)
    gb_geometric_normal, _ = dr.interpolate(face_normals[None, ...], rast_out,
                                            face_normal_indices.int())
    normal = (gb_geometric_normal + 1) * 0.5
    mask = torch.clamp(rast_out[..., -1:], 0, 1)
    color = color * mask + (1 - mask) * torch.ones_like(color)
    normal = normal * mask + (1 - mask) * torch.ones_like(normal)

    return color, mask, normal


# The following code is based on https://github.com/Mathux/ACTOR.git
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
# Check PYTORCH3D_LICENCE before use


def _copysign(a, b):
    """
    Return a tensor where each element has the absolute value taken from the,
    corresponding element of a, with sign taken from the corresponding
    element of b. This is like the standard copysign floating-point operation,
    but is not careful about negative 0 and NaN.

    Args:
        a: source tensor.
        b: tensor whose signs will be used, of the same shape as a.

    Returns:
        Tensor of the same shape as a with the signs of b.
    """
    signs_differ = (a < 0) != (b < 0)
    return torch.where(signs_differ, -a, a)


def _sqrt_positive_part(x):
    """
    Returns torch.sqrt(torch.max(0, x))
    but with a zero subgradient where x is 0.
    """
    ret = torch.zeros_like(x)
    positive_mask = x > 0
    ret[positive_mask] = torch.sqrt(x[positive_mask])
    return ret


def matrix_to_quaternion(matrix):
    """
    Convert rotations given as rotation matrices to quaternions.

    Args:
        matrix: Rotation matrices as tensor of shape (..., 3, 3).

    Returns:
        quaternions with real part first, as tensor of shape (..., 4).
    """
    if matrix.size(-1) != 3 or matrix.size(-2) != 3:
        # Fixed message: the original read 'shape f{...}' — a stray literal
        # 'f' left inside the f-string.
        raise ValueError(f'Invalid rotation matrix shape {matrix.shape}.')
    m00 = matrix[..., 0, 0]
    m11 = matrix[..., 1, 1]
    m22 = matrix[..., 2, 2]
    o0 = 0.5 * _sqrt_positive_part(1 + m00 + m11 + m22)
    x = 0.5 * _sqrt_positive_part(1 + m00 - m11 - m22)
    y = 0.5 * _sqrt_positive_part(1 - m00 + m11 - m22)
    z = 0.5 * _sqrt_positive_part(1 - m00 - m11 + m22)
    o1 = _copysign(x, matrix[..., 2, 1] - matrix[..., 1, 2])
    o2 = _copysign(y, matrix[..., 0, 2] - matrix[..., 2, 0])
    o3 = _copysign(z, matrix[..., 1, 0] - matrix[..., 0, 1])
    return torch.stack((o0, o1, o2, o3), -1)


def quaternion_to_axis_angle(quaternions):
    """
    Convert rotations given as quaternions to axis/angle.

    Args:
        quaternions: quaternions with real part first,
            as tensor of shape (..., 4).

    Returns:
        Rotations given as a vector in axis angle form, as a tensor
        of shape (..., 3), where the magnitude is the angle
        turned anticlockwise in radians around the vector's
        direction.
    """
    norms = torch.norm(quaternions[..., 1:], p=2, dim=-1, keepdim=True)
    half_angles = torch.atan2(norms, quaternions[..., :1])
    angles = 2 * half_angles
    eps = 1e-6
    small_angles = angles.abs() < eps
    sin_half_angles_over_angles = torch.empty_like(angles)
    sin_half_angles_over_angles[~small_angles] = (
        torch.sin(half_angles[~small_angles]) / angles[~small_angles])
    # for x small, sin(x/2) is about x/2 - (x/2)^3/6
    # so sin(x/2)/x is about 1/2 - (x*x)/48
    sin_half_angles_over_angles[small_angles] = (
        0.5 - (angles[small_angles] * angles[small_angles]) / 48)
    return quaternions[..., 1:] / sin_half_angles_over_angles


def matrix_to_axis_angle(matrix):
    """
    Convert rotations given as rotation matrices to axis/angle.

    Args:
        matrix: Rotation matrices as tensor of shape (..., 3, 3).

    Returns:
        Rotations given as a vector in axis angle form, as a tensor
        of shape (..., 3), where the magnitude is the angle
        turned anticlockwise in radians around the vector's
        direction.
    """
    return quaternion_to_axis_angle(matrix_to_quaternion(matrix))


def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor:
    """
    Converts 6D rotation representation by Zhou et al. [1] to rotation matrix
    using Gram--Schmidt orthogonalisation per Section B of [1].
    Args:
        d6: 6D rotation representation, of size (*, 6)

    Returns:
        batch of rotation matrices of size (*, 3, 3)

    [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H.
    On the Continuity of Rotation Representations in Neural Networks.
    IEEE Conference on Computer Vision and Pattern Recognition, 2019.
    Retrieved from http://arxiv.org/abs/1812.07035
    """

    a1, a2 = d6[..., :3], d6[..., 3:]
    b1 = F.normalize(a1, dim=-1)
    b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1
    b2 = F.normalize(b2, dim=-1)
    b3 = torch.cross(b1, b2, dim=-1)
    return torch.stack((b1, b2, b3), dim=-2)
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import Any, Dict

import cv2

from modelscope.metainfo import Pipelines
from modelscope.models.cv.human3d_animation import (gen_skeleton_bvh, read_obj,
                                                    write_obj)
from modelscope.msdatasets import MsDataset
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger

logger = get_logger()


@PIPELINES.register_module(
    Tasks.human3d_animation, module_name=Pipelines.human3d_animation)
class Human3DAnimationPipeline(Pipeline):
    """Human3D character auto-animation pipeline.

    Example:

    ```python
    >>> from modelscope.pipelines import pipeline
    >>> human3d = pipeline(Tasks.human3d_animation,
                           'damo/cv_3d-human-animation')
    >>> human3d({
            'dataset_id': 'damo/3DHuman_synthetic_dataset',  # dataset id (str)
            'case_id': '3f2a7538253e42a8',  # case id (str)
            'action_dataset': 'damo/3DHuman_action_dataset',  # action data id
            'action': 'ArmsHipHopDance',  # action name or file path (str)
            'save_dir': 'output',  # save directory (str)
        })
    ```
    """

    def __init__(self, model, device='gpu', **kwargs):
        """Create a 3D-human animation pipeline.

        Args:
            model (str or Model): model_id on modelscope hub (resolved to a
                local model directory by the base class).
            device (str): only support gpu.
        """
        super().__init__(model=model, **kwargs)
        self.model_dir = model
        # Lazy %-style arg: the original `logger.info('model_dir:', x)`
        # passed an extra argument with no placeholder, which makes the
        # logging module raise an internal formatting error.
        logger.info('model_dir: %s', self.model_dir)

    def preprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        # Inputs are plain ids/paths; nothing to preprocess.
        return inputs

    def gen_skeleton(self, case_dir, action_dir, action):
        """Generate the skeleton/BVH for `action`.

        Stores case_dir/action_dir/action on self for the later skinning
        step; returns the status from gen_skeleton_bvh.
        """
        self.case_dir = case_dir
        self.action_dir = action_dir
        self.action = action
        status = gen_skeleton_bvh(self.model_dir, self.action_dir,
                                  self.case_dir, self.action)
        return status

    def gen_weights(self, save_dir=None):
        """Run blender skinning and export a .glb; returns the output path.

        Must be called after gen_skeleton (reads self.case_dir/self.action).
        """
        case_name = os.path.basename(self.case_dir)
        action_name = os.path.basename(self.action).replace('.npy', '')
        if save_dir is None:
            gltf_path = os.path.join(self.case_dir, '%s-%s.glb' %
                                     (case_name, action_name))
        else:
            os.makedirs(save_dir, exist_ok=True)
            gltf_path = os.path.join(save_dir, '%s-%s.glb' %
                                     (case_name, action_name))
        exec_path = os.path.join(self.model_dir, 'skinning.py')

        # NOTE(review): requires `blender` on PATH; the paths come from local
        # model/dataset files, but os.system with interpolated paths breaks
        # on spaces — consider subprocess.run with an argument list.
        cmd = f'blender -b -P {exec_path} -- --input {self.case_dir}' \
              f' --gltf_path {gltf_path} --action {self.action}'
        os.system(cmd)
        return gltf_path

    def animate(self, mesh_path, action_dir, action, save_dir=None):
        """Recenter the body mesh, generate the skeleton and export a .glb."""
        case_dir = os.path.dirname(os.path.abspath(mesh_path))
        tex_path = mesh_path.replace('.obj', '.png')
        mesh = read_obj(mesh_path)
        tex = cv2.imread(tex_path)
        vertices = mesh['vertices']
        # Recenter so the bounding-box center sits at half of a nominal
        # 1.8 m body height.
        cent = (vertices.max(axis=0) + vertices.min(axis=0)) / 2
        new_cent = (0, 1.8 / 2, 0)
        vertices -= (cent - new_cent)
        mesh['vertices'] = vertices
        mesh['texture_map'] = tex
        write_obj(mesh_path, mesh)

        self.gen_skeleton(case_dir, action_dir, action)
        gltf_path = self.gen_weights(save_dir)
        if os.path.exists(gltf_path):
            logger.info('save animation succeed!')
        else:
            logger.info('save animation failed!')
        return gltf_path

    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
        """Resolve mesh/action locations and produce the animated .glb path."""
        dataset_id = input['dataset_id']
        case_id = input['case_id']
        action_data_id = input['action_dataset']
        action = input['action']
        save_dir = input.get('save_dir')

        if case_id.endswith('.obj'):
            # A direct path to a body mesh was supplied.
            mesh_path = case_id
        else:
            # Download the case subset from the hub dataset.
            dataset_name = dataset_id.split('/')[-1]
            user_name = dataset_id.split('/')[0]
            data_dir = MsDataset.load(
                dataset_name, namespace=user_name,
                subset_name=case_id).config_kwargs['split_config']['test']
            case_dir = os.path.join(data_dir, case_id)
            mesh_path = os.path.join(case_dir, 'body.obj')
            # Lazy %s arg (was an extra positional with no placeholder).
            logger.info('load mesh: %s', mesh_path)

        dataset_name = action_data_id.split('/')[-1]
        user_name = action_data_id.split('/')[0]
        action_dir = MsDataset.load(
            dataset_name, namespace=user_name,
            split='test').config_kwargs['split_config']['test']
        action_dir = os.path.join(action_dir, 'actions_a')

        output = self.animate(mesh_path, action_dir, action, save_dir)

        return {OutputKeys.OUTPUT: output}

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        # forward() already returns the final output dict.
        return inputs


# ---- next file: modelscope/pipelines/cv/human3d_render_pipeline.py ----
# Copyright (c) Alibaba, Inc. and its affiliates.
import io
import os
from typing import Any, Dict

import cv2
import numpy as np
import nvdiffrast.torch as dr
import torch
import tqdm

from modelscope.metainfo import Pipelines
from modelscope.models.cv.face_reconstruction.utils import mesh_to_string
from modelscope.models.cv.human3d_animation import (projection, read_obj,
                                                    render, rotate_x, rotate_y,
                                                    translate)
from modelscope.msdatasets import MsDataset
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Model, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.pipelines.util import is_model
from modelscope.utils.constant import Invoke, Tasks
from modelscope.utils.logger import get_logger

logger = get_logger()


@PIPELINES.register_module(
    Tasks.human3d_render, module_name=Pipelines.human3d_render)
class Human3DRenderPipeline(Pipeline):
    """Human3D library render pipeline.

    Example:

    ```python
    >>> from modelscope.pipelines import pipeline
    >>> human3d = pipeline(Tasks.human3d_render,
                           'damo/cv_3d-human-synthesis-library')
    >>> human3d({
            'dataset_id': 'damo/3DHuman_synthetic_dataset',  # dataset id (str)
            'case_id': '3f2a7538253e42a8',  # case id (str)
        })
    ```
    """

    def __init__(self, model: str, device='gpu', **kwargs):
        """Create a 3D human render pipeline.

        Args:
            model (str or Model): model_id on modelscope hub.
            device (str): only support gpu.
        """
        super().__init__(model=model, **kwargs)
        self.model_dir = model

    def preprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        # Inputs are plain ids/paths; nothing to preprocess.
        return inputs

    def load_3d_model(self, mesh_path):
        """Load an .obj mesh plus its sibling .png texture.

        Falls back to a black 256x256 texture when the texture file is
        missing or cannot be decoded.
        """
        mesh = read_obj(mesh_path)
        tex_path = mesh_path.replace('.obj', '.png')
        tex = cv2.imread(tex_path) if os.path.exists(tex_path) else None
        if tex is None:
            # Covers both a missing file and cv2.imread decode failure
            # (imread returns None instead of raising) — the original
            # crashed on .copy() below in the latter case.
            tex = np.zeros((256, 256, 3), dtype=np.uint8)
        mesh['texture_map'] = tex.copy()
        return mesh, tex

    def format_nvdiffrast_format(self, mesh, tex):
        """Convert mesh + texture into CUDA tensors for nvdiffrast.

        Shifts 1-based OBJ face indices to 0-based where needed and flips
        the OpenCV BGR texture to RGB, vertically mirrored for UV space.
        """
        vert = mesh['vertices']
        tri = mesh['faces']
        tri = tri - 1 if tri.min() == 1 else tri
        vert_uv = mesh['uvs']
        tri_uv = mesh['faces_uv']
        tri_uv = tri_uv - 1 if tri_uv.min() == 1 else tri_uv
        vtx_pos = torch.from_numpy(vert.astype(np.float32)).cuda()
        pos_idx = torch.from_numpy(tri.astype(np.int32)).cuda()
        vtx_uv = torch.from_numpy(vert_uv.astype(np.float32)).cuda()
        uv_idx = torch.from_numpy(tri_uv.astype(np.int32)).cuda()
        tex = tex[::-1, :, ::-1]
        tex = torch.from_numpy(tex.astype(np.float32) / 255.0).cuda()
        return vtx_pos, pos_idx, vtx_uv, uv_idx, tex

    def render_scene(self, mesh_path):
        """Render an 80-frame turntable of color and normal images."""
        if not os.path.exists(mesh_path):
            logger.info('can not found %s, use default one' % mesh_path)
            mesh_path = os.path.join(self.model_dir, '3D-assets',
                                     '3f2a7538253e42a8', 'body.obj')

        mesh, texture = self.load_3d_model(mesh_path)
        vtx_pos, pos_idx, vtx_uv, uv_idx, tex = self.format_nvdiffrast_format(
            mesh, texture)

        glctx = dr.RasterizeCudaContext()
        ang = 0.0
        frame_length = 80
        step = 2 * np.pi / frame_length  # one full revolution over all frames
        frames_color = []
        frames_normals = []
        for i in tqdm.tqdm(range(frame_length)):
            proj = projection(x=0.4, n=1.0, f=200.0)
            a_rot = np.matmul(rotate_x(-0.1), rotate_y(ang))
            a_mv = np.matmul(translate(0, 0, -2.5), a_rot)
            r_mvp = np.matmul(proj, a_mv).astype(np.float32)
            pred_img, pred_mask, normal = render(
                glctx,
                r_mvp,
                vtx_pos,
                pos_idx,
                vtx_uv,
                uv_idx,
                tex,
                resolution=512,
                enable_mip=False,
                max_mip_level=9)
            # To uint8, flipped vertically back to image coordinates.
            color = np.clip(
                np.rint(pred_img[0].detach().cpu().numpy() * 255.0), 0,
                255).astype(np.uint8)[::-1, :, :]
            normals = np.clip(
                np.rint(normal[0].detach().cpu().numpy() * 255.0), 0,
                255).astype(np.uint8)[::-1, :, :]
            frames_color.append(color)
            frames_normals.append(normals)
            ang = ang + step

        logger.info('load case %s done'
                    % os.path.basename(os.path.dirname(mesh_path)))

        return mesh, frames_color, frames_normals

    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
        """Resolve the mesh location and render the turntable frames."""
        dataset_id = input['dataset_id']
        case_id = input['case_id']
        if case_id.endswith('.obj'):
            # A direct path to a body mesh was supplied.
            mesh_path = case_id
        else:
            dataset_name = dataset_id.split('/')[-1]
            user_name = dataset_id.split('/')[0]
            data_dir = MsDataset.load(
                dataset_name, namespace=user_name,
                subset_name=case_id).config_kwargs['split_config']['test']
            case_dir = os.path.join(data_dir, case_id)
            mesh_path = os.path.join(case_dir, 'body.obj')

        mesh, colors, normals = self.render_scene(mesh_path)

        results = {
            'mesh': mesh,
            'frames_color': colors,
            'frames_normal': normals,
        }
        return {OutputKeys.OUTPUT_OBJ: None, OutputKeys.OUTPUT: results}

    def postprocess(self, inputs, **kwargs) -> Dict[str, Any]:
        """Optionally serialize the mesh to an in-memory .obj buffer."""
        # Renamed from `render` to avoid shadowing the imported render()
        # function from human3d_animation.
        need_render = kwargs.get('render', False)
        output_obj = inputs[OutputKeys.OUTPUT_OBJ]
        results = inputs[OutputKeys.OUTPUT]

        if need_render:
            output_obj = io.BytesIO()
            mesh_str = mesh_to_string(results['mesh'])
            mesh_bytes = mesh_str.encode(encoding='utf-8')
            output_obj.write(mesh_bytes)

        result = {
            OutputKeys.OUTPUT_OBJ: output_obj,
            OutputKeys.OUTPUT: None if need_render else results,
        }
        return result


# ---- next file: tests/pipelines/test_human3d_animation.py ----
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level


class Human3DAnimationTest(unittest.TestCase):
    """Smoke test for the human3d-animation pipeline.

    Downloads the model and datasets from the hub and runs one animation.
    """

    def setUp(self) -> None:
        self.model_id = 'damo/cv_3d-human-animation'
        self.task = Tasks.human3d_animation

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_modelhub(self):
        human3d = pipeline(self.task, model=self.model_id)
        # Renamed from `input` to avoid shadowing the builtin.
        pipeline_input = {
            'dataset_id': 'damo/3DHuman_synthetic_dataset',
            'case_id': '3f2a7538253e42a8',
            'action_dataset': 'damo/3DHuman_action_dataset',
            'action': 'SwingDancing',
            'save_dir': 'outputs',
        }
        output = human3d(pipeline_input)
        print('saved animation file to %s' % output)

        print('human3d_animation.test_run_modelhub done')


if __name__ == '__main__':
    unittest.main()


# ---- next file: tests/pipelines/test_human3d_render.py ----
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import unittest

import imageio

from modelscope.models.cv.human3d_animation.utils import write_obj
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level


class Human3DRenderTest(unittest.TestCase):
    """Smoke test for the human3d-render pipeline.

    Downloads the model and a sample case from the hub, renders a turntable
    and writes the mesh plus color/normal GIFs to disk.
    """

    def setUp(self) -> None:
        self.model_id = 'damo/cv_3d-human-synthesis-library'
        self.task = Tasks.human3d_render

    def save_results(self, result, save_root):
        """Save mesh + turntable GIFs from the pipeline output dict."""
        os.makedirs(save_root, exist_ok=True)

        mesh = result[OutputKeys.OUTPUT]['mesh']
        write_obj(os.path.join(save_root, 'mesh.obj'), mesh)

        frames_color = result[OutputKeys.OUTPUT]['frames_color']
        # NOTE(review): imageio interprets `duration` as seconds per frame in
        # older releases and milliseconds in >= 2.28; 33 looks intended as
        # ms (~30 fps) — verify against the pinned imageio version.
        imageio.mimwrite(
            os.path.join(save_root, 'render_color.gif'),
            frames_color,
            duration=33)
        del frames_color

        frames_normals = result[OutputKeys.OUTPUT]['frames_normal']
        imageio.mimwrite(
            os.path.join(save_root, 'render_normals.gif'),
            frames_normals,
            duration=33)
        del frames_normals

        print(f'Output written to {os.path.abspath(save_root)}')

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_modelhub(self):
        human3d = pipeline(self.task, model=self.model_id)
        # Renamed from `input` to avoid shadowing the builtin.
        pipeline_input = {
            'dataset_id': 'damo/3DHuman_synthetic_dataset',
            'case_id': '3f2a7538253e42a8',
        }
        output = human3d(pipeline_input)
        self.save_results(output, './human3d_results')

        print('human3d_render.test_run_modelhub done')


if __name__ == '__main__':
    unittest.main()