add cv_pointnet2_sceneflow-estimation_general

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11201880
2026-05-18 05:05:00 +02:00 · 2022-12-29 08:09:57 +08:00
parent f7a7504782
commit 42557b0867
16 changed files with 1596 additions and 5 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -7,3 +7,4 @@
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.avi filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
--- a/data/test/pointclouds/flyingthings_pcd1.npy
+++ b/data/test/pointclouds/flyingthings_pcd1.npy
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:9fa9f5c8a49d457a7b6f4239e438699e60541e7602e8b3b66da9f7b6d55096ab
 size 1735856
--- a/data/test/pointclouds/flyingthings_pcd2.npy
+++ b/data/test/pointclouds/flyingthings_pcd2.npy
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:86618feded6ae9fbcc772b9a7da17bad7d8b9c68ae0d505a239d110a3a0a7bf4
 size 1735856
--- a/modelscope/metainfo.py
+++ b/modelscope/metainfo.py
@@ -62,6 +62,7 @@ class Models(object):
    video_human_matting = 'video-human-matting'
    video_object_segmentation = 'video-object-segmentation'
    real_basicvsr = 'real-basicvsr'
    rcp_sceneflow_estimation = 'rcp-sceneflow-estimation'
    # EasyCV models
    yolox = 'YOLOX'
@@ -253,6 +254,7 @@ class Pipelines(object):
    video_human_matting = 'video-human-matting'
    video_object_segmentation = 'video-object-segmentation'
    video_super_resolution = 'realbasicvsr-video-super-resolution'
    pointcloud_sceneflow_estimation = 'pointcloud-sceneflow-estimation'
    # nlp tasks
    automatic_post_editing = 'automatic-post-editing'
--- a/modelscope/models/cv/init.py
+++ b/modelscope/models/cv/init.py
@@ -11,10 +11,11 @@ from . import (action_recognition, animal_recognition, body_2d_keypoints,
               image_reid_person, image_semantic_segmentation,
               image_to_image_generation, image_to_image_translation,
               language_guided_video_summarization, movie_scene_segmentation,
-               object_detection, product_retrieval_embedding,
+               object_detection, pointcloud_sceneflow_estimation,
-               realtime_object_detection, referring_video_object_segmentation,
+               product_retrieval_embedding, realtime_object_detection,
-               salient_detection, shop_segmentation, super_resolution,
+               referring_video_object_segmentation, salient_detection,
-               video_object_segmentation, video_single_object_tracking,
+               shop_segmentation, super_resolution, video_object_segmentation,
-               video_summarization, video_super_resolution, virual_tryon)
+               video_single_object_tracking, video_summarization,
               video_super_resolution, virual_tryon)
 # yapf: enable
--- a/modelscope/models/cv/pointcloud_sceneflow_estimation/init.py
+++ b/modelscope/models/cv/pointcloud_sceneflow_estimation/init.py
@@ -0,0 +1,22 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 from typing import TYPE_CHECKING
 from modelscope.utils.import_utils import LazyImportModule
 if TYPE_CHECKING:
    from .rcp_model import SceneFlowEstimation
 else:
    _import_structure = {
        'rcp_model': ['SceneFlowEstimation'],
    }
    import sys
    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )
--- a/modelscope/models/cv/pointcloud_sceneflow_estimation/common.py
+++ b/modelscope/models/cv/pointcloud_sceneflow_estimation/common.py
@@ -0,0 +1,446 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import math
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from . import pointnet2_utils as pointutils
 RADIUS = 2.5
 def index_points_group(points, knn_idx):
    """
    Input:
        points: input points data, [B, N, C]
        knn_idx: sample index data, [B, N, K]
    Return:
        new_points:, indexed points data, [B, N, K, C]
    """
    points_flipped = points.permute(0, 2, 1).contiguous()
    new_points = pointutils.grouping_operation(points_flipped,
                                               knn_idx.int()).permute(
                                                   0, 2, 3, 1)
    return new_points
 def curvature(pc, nsample=10, radius=RADIUS):
    # pc: B 3 N
    assert pc.shape[1] == 3
    pc = pc.permute(0, 2, 1)
    dist, kidx = pointutils.knn(nsample, pc.contiguous(),
                                pc.contiguous())  # (B, N, 10)
    if radius is not None:
        tmp_idx = kidx[:, :, 0].unsqueeze(2).repeat(1, 1,
                                                    nsample).to(kidx.device)
        kidx[dist > radius] = tmp_idx[dist > radius]
    grouped_pc = index_points_group(pc, kidx)  # B N 10 3
    pc_curvature = torch.sum(grouped_pc - pc.unsqueeze(2), dim=2) / 9.0
    return pc_curvature  # B N 3
 class PointNetSetAbstractionRatio(nn.Module):
    def __init__(self,
                 ratio,
                 radius,
                 nsample,
                 in_channel,
                 mlp,
                 group_all,
                 return_fps=False,
                 use_xyz=True,
                 use_act=True,
                 act=F.relu,
                 mean_aggr=False,
                 use_instance_norm=False):
        super(PointNetSetAbstractionRatio, self).__init__()
        self.ratio = ratio
        self.radius = radius
        self.nsample = nsample
        self.group_all = group_all
        self.use_xyz = use_xyz
        self.use_act = use_act
        self.mean_aggr = mean_aggr
        self.act = act
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        last_channel = (in_channel + 3) if use_xyz else in_channel
        for out_channel in mlp:
            self.mlp_convs.append(
                nn.Conv2d(last_channel, out_channel, 1, bias=False))
            if use_instance_norm:
                self.mlp_bns.append(
                    nn.InstanceNorm2d(out_channel, affine=True))
            else:
                self.mlp_bns.append(nn.BatchNorm2d(out_channel))
            last_channel = out_channel
        if group_all:
            self.queryandgroup = pointutils.GroupAll(self.use_xyz)
        else:
            self.queryandgroup = pointutils.QueryAndGroup(
                radius, nsample, self.use_xyz)
        self.return_fps = return_fps
    def forward(self, xyz, points, fps_idx=None):
        """
        Input:
            xyz: input points position data, [B, C, N]
            points: input points data, [B, D, N]
        Return:
            new_xyz: sampled points position data, [B, C, S]
            new_points: sample points feature data, [B, D', S]
        """
        B, C, N = xyz.shape
        npoint = int(N * self.ratio)
        xyz = xyz.contiguous()
        xyz_t = xyz.permute(0, 2, 1).contiguous()
        if (self.group_all is False) and (npoint != -1):
            if fps_idx is None:
                fps_idx = pointutils.furthest_point_sample(xyz_t,
                                                           npoint)  # [B, N]
            new_xyz = pointutils.gather_operation(xyz, fps_idx)  # [B, C, N]
        else:
            new_xyz = xyz
        new_points, _ = self.queryandgroup(xyz_t,
                                           new_xyz.transpose(2,
                                                             1).contiguous(),
                                           points)  # [B, 3+C, N, S]
        # new_xyz: sampled points position data, [B, C, npoint]
        # new_points: sampled points data, [B, C+D, npoint, nsample]
        for i, conv in enumerate(self.mlp_convs):
            if self.use_act:
                bn = self.mlp_bns[i]
                new_points = self.act(bn(conv(new_points)))
            else:
                new_points = conv(new_points)
        if self.mean_aggr:
            new_points = torch.mean(new_points, -1)
        else:
            new_points = torch.max(new_points, -1)[0]
        if self.return_fps:
            return new_xyz, new_points, fps_idx
        else:
            return new_xyz, new_points
 class PointNetSetAbstraction(nn.Module):
    def __init__(self,
                 npoint,
                 radius,
                 nsample,
                 in_channel,
                 mlp,
                 group_all,
                 return_fps=False,
                 use_xyz=True,
                 use_act=True,
                 act=F.relu,
                 mean_aggr=False,
                 use_instance_norm=False):
        super(PointNetSetAbstraction, self).__init__()
        self.npoint = npoint
        self.radius = radius
        self.nsample = nsample
        self.group_all = group_all
        self.use_xyz = use_xyz
        self.use_act = use_act
        self.mean_aggr = mean_aggr
        self.act = act
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        last_channel = (in_channel + 3) if use_xyz else in_channel
        for out_channel in mlp:
            self.mlp_convs.append(
                nn.Conv2d(last_channel, out_channel, 1, bias=False))
            if use_instance_norm:
                self.mlp_bns.append(
                    nn.InstanceNorm2d(out_channel, affine=True))
            else:
                self.mlp_bns.append(nn.BatchNorm2d(out_channel))
            last_channel = out_channel
        if group_all:
            self.queryandgroup = pointutils.GroupAll(self.use_xyz)
        else:
            self.queryandgroup = pointutils.QueryAndGroup(
                radius, nsample, self.use_xyz)
        self.return_fps = return_fps
    def forward(self, xyz, points, fps_idx=None):
        """
        Input:
            xyz: input points position data, [B, C, N]
            points: input points data, [B, D, N]
        Return:
            new_xyz: sampled points position data, [B, S, C]
            new_points: sample points feature data, [B, S, D']
        """
        # device = xyz.device
        B, C, N = xyz.shape
        xyz = xyz.contiguous()
        xyz_t = xyz.permute(0, 2, 1).contiguous()
        if (self.group_all is False) and (self.npoint != -1):
            if fps_idx is None:
                fps_idx = pointutils.furthest_point_sample(
                    xyz_t, self.npoint)  # [B, N]
            new_xyz = pointutils.gather_operation(xyz, fps_idx)  # [B, C, N]
        else:
            new_xyz = xyz
        new_points, _ = self.queryandgroup(xyz_t,
                                           new_xyz.transpose(2,
                                                             1).contiguous(),
                                           points)  # [B, 3+C, N, S]
        # new_xyz: sampled points position data, [B, C, npoint]
        # new_points: sampled points data, [B, C+D, npoint, nsample]
        for i, conv in enumerate(self.mlp_convs):
            if self.use_act:
                bn = self.mlp_bns[i]
                new_points = self.act(bn(conv(new_points)))
            else:
                new_points = conv(new_points)
        if self.mean_aggr:
            new_points = torch.mean(new_points, -1)
        else:
            new_points = torch.max(new_points, -1)[0]
        if self.return_fps:
            return new_xyz, new_points, fps_idx
        else:
            return new_xyz, new_points
 class PointNetFeaturePropogation(nn.Module):
    def __init__(self, in_channel, mlp, learn_mask=False, nsample=3):
        super(PointNetFeaturePropogation, self).__init__()
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        self.apply_mlp = mlp is not None
        last_channel = in_channel
        self.nsample = nsample
        if self.apply_mlp:
            for out_channel in mlp:
                self.mlp_convs.append(nn.Conv1d(last_channel, out_channel, 1))
                self.mlp_bns.append(nn.BatchNorm1d(out_channel))
                last_channel = out_channel
        if learn_mask:
            self.queryandgroup = pointutils.QueryAndGroup(
                None, 9, use_xyz=True)
            last_channel = (128 + 3)
            for out_channel in [32, 1]:
                self.mlp_convs.append(
                    nn.Conv2d(last_channel, out_channel, 1, bias=False))
                self.mlp_bns.append(nn.BatchNorm2d(out_channel))
                last_channel = out_channel
    def forward(self, pos1, pos2, feature1, feature2, hidden=None):
        """
        Input:
            pos1: input points position data, [B, C, N]
            pos2: sampled input points position data, [B, C, S]
            feature1: input points data, [B, D, N]
            feature2: input points data, [B, D, S]
        Return:
            feat_new: upsampled points data, [B, D', N]
        """
        pos1_t = pos1.permute(0, 2, 1).contiguous()
        pos2_t = pos2.permute(0, 2, 1).contiguous()
        B, C, N = pos1.shape
        if hidden is None:
            if self.nsample == 3:
                dists, idx = pointutils.three_nn(pos1_t, pos2_t)
            else:
                dists, idx = pointutils.knn(self.nsample, pos1_t, pos2_t)
            dists[dists < 1e-10] = 1e-10
            weight = 1.0 / dists
            weight = weight / torch.sum(weight, -1, keepdim=True)  # [B,N,3]
            interpolated_feat = torch.sum(
                pointutils.grouping_operation(feature2, idx)
                * weight.view(B, 1, N, self.nsample),
                dim=-1)  # [B,C,N,3]
        else:
            dist, idx = pointutils.knn(9, pos1_t, pos2_t)
            new_feat, _ = self.queryandgroup(pos2_t, pos1_t,
                                             hidden)  # [B, 3+C, N, 9]
            for i, conv in enumerate(self.mlp_convs):
                new_feat = conv(new_feat)
            weight = torch.softmax(new_feat, dim=-1)  # [B, 1, N, 9]
            interpolated_feat = torch.sum(
                pointutils.grouping_operation(feature2, idx) * weight,
                dim=-1)  # [B, C, N]
        if feature1 is not None:
            feat_new = torch.cat([interpolated_feat, feature1], 1)
        else:
            feat_new = interpolated_feat
        if self.apply_mlp:
            for i, conv in enumerate(self.mlp_convs):
                bn = self.mlp_bns[i]
                feat_new = F.relu(bn(conv(feat_new)))
        return feat_new
 class Sinkhorn(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self, corr, epsilon, gamma, max_iter):
        # Early return if no iteration
        if max_iter == 0:
            return corr
        # Init. of Sinkhorn algorithm
        power = gamma / (gamma + epsilon)
        a = (
            torch.ones((corr.shape[0], corr.shape[1], 1),
                       device=corr.device,
                       dtype=corr.dtype) / corr.shape[1])
        prob1 = (
            torch.ones((corr.shape[0], corr.shape[1], 1),
                       device=corr.device,
                       dtype=corr.dtype) / corr.shape[1])
        prob2 = (
            torch.ones((corr.shape[0], corr.shape[2], 1),
                       device=corr.device,
                       dtype=corr.dtype) / corr.shape[2])
        # Sinkhorn algorithm
        for _ in range(max_iter):
            # Update b
            KTa = torch.bmm(corr.transpose(1, 2), a)
            b = torch.pow(prob2 / (KTa + 1e-8), power)
            # Update a
            Kb = torch.bmm(corr, b)
            a = torch.pow(prob1 / (Kb + 1e-8), power)
        # Transportation map
        T = torch.mul(torch.mul(a, corr), b.transpose(1, 2))
        return T
 class PointWiseOptimLayer(nn.Module):
    def __init__(self, nsample, radius, in_channel, mlp, use_curvature=True):
        super().__init__()
        self.nsample = nsample
        self.radius = radius
        self.use_curvature = use_curvature
        self.pos_embed = nn.Sequential(
            nn.Conv1d(3, 32, 1), nn.ReLU(inplace=True), nn.Conv1d(32, 64, 1))
        self.qk_net = nn.Sequential(
            nn.Conv1d(in_channel + 64, in_channel + 64, 1),
            nn.ReLU(inplace=True),
            nn.Conv1d(in_channel + 64, in_channel + 64, 1))
        if self.use_curvature:
            self.curvate_net = nn.Sequential(
                nn.Conv1d(3, 32, 1), nn.ReLU(inplace=True),
                nn.Conv1d(32, 32, 1))
            self.mlp_conv = nn.Conv1d(
                in_channel + 64 + 32, mlp[-1], 1, bias=True)
        else:
            self.mlp_conv = nn.Conv1d(in_channel + 64, mlp[-1], 1, bias=True)
    def forward(self,
                pos1,
                pos2,
                feature1,
                feature2,
                nsample,
                radius=None,
                pos1_raw=None,
                return_score=False):
        """
        Input:
            pos1: (batch_size, 3, npoint)
            pos2: (batch_size, 3, npoint)
            feature1: (batch_size, channel, npoint)
            feature2: (batch_size, channel, npoint)
        Output:
            pos1: (batch_size, 3, npoint)
            cost: (batch_size, channel, npoint)
        """
        pos1_t = pos1.permute(0, 2, 1).contiguous()
        pos2_t = pos2.permute(0, 2, 1).contiguous()
        self.nsample = nsample
        self.radius = radius
        dist, idx = pointutils.knn(self.nsample, pos1_t, pos2_t)  # [B, N, K]
        if self.radius is not None:
            tmp_idx = idx[:, :,
                          0].unsqueeze(2).repeat(1, 1,
                                                 self.nsample).to(idx.device)
            idx[dist > self.radius] = tmp_idx[dist > self.radius]
        pos1_embed_norm = self.pos_embed(pos1)
        pos2_embed_norm = self.pos_embed(pos2)  # [B, C1, N]
        feat1_w_pos = torch.cat([feature1, pos1_embed_norm], dim=1)
        feat2_w_pos = torch.cat([feature2, pos2_embed_norm],
                                dim=1)  # [B, C1+C2, N]
        feat1_w_pos = self.qk_net(feat1_w_pos)
        feat2_w_pos = self.qk_net(feat2_w_pos)  # [B, C1+C2, N]
        feat2_grouped = pointutils.grouping_operation(feat2_w_pos,
                                                      idx)  # [B, C1+C2, N, S]
        score = torch.softmax(
            feat1_w_pos.unsqueeze(-1) * feat2_grouped * 1.
            / math.sqrt(feat1_w_pos.shape[1]),
            dim=-1)  # [B, C1+C2, N, S]
        cost = (score * (feat1_w_pos.unsqueeze(-1) - feat2_grouped)**2).sum(
            dim=-1)  # [B, C1+C2, N]
        if self.use_curvature:
            curvate1_raw = curvature(pos1_raw).permute(0, 2, 1)  # [B, 3, N]
            curvate1 = curvature(pos1).permute(0, 2, 1)  # [B, 3, N]
            curvate_cost = self.curvate_net(curvate1_raw) - self.curvate_net(
                curvate1)
            curvate_cost = curvate_cost**2
            cost = self.mlp_conv(torch.cat([cost, curvate_cost],
                                           dim=1))  # [B, C, N]
        else:
            cost = self.mlp_conv(cost)  # [B, C, N]
        if return_score:
            pos2_grouped = pointutils.grouping_operation(pos2,
                                                         idx)  # [B, 3, N, S]
            # [B, N, K]
            index = (dist > self.radius).sum(
                dim=2, keepdim=True).float() > (dist.shape[2] - 0.1
                                                )  # [B, N, 1]
            index = index.unsqueeze(1).repeat(1, score.shape[1], 1,
                                              dist.shape[2])  # [B, N, K]
            score_tmp = score.clone()
            score_tmp[index] = 0.0
            score = score_tmp
            return pos1, cost, score, pos2_grouped
        else:
            return pos1, cost
--- a/modelscope/models/cv/pointcloud_sceneflow_estimation/pointnet2_utils.py
+++ b/modelscope/models/cv/pointcloud_sceneflow_estimation/pointnet2_utils.py
@@ -0,0 +1,360 @@
 # The implementation is adopt from PointNet2, open-sourced under MIT license,
 # made publicy available at https://github.com/sshaoshuai/Pointnet2.PyTorch
 from typing import Tuple
 import pointnet2_cuda as pointnet2
 import torch
 import torch.nn as nn
 from torch.autograd import Function, Variable
 class FurthestPointSampling(Function):
    @staticmethod
    def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:
        """
        Uses iterative furthest point sampling to select a set of npoint features that have the largest
        minimum distance
        :param ctx:
        :param xyz: (B, N, 3) where N > npoint
        :param npoint: int, number of features in the sampled set
        :return:
             output: (B, npoint) tensor containing the set
        """
        assert xyz.is_contiguous()
        B, N, _ = xyz.size()
        output = torch.cuda.IntTensor(B, npoint)
        temp = torch.cuda.FloatTensor(B, N).fill_(1e10)
        pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp,
                                                  output)
        return output
    @staticmethod
    def backward(xyz, a=None):
        return None, None
 furthest_point_sample = FurthestPointSampling.apply
 class GatherOperation(Function):
    @staticmethod
    def forward(ctx, features: torch.Tensor,
                idx: torch.Tensor) -> torch.Tensor:
        """
        :param ctx:
        :param features: (B, C, N)
        :param idx: (B, npoint) index tensor of the features to gather
        :return:
            output: (B, C, npoint)
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()
        B, npoint = idx.size()
        _, C, N = features.size()
        output = torch.cuda.FloatTensor(B, C, npoint)
        pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output)
        ctx.for_backwards = (idx, C, N)
        return output
    @staticmethod
    def backward(ctx, grad_out):
        idx, C, N = ctx.for_backwards
        B, npoint = idx.size()
        grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
        grad_out_data = grad_out.data.contiguous()
        pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data,
                                             idx, grad_features.data)
        return grad_features, None
 gather_operation = GatherOperation.apply
 class KNN(Function):
    @staticmethod
    def forward(ctx, k: int, unknown: torch.Tensor,
                known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Find the three nearest neighbors of unknown in known
        :param ctx:
        :param unknown: (B, N, 3)
        :param known: (B, M, 3)
        :return:
            dist: (B, N, k) l2 distance to the three nearest neighbors
            idx: (B, N, k) index of 3 nearest neighbors
        """
        assert unknown.is_contiguous()
        assert known.is_contiguous()
        B, N, _ = unknown.size()
        m = known.size(1)
        dist2 = torch.cuda.FloatTensor(B, N, k)
        idx = torch.cuda.IntTensor(B, N, k)
        pointnet2.knn_wrapper(B, N, m, k, unknown, known, dist2, idx)
        return torch.sqrt(dist2), idx
    @staticmethod
    def backward(ctx, a=None, b=None):
        return None, None, None
 knn = KNN.apply
 class ThreeNN(Function):
    @staticmethod
    def forward(ctx, unknown: torch.Tensor,
                known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Find the three nearest neighbors of unknown in known
        :param ctx:
        :param unknown: (B, N, 3)
        :param known: (B, M, 3)
        :return:
            dist: (B, N, 3) l2 distance to the three nearest neighbors
            idx: (B, N, 3) index of 3 nearest neighbors
        """
        assert unknown.is_contiguous()
        assert known.is_contiguous()
        B, N, _ = unknown.size()
        m = known.size(1)
        dist2 = torch.cuda.FloatTensor(B, N, 3)
        idx = torch.cuda.IntTensor(B, N, 3)
        pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx)
        return torch.sqrt(dist2), idx
    @staticmethod
    def backward(ctx, a=None, b=None):
        return None, None
 three_nn = ThreeNN.apply
 class ThreeInterpolate(Function):
    @staticmethod
    def forward(ctx, features: torch.Tensor, idx: torch.Tensor,
                weight: torch.Tensor) -> torch.Tensor:
        """
        Performs weight linear interpolation on 3 features
        :param ctx:
        :param features: (B, C, M) Features descriptors to be interpolated from
        :param idx: (B, n, 3) three nearest neighbors of the target features in features
        :param weight: (B, n, 3) weights
        :return:
            output: (B, C, N) tensor of the interpolated features
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()
        assert weight.is_contiguous()
        B, c, m = features.size()
        n = idx.size(1)
        ctx.three_interpolate_for_backward = (idx, weight, m)
        output = torch.cuda.FloatTensor(B, c, n)
        pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight,
                                            output)
        return output
    @staticmethod
    def backward(
        ctx, grad_out: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        :param ctx:
        :param grad_out: (B, C, N) tensor with gradients of outputs
        :return:
            grad_features: (B, C, M) tensor with gradients of features
            None:
            None:
        """
        idx, weight, m = ctx.three_interpolate_for_backward
        B, c, n = grad_out.size()
        grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_())
        grad_out_data = grad_out.data.contiguous()
        pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data,
                                                 idx, weight,
                                                 grad_features.data)
        return grad_features, None, None
 three_interpolate = ThreeInterpolate.apply
 class GroupingOperation(Function):
    @staticmethod
    def forward(ctx, features: torch.Tensor,
                idx: torch.Tensor) -> torch.Tensor:
        """
        :param ctx:
        :param features: (B, C, N) tensor of features to group
        :param idx: (B, npoint, nsample) tensor containing the indicies of features to group with
        :return:
            output: (B, C, npoint, nsample) tensor
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()
        idx = idx.int()
        B, nfeatures, nsample = idx.size()
        _, C, N = features.size()
        output = torch.cuda.FloatTensor(B, C, nfeatures, nsample)
        pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features,
                                       idx, output)
        ctx.for_backwards = (idx, N)
        return output
    @staticmethod
    def backward(ctx,
                 grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        :param ctx:
        :param grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward
        :return:
            grad_features: (B, C, N) gradient of the features
        """
        idx, N = ctx.for_backwards
        B, C, npoint, nsample = grad_out.size()
        grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
        grad_out_data = grad_out.data.contiguous()
        pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample,
                                            grad_out_data, idx,
                                            grad_features.data)
        return grad_features, None
 grouping_operation = GroupingOperation.apply
 class BallQuery(Function):
    @staticmethod
    def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor,
                new_xyz: torch.Tensor) -> torch.Tensor:
        """
        :param ctx:
        :param radius: float, radius of the balls
        :param nsample: int, maximum number of features in the balls
        :param xyz: (B, N, 3) xyz coordinates of the features
        :param new_xyz: (B, npoint, 3) centers of the ball query
        :return:
            idx: (B, npoint, nsample) tensor with the indicies of the features that form the query balls
        """
        assert new_xyz.is_contiguous()
        assert xyz.is_contiguous()
        B, N, _ = xyz.size()
        npoint = new_xyz.size(1)
        idx = torch.cuda.IntTensor(B, npoint, nsample).zero_()
        pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz,
                                     xyz, idx)
        return idx
    @staticmethod
    def backward(ctx, a=None):
        return None, None, None, None
 ball_query = BallQuery.apply
 class QueryAndGroup(nn.Module):
    def __init__(self, radius: float, nsample: int, use_xyz: bool = True):
        """
        :param radius: float, radius of ball
        :param nsample: int, maximum number of features to gather in the ball
        :param use_xyz:
        """
        super().__init__()
        self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz
    def forward(self,
                xyz: torch.Tensor,
                new_xyz: torch.Tensor,
                features: torch.Tensor = None) -> Tuple[torch.Tensor]:
        """
        :param xyz: (B, N, 3) xyz coordinates of the features
        :param new_xyz: (B, npoint, 3) centroids
        :param features: (B, C, N) descriptors of the features
        :return:
            new_features: (B, 3 + C, npoint, nsample)
        """
        # idx = ball_query(self.radius, self.nsample, xyz, new_xyz)
        B, N, C = new_xyz.shape
        dist, idx = knn(self.nsample, new_xyz, xyz)
        if self.radius is not None:
            tmp_idx = idx[:, :,
                          0].unsqueeze(2).repeat(1, 1,
                                                 self.nsample).to(idx.device)
            idx[dist > self.radius] = tmp_idx[dist > self.radius]
        xyz_trans = xyz.transpose(1, 2).contiguous()
        grouped_xyz = grouping_operation(xyz_trans,
                                         idx)  # (B, 3, npoint, nsample)
        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)
        if features is not None:
            grouped_features = grouping_operation(features, idx)
            if self.use_xyz:
                new_features = torch.cat([grouped_xyz, grouped_features],
                                         dim=1)  # (B, C + 3, npoint, nsample)
            else:
                new_features = grouped_features
        else:
            assert self.use_xyz, 'Cannot have not features and not use xyz as a feature!'
            new_features = grouped_xyz
        return new_features, grouped_xyz
 class GroupAll(nn.Module):
    def __init__(self, use_xyz: bool = True):
        super().__init__()
        self.use_xyz = use_xyz
    def forward(self,
                xyz: torch.Tensor,
                new_xyz: torch.Tensor,
                features: torch.Tensor = None):
        """
        :param xyz: (B, N, 3) xyz coordinates of the features
        :param new_xyz: ignored
        :param features: (B, C, N) descriptors of the features
        :return:
            new_features: (B, C + 3, 1, N)
        """
        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
        if features is not None:
            grouped_features = features.unsqueeze(2)
            if self.use_xyz:
                new_features = torch.cat([grouped_xyz, grouped_features],
                                         dim=1)  # (B, 3 + C, 1, N)
            else:
                new_features = grouped_features
        else:
            new_features = grouped_xyz
        return new_features, grouped_xyz
--- a/modelscope/models/cv/pointcloud_sceneflow_estimation/rcp_model.py
+++ b/modelscope/models/cv/pointcloud_sceneflow_estimation/rcp_model.py
@@ -0,0 +1,64 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import os.path as osp
 import numpy as np
 import torch
 from modelscope.metainfo import Models
 from modelscope.models.base.base_torch_model import TorchModel
 from modelscope.models.builder import MODELS
 from modelscope.outputs import OutputKeys
 from modelscope.utils.constant import ModelFile, Tasks
 from modelscope.utils.logger import get_logger
 from .sf_rcp import SF_RCP
 logger = get_logger()
@MODELS.register_module(
    Tasks.pointcloud_sceneflow_estimation,
    module_name=Models.rcp_sceneflow_estimation)
 class SceneFlowEstimation(TorchModel):
    def __init__(self, model_dir: str, **kwargs):
        """str -- model file root."""
        super().__init__(model_dir, **kwargs)
        assert torch.cuda.is_available(
        ), 'current model only support run in gpu'
        # build model
        self.model = SF_RCP(
            npoint=8192,
            use_instance_norm=False,
            model_name='SF_RCP',
            use_insrance_norm=False,
            use_curvature=True)
        # load model
        model_path = osp.join(model_dir, ModelFile.TORCH_MODEL_FILE)
        logger.info(f'load ckpt from:{model_path}')
        checkpoint = torch.load(model_path, map_location='cpu')
        self.model.load_state_dict({k: v for k, v in checkpoint.items()})
        self.model.cuda()
        self.model.eval()
    def forward(self, Inputs):
        return self.model(Inputs['pcd1'], Inputs['pcd2'], Inputs['pcd1'],
                          Inputs['pcd2'])[-1]
    def postprocess(self, Inputs):
        output = Inputs['output']
        results = {OutputKeys.OUTPUT: output.detach().cpu().numpy()[0]}
        return results
    def inference(self, data):
        results = self.forward(data)
        return results
--- a/modelscope/models/cv/pointcloud_sceneflow_estimation/sf_rcp.py
+++ b/modelscope/models/cv/pointcloud_sceneflow_estimation/sf_rcp.py
@@ -0,0 +1,523 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from .common import (PointNetFeaturePropogation, PointNetSetAbstraction,
                     PointWiseOptimLayer, Sinkhorn)
 class FeatureMatching(nn.Module):
    def __init__(self, npoint, use_instance_norm, supporth_th, feature_norm,
                 max_iter):
        super(FeatureMatching, self).__init__()
        self.support_th = supporth_th**2  # 10m
        self.feature_norm = feature_norm
        self.max_iter = max_iter
        # Mass regularisation
        self.gamma = torch.nn.Parameter(torch.zeros(1))
        # Entropic regularisation
        self.epsilon = torch.nn.Parameter(torch.zeros(1))
        self.sinkhorn = Sinkhorn()
        self.extract_glob = FeatureExtractionGlobal(npoint, use_instance_norm)
        # upsample flow
        self.fp0 = PointNetFeaturePropogation(in_channel=3, mlp=[])
        self.sa1 = PointNetSetAbstraction(
            npoint=int(npoint / 16),
            radius=None,
            nsample=16,
            in_channel=3,
            mlp=[32, 32, 64],
            group_all=False,
            use_instance_norm=use_instance_norm)
        self.fp1 = PointNetFeaturePropogation(in_channel=64, mlp=[])
        self.sa2 = PointNetSetAbstraction(
            npoint=int(npoint / 8),
            radius=None,
            nsample=16,
            in_channel=64,
            mlp=[64, 64, 128],
            group_all=False,
            use_instance_norm=use_instance_norm)
        self.fp2 = PointNetFeaturePropogation(in_channel=128, mlp=[])
        self.flow_regressor = FlowRegressor(npoint, use_instance_norm)
        self.flow_up_sample = PointNetFeaturePropogation(in_channel=3, mlp=[])
    def upsample_flow(self, pc1_l, pc1_l_glob, flow_inp):
        """
            flow_inp: [B, N, 3]
            return: [B, 3, N]
        """
        flow_inp = flow_inp.permute(0, 2, 1).contiguous()  # [B, 3, N]
        flow_feat = self.fp0(pc1_l_glob['s16'], pc1_l_glob['s32'], None,
                             flow_inp)
        _, corr_feats_l2 = self.sa1(pc1_l_glob['s16'], flow_feat)
        flow_feat = self.fp1(pc1_l_glob['s8'], pc1_l_glob['s16'], None,
                             corr_feats_l2)
        _, flow_feat = self.sa2(pc1_l_glob['s8'], flow_feat)
        flow_feat = self.fp2(pc1_l['s4'], pc1_l_glob['s8'], None, flow_feat)
        flow, flow_lr = self.flow_regressor(pc1_l, flow_feat)
        flow_up = self.flow_up_sample(pc1_l['s1'], pc1_l_glob['s32'], None,
                                      flow_inp)
        flow_lr_up = self.flow_up_sample(pc1_l['s4'], pc1_l_glob['s32'], None,
                                         flow_inp)
        flow, flow_lr = flow + flow_up, flow_lr + flow_lr_up
        return flow, flow_lr
    def calc_feats_corr(self, pcloud1, pcloud2, feature1, feature2, norm):
        """
            pcloud1, pcloud2: [B, N, 3]
            feature1, feature2: [B, N, C]
        """
        if norm:
            feature1 = feature1 / torch.sqrt(
                torch.sum(feature1**2, -1, keepdim=True) + 1e-6)
            feature2 = feature2 / torch.sqrt(
                torch.sum(feature2**2, -1, keepdim=True) + 1e-6)
            corr_mat = torch.bmm(feature1,
                                 feature2.transpose(1, 2))  # [B, N1, N2]
        else:
            corr_mat = torch.bmm(feature1, feature2.transpose(
                1, 2)) / feature1.shape[2]**.5  # [B, N1, N2]
        if self.support_th is not None:
            distance_matrix = torch.sum(
                pcloud1**2, -1, keepdim=True)  # [B, N1, 1]
            distance_matrix = distance_matrix + torch.sum(
                pcloud2**2, -1, keepdim=True).transpose(1, 2)  # [B, N1, N2]
            distance_matrix = distance_matrix - 2 * torch.bmm(
                pcloud1, pcloud2.transpose(1, 2))  # [B, N1, N2]
            support = (distance_matrix < self.support_th)  # [B, N1, N2]
            support = support.float()
        else:
            support = torch.ones_like(corr_mat)
        return corr_mat, support
    def calc_corr_mat(self, pcloud1, pcloud2, feature1, feature2):
        """
            pcloud1, pcloud2: [B, N, 3]
            feature1, feature2: [B, N, C]
            corr_mat: [B, N1, N2]
        """
        epsilon = torch.exp(self.epsilon) + 0.03
        corr_mat, support = self.calc_feats_corr(
            pcloud1, pcloud2, feature1, feature2, norm=self.feature_norm)
        C = 1.0 - corr_mat
        corr_mat = torch.exp(-C / epsilon) * support
        return corr_mat
    def get_flow_init(self, pcloud1, pcloud2, feats1, feats2):
        """
            pcloud1, pcloud2: [B, 3, N]
            feats1, feats2: [B, C, N]
        """
        corr_mat = self.calc_corr_mat(
            pcloud1.permute(0, 2, 1), pcloud2.permute(0, 2, 1),
            feats1.permute(0, 2, 1), feats2.permute(0, 2, 1))
        corr_mat = self.sinkhorn(corr_mat,
                                 torch.exp(self.epsilon) + 0.03, self.gamma,
                                 self.max_iter)
        row_sum = corr_mat.sum(-1, keepdim=True)  # [B, N1, 1]
        flow_init = (corr_mat @ pcloud2.permute(0, 2, 1).contiguous()) / (
            row_sum + 1e-6) - pcloud1.permute(0, 2,
                                              1).contiguous()  # [B, N1, 3]
        return flow_init
    def forward(self, pc1_l, pc2_l, feats1, feats2):
        """
        pc1_l, pc2_l: dict([B, 3, N])
        feats1, feats2: [B, C, N]
        """
        pc1_l_glob, feats1_glob = self.extract_glob(pc1_l['s4'], feats1)
        pc2_l_glob, feats2_glob = self.extract_glob(pc2_l['s4'], feats2)
        flow_init_s32 = self.get_flow_init(pc1_l_glob['s32'],
                                           pc2_l_glob['s32'], feats1_glob,
                                           feats2_glob)
        flow_init, flow_init_s4 = self.upsample_flow(pc1_l, pc1_l_glob,
                                                     flow_init_s32)
        return flow_init, flow_init_s4
 class FlowRegressor(nn.Module):
    def __init__(self, npoint, use_instance_norm, input_dim=128, nsample=32):
        super(FlowRegressor, self).__init__()
        self.sa1 = PointNetSetAbstraction(
            npoint=int(npoint / 4),
            radius=None,
            nsample=nsample,
            in_channel=input_dim,
            mlp=[input_dim, input_dim],
            group_all=False,
            use_instance_norm=use_instance_norm)
        self.sa2 = PointNetSetAbstraction(
            npoint=int(npoint / 4),
            radius=None,
            nsample=nsample,
            in_channel=input_dim,
            mlp=[input_dim, input_dim],
            group_all=False,
            use_instance_norm=use_instance_norm)
        self.fc = nn.Sequential(
            nn.Linear(input_dim, input_dim), nn.ReLU(inplace=True),
            nn.Linear(input_dim, 3))
        self.up_sample = PointNetFeaturePropogation(in_channel=3, mlp=[])
    def forward(self, pc1_l, feats):
        """
            pc1_l: dict([B, 3, N])
            feats: [B, C, N]
            return: [B, 3, N]
        """
        _, x = self.sa1(pc1_l['s4'], feats)
        _, x = self.sa2(pc1_l['s4'], x)
        x = x.permute(0, 2, 1).contiguous()  # [B, N, C]
        x = self.fc(x)
        flow_lr = x.permute(0, 2, 1).contiguous()  # [B, 3, N]
        flow = self.up_sample(pc1_l['s1'], pc1_l['s4'], None,
                              flow_lr)  # [B, 3, N]
        return flow, flow_lr
 class FeatureExtractionGlobal(nn.Module):
    def __init__(self, npoint, use_instance_norm):
        super(FeatureExtractionGlobal, self).__init__()
        self.sa1 = PointNetSetAbstraction(
            npoint=int(npoint / 8),
            radius=None,
            nsample=32,
            in_channel=64,
            mlp=[128, 128, 128],
            group_all=False,
            use_instance_norm=use_instance_norm)
        self.sa2 = PointNetSetAbstraction(
            npoint=int(npoint / 16),
            radius=None,
            nsample=24,
            in_channel=128,
            mlp=[128, 128, 128],
            group_all=False,
            use_instance_norm=use_instance_norm)
        self.sa3 = PointNetSetAbstraction(
            npoint=int(npoint / 32),
            radius=None,
            nsample=16,
            in_channel=128,
            mlp=[256, 256, 256],
            group_all=False,
            use_instance_norm=use_instance_norm)
    def forward(self, pc, feature):
        pc_l1, feat_l1 = self.sa1(pc, feature)
        pc_l2, feat_l2 = self.sa2(pc_l1, feat_l1)
        pc_l3, feat_l3 = self.sa3(pc_l2, feat_l2)
        pc_l = dict(s8=pc_l1, s16=pc_l2, s32=pc_l3)
        return pc_l, feat_l3
 class FeatureExtraction(nn.Module):
    def __init__(self, npoint, use_instance_norm):
        super(FeatureExtraction, self).__init__()
        self.sa1 = PointNetSetAbstraction(
            npoint=int(npoint / 2),
            radius=None,
            nsample=32,
            in_channel=3,
            mlp=[32, 32, 32],
            group_all=False,
            return_fps=True,
            use_instance_norm=use_instance_norm)
        self.sa2 = PointNetSetAbstraction(
            npoint=int(npoint / 4),
            radius=None,
            nsample=32,
            in_channel=32,
            mlp=[64, 64, 64],
            group_all=False,
            return_fps=True,
            use_instance_norm=use_instance_norm)
    def forward(self, pc, feature, fps_idx=None):
        """
            pc: [B, 3, N]
            feature: [B, 3, N]
        """
        fps_idx1 = fps_idx['s2'] if fps_idx is not None else None
        pc_l1, feat_l1, fps_idx1 = self.sa1(pc, feature, fps_idx=fps_idx1)
        fps_idx2 = fps_idx['s4'] if fps_idx is not None else None
        pc_l2, feat_l2, fps_idx2 = self.sa2(pc_l1, feat_l1, fps_idx=fps_idx2)
        pc_l = dict(s1=pc, s2=pc_l1, s4=pc_l2)
        fps_idx = dict(s2=fps_idx1, s4=fps_idx2)
        return pc_l, feat_l2, fps_idx
 class HiddenInitNet(nn.Module):
    def __init__(self, npoint, use_instance_norm):
        super(HiddenInitNet, self).__init__()
        self.sa1 = PointNetSetAbstraction(
            npoint=int(npoint / 4),
            radius=None,
            nsample=8,
            in_channel=64,
            mlp=[128, 128, 128],
            group_all=False,
            use_instance_norm=use_instance_norm)
        self.sa2 = PointNetSetAbstraction(
            npoint=int(npoint / 4),
            radius=None,
            nsample=8,
            in_channel=128,
            mlp=[128],
            group_all=False,
            use_act=False,
            use_instance_norm=use_instance_norm)
    def forward(self, pc, feature):
        _, feat_l1 = self.sa1(pc, feature)
        _, feat_l2 = self.sa2(pc, feat_l1)
        h_init = torch.tanh(feat_l2)
        return h_init
 class GRUReg(nn.Module):
    def __init__(self, npoint, hidden_dim, input_dim, use_instance_norm):
        super().__init__()
        in_ch = hidden_dim + input_dim
        self.flow_proj = nn.ModuleList([
            PointNetSetAbstraction(
                npoint=int(npoint / 4),
                radius=None,
                nsample=16,
                in_channel=3,
                mlp=[32, 32, 32],
                group_all=False,
                use_instance_norm=use_instance_norm),
            PointNetSetAbstraction(
                npoint=int(npoint / 4),
                radius=None,
                nsample=8,
                in_channel=32,
                mlp=[16, 16, 16],
                group_all=False,
                use_instance_norm=use_instance_norm)
        ])
        self.hidden_init_net = HiddenInitNet(npoint, use_instance_norm)
        self.gru_layers = nn.ModuleList([
            PointNetSetAbstraction(
                npoint=int(npoint / 4),
                radius=None,
                nsample=4,
                in_channel=in_ch,
                mlp=[hidden_dim],
                group_all=False,
                use_act=False,
                use_instance_norm=use_instance_norm),
            PointNetSetAbstraction(
                npoint=int(npoint / 4),
                radius=None,
                nsample=4,
                in_channel=in_ch,
                mlp=[hidden_dim],
                group_all=False,
                use_act=False,
                use_instance_norm=use_instance_norm),
            PointNetSetAbstraction(
                npoint=int(npoint / 4),
                radius=None,
                nsample=4,
                in_channel=in_ch,
                mlp=[hidden_dim],
                group_all=False,
                use_act=False,
                use_instance_norm=use_instance_norm)
        ])
    def gru(self, h, gru_inp, pc):
        hx = torch.cat([h, gru_inp], dim=1)
        z = torch.sigmoid(self.gru_layers[0](pc, hx)[1])
        r = torch.sigmoid(self.gru_layers[1](pc, hx)[1])
        q = torch.tanh(self.gru_layers[2](pc, torch.cat([r * h, gru_inp],
                                                        dim=1))[1])
        h = (1 - z) * h + z * q
        return h
    def get_gru_input(self, feats1_new, cost, flow, pc):
        flow_feats = flow
        for flow_conv in self.flow_proj:
            _, flow_feats = flow_conv(pc, flow_feats)
        gru_inp = torch.cat([feats1_new, cost, flow_feats, flow],
                            dim=1)  # [64, 128, 16, 3]
        return gru_inp
    def forward(self, h, feats1_new, cost, flow_lr, pc1_l):
        gru_inp = self.get_gru_input(feats1_new, cost, flow_lr, pc=pc1_l['s4'])
        h = self.gru(h, gru_inp, pc1_l['s4'])
        return h
 class SF_RCP(nn.Module):
    def __init__(self, npoint=8192, use_instance_norm=False, **kwargs):
        super().__init__()
        self.radius = kwargs.get('radius', 3.5)
        self.nsample = kwargs.get('nsample', 6)
        self.radius_min = kwargs.get('radius_min', 3.5)
        self.nsample_min = kwargs.get('nsample_min', 6)
        self.use_curvature = kwargs.get('use_curvature', True)
        self.flow_ratio = kwargs.get('flow_ratio', 0.1)
        self.init_max_iter = kwargs.get('init_max_iter', 0)
        self.init_feature_norm = kwargs.get('init_feature_norm', True)
        self.support_th = kwargs.get('support_th', 10)
        self.feature_extraction = FeatureExtraction(npoint, use_instance_norm)
        self.feature_matching = FeatureMatching(
            npoint,
            use_instance_norm,
            supporth_th=self.support_th,
            feature_norm=self.init_feature_norm,
            max_iter=self.init_max_iter)
        self.pointwise_optim_layer = PointWiseOptimLayer(
            nsample=self.nsample,
            radius=self.radius,
            in_channel=64,
            mlp=[128, 128, 128],
            use_curvature=self.use_curvature)
        self.gru = GRUReg(
            npoint,
            hidden_dim=128,
            input_dim=128 + 64 + 16 + 3,
            use_instance_norm=use_instance_norm)
        self.flow_regressor = FlowRegressor(npoint, use_instance_norm)
    def initialization(self, pc1_l, pc2_l, feats1, feats2):
        """
            pc1: [B, 3, N]
            pc2: [B, 3, N]
            feature1: [B, 3, N]
            feature2: [B, 3, N]
        """
        flow, flow_lr = self.feature_matching(pc1_l, pc2_l, feats1, feats2)
        return flow, flow_lr
    def pointwise_optimization(self, pc1_l_new, pc2_l, feats1_new, feats2,
                               pc1_l, flow_lr, iter):
        _, cost, score, pos2_grouped = self.pointwise_optim_layer(
            pc1_l_new['s4'],
            pc2_l['s4'],
            feats1_new,
            feats2,
            nsample=max(self.nsample_min, self.nsample // (2**iter)),
            radius=max(self.radius_min, self.radius / (2**iter)),
            pos1_raw=pc1_l['s4'],
            return_score=True)
        # pc1_new_l_loc: [B, 3, N, S]
        # pos2_grouped: [B, C, N, S]
        delta_flow_tmp = ((pos2_grouped - pc1_l_new['s4'].unsqueeze(-1))
                          * score.mean(dim=1, keepdim=True)).sum(
                              dim=-1)  # [B, 3, N]
        flow_lr = flow_lr + self.flow_ratio * delta_flow_tmp
        return flow_lr, cost
    def update_pos(self, pc, pc_lr, flow, flow_lr):
        pc = pc + flow
        pc_lr = pc_lr + flow_lr
        return pc, pc_lr
    def forward(self, pc1, pc2, feature1, feature2, iters=1):
        """
            pc1: [B, N, 3]
            pc2: [B, N, 3]
            feature1: [B, N, 3]
            feature2: [B, N, 3]
        """
        # prepare
        flow_predictions = []
        pc1 = pc1.permute(0, 2, 1).contiguous()  # B 3 N
        pc2 = pc2.permute(0, 2, 1).contiguous()  # B 3 N
        feature1 = feature1.permute(0, 2, 1).contiguous()  # B 3 N
        feature2 = feature2.permute(0, 2, 1).contiguous()  # B 3 N
        # feature extraction
        pc1_l, feats1, fps_idx1 = self.feature_extraction(pc1, feature1)
        pc2_l, feats2, _ = self.feature_extraction(pc2, feature2)
        # initialization, flow_lr_init(flow_low_resolution)
        flow_init, flow_lr_init = self.initialization(pc1_l, pc2_l, feats1,
                                                      feats2)
        flow_predictions.append(flow_init.permute(0, 2, 1))
        # gru init hidden state
        h = self.gru.hidden_init_net(pc1_l['s4'], feats1)
        # update position
        pc1_lr_raw = pc1_l['s4']
        pc1_new, pc1_lr_new = self.update_pos(pc1, pc1_lr_raw, flow_init,
                                              flow_lr_init)
        # iterative optim
        for iter in range(iters - 1):
            pc1_new = pc1_new.detach()
            pc1_lr_new = pc1_lr_new.detach()
            flow_lr = pc1_lr_new - pc1_lr_raw
            pc1_l_new, feats1_new, _ = self.feature_extraction(
                pc1_new, pc1_new, fps_idx1)
            # pointwise optimization to get udpated flow_lr and cost
            flow_lr_update, cost = self.pointwise_optimization(
                pc1_l_new, pc2_l, feats1_new, feats2, pc1_l, flow_lr, iter)
            flow_lr = flow_lr_update
            # gru regularization
            h = self.gru(h, feats1_new, cost, flow_lr, pc1_l)
            # pred flow_lr
            delta_flow, delta_flow_lr = self.flow_regressor(pc1_l, h)
            pc1_new, pc1_lr_new = self.update_pos(pc1_new, pc1_lr_new,
                                                  delta_flow, delta_flow_lr)
            flow = pc1_new - pc1
            flow_predictions.append(flow.permute(0, 2, 1))
        return flow_predictions
--- a/modelscope/outputs/outputs.py
+++ b/modelscope/outputs/outputs.py
@@ -50,6 +50,8 @@ class OutputKeys(object):
    SCENE_NUM = 'scene_num'
    SCENE_META_LIST = 'scene_meta_list'
    SHOT_META_LIST = 'shot_meta_list'
    PCD12 = 'pcd12'
    PCD12_ALIGN = 'pcd12_align'
 TASK_OUTPUTS = {
--- a/modelscope/pipelines/cv/init.py
+++ b/modelscope/pipelines/cv/init.py
@@ -70,6 +70,7 @@ if TYPE_CHECKING:
    from .image_skychange_pipeline import ImageSkychangePipeline
    from .video_object_segmentation_pipeline import VideoObjectSegmentationPipeline
    from .video_super_resolution_pipeline import VideoSuperResolutionPipeline
    from .pointcloud_sceneflow_estimation_pipeline import PointCloudSceneFlowEstimationPipeline
 else:
    _import_structure = {
@@ -162,6 +163,9 @@ else:
            'VideoObjectSegmentationPipeline'
        ],
        'video_super_resolution_pipeline': ['VideoSuperResolutionPipeline'],
        'pointcloud_sceneflow_estimation_pipeline': [
            'PointCloudSceneFlowEstimationPipeline'
        ]
    }
    import sys
--- a/modelscope/pipelines/cv/pointcloud_sceneflow_estimation_pipeline.py
+++ b/modelscope/pipelines/cv/pointcloud_sceneflow_estimation_pipeline.py
@@ -0,0 +1,114 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 from typing import Any, Dict, Union
 import numpy as np
 import torch
 from plyfile import PlyData, PlyElement
 from modelscope.metainfo import Pipelines
 from modelscope.outputs import OutputKeys
 from modelscope.pipelines.base import Input, Model, Pipeline
 from modelscope.pipelines.builder import PIPELINES
 from modelscope.preprocessors import LoadImage
 from modelscope.utils.constant import Tasks
 from modelscope.utils.cv.image_utils import depth_to_color
 from modelscope.utils.logger import get_logger
 logger = get_logger()
@PIPELINES.register_module(
    Tasks.pointcloud_sceneflow_estimation,
    module_name=Pipelines.pointcloud_sceneflow_estimation)
 class PointCloudSceneFlowEstimationPipeline(Pipeline):
    def __init__(self, model: str, **kwargs):
        """
        use `model` to create a image depth estimation pipeline for prediction
        Args:
            model: model id on modelscope hub.
        """
        super().__init__(model=model, **kwargs)
        logger.info('pointcloud scenflow estimation model, pipeline init')
    def check_input_pcd(self, pcd):
        assert pcd.ndim == 2, 'pcd ndim must equal to 2'
        assert pcd.shape[1] == 3, 'pcd.shape[1] must equal to 3'
    def preprocess(self, input: Input) -> Dict[str, Any]:
        assert isinstance(input, tuple), 'only support tuple input'
        assert isinstance(input[0], str) and isinstance(
            input[1], str), 'only support tuple input with str type'
        pcd1_file, pcd2_file = input
        logger.info(f'input pcd file:{pcd1_file},  \n  {pcd2_file}')
        pcd1 = np.load(pcd1_file)
        pcd2 = np.load(pcd2_file)
        self.check_input_pcd(pcd1)
        self.check_input_pcd(pcd2)
        pcd1_torch = torch.from_numpy(pcd1).float().unsqueeze(0).cuda()
        pcd2_torch = torch.from_numpy(pcd2).float().unsqueeze(0).cuda()
        data = {
            'pcd1': pcd1_torch,
            'pcd2': pcd2_torch,
            'pcd1_ori': pcd1,
            'pcd2_ori': pcd2
        }
        return data
    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
        results = {}
        output = self.model.inference(input)
        results['output'] = output
        results['pcd1_ori'] = input['pcd1_ori']
        results['pcd2_ori'] = input['pcd2_ori']
        return results
    def save_ply_data(self, pcd1, pcd2):
        vertexs = np.concatenate([pcd1, pcd2], axis=0)
        color1 = np.array([[255, 0, 0]], dtype=np.uint8)
        color2 = np.array([[0, 255, 0]], dtype=np.uint8)
        color1 = np.tile(color1, (pcd1.shape[0], 1))
        color2 = np.tile(color2, (pcd2.shape[0], 1))
        vertex_colors = np.concatenate([color1, color2], axis=0)
        vertexs = np.array([tuple(v) for v in vertexs],
                           dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')])
        vertex_colors = np.array([tuple(v) for v in vertex_colors],
                                 dtype=[('red', 'u1'), ('green', 'u1'),
                                        ('blue', 'u1')])
        vertex_all = np.empty(
            len(vertexs), vertexs.dtype.descr + vertex_colors.dtype.descr)
        for prop in vertexs.dtype.names:
            vertex_all[prop] = vertexs[prop]
        for prop in vertex_colors.dtype.names:
            vertex_all[prop] = vertex_colors[prop]
        el = PlyElement.describe(vertex_all, 'vertex')
        ply_data = PlyData([el])
        # .write(save_name)
        return ply_data
    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        results = self.model.postprocess(inputs)
        flow = results[OutputKeys.OUTPUT]
        pcd1 = inputs['pcd1_ori']
        pcd2 = inputs['pcd2_ori']
        if isinstance(pcd1, torch.Tensor):
            pcd1 = pcd1.cpu().numpy()
        if isinstance(pcd2, torch.Tensor):
            pcd2 = pcd2.cpu().numpy()
        if isinstance(flow, torch.Tensor):
            flow = flow.cpu().numpy()
        outputs = {
            OutputKeys.OUTPUT: flow,
            OutputKeys.PCD12: self.save_ply_data(pcd1, pcd2),
            OutputKeys.PCD12_ALIGN: self.save_ply_data(pcd1 + flow, pcd2),
        }
        return outputs
--- a/modelscope/utils/constant.py
+++ b/modelscope/utils/constant.py
@@ -104,6 +104,9 @@ class CVTasks(object):
    video_summarization = 'video-summarization'
    image_reid_person = 'image-reid-person'
    # pointcloud task
    pointcloud_sceneflow_estimation = 'pointcloud-sceneflow-estimation'
 class NLPTasks(object):
    # nlp tasks
--- a/requirements/cv.txt
+++ b/requirements/cv.txt
@@ -24,6 +24,7 @@ onnxruntime>=1.10
 opencv-python
 pai-easycv>=0.6.3.9
 pandas
 plyfile>=0.7.4
 psutil
 regex
 scikit-image>=0.19.3
--- a/tests/pipelines/test_pointcloud_sceneflow_estimation.py
+++ b/tests/pipelines/test_pointcloud_sceneflow_estimation.py
@@ -0,0 +1,42 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import unittest
 import numpy as np
 from modelscope.outputs import OutputKeys
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
 from modelscope.utils.demo_utils import DemoCompatibilityCheck
 from modelscope.utils.test_utils import test_level
 class PointCloudSceneFlowEstimationTest(unittest.TestCase,
                                        DemoCompatibilityCheck):
    def setUp(self) -> None:
        self.task = 'pointcloud-sceneflow-estimation'
        self.model_id = 'damo/cv_pointnet2_sceneflow-estimation_general'
    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_pointcloud_scenelfow_estimation(self):
        input_location = ('data/test/pointclouds/flyingthings_pcd1.npy',
                          'data/test/pointclouds/flyingthings_pcd2.npy')
        estimator = pipeline(
            Tasks.pointcloud_sceneflow_estimation, model=self.model_id)
        result = estimator(input_location)
        flow = result[OutputKeys.OUTPUT]
        pcd12 = result[OutputKeys.PCD12]
        pcd12_align = result[OutputKeys.PCD12_ALIGN]
        print(f'pred flow shape:{flow.shape}')
        np.save('flow.npy', flow)
        # visualization
        pcd12.write('pcd12.ply')
        pcd12_align.write('pcd12_align.ply')
        print('test_pointcloud_scenelfow_estimation DONE')
 if __name__ == '__main__':
    unittest.main()