mirror of
https://github.com/modelscope/modelscope.git
synced 2026-02-24 04:01:10 +01:00
add cv/image-defrcn-fewshot-detection
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11364804 * add model defrcn-fewshot-detection * add requirements check
This commit is contained in:
3
data/test/images/image_voc2007_000001.jpg
Normal file
3
data/test/images/image_voc2007_000001.jpg
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3f0bdad67d01aa452929683b74a124a2926b6bce534c85f3ee0f00e20eeacab0
|
||||
size 78771
|
||||
@@ -76,6 +76,7 @@ class Models(object):
|
||||
image_casmvs_depth_estimation = 'image-casmvs-depth-estimation'
|
||||
vop_retrieval_model = 'vop-retrieval-model'
|
||||
ddcolor = 'ddcolor'
|
||||
defrcn = 'defrcn'
|
||||
image_face_fusion = 'image-face-fusion'
|
||||
|
||||
# EasyCV models
|
||||
@@ -296,6 +297,7 @@ class Pipelines(object):
|
||||
image_multi_view_depth_estimation = 'image-multi-view-depth-estimation'
|
||||
vop_retrieval = 'vop-video-text-retrieval'
|
||||
ddcolor_image_colorization = 'ddcolor-image-colorization'
|
||||
image_fewshot_detection = 'image-fewshot-detection'
|
||||
image_face_fusion = 'image-face-fusion'
|
||||
|
||||
# nlp tasks
|
||||
@@ -416,6 +418,7 @@ class Trainers(object):
|
||||
referring_video_object_segmentation = 'referring-video-object-segmentation'
|
||||
image_classification_team = 'image-classification-team'
|
||||
image_classification = 'image-classification'
|
||||
image_fewshot_detection = 'image-fewshot-detection'
|
||||
|
||||
# nlp trainers
|
||||
bert_sentiment_analysis = 'bert-sentiment-analysis'
|
||||
|
||||
@@ -5,20 +5,20 @@ from . import (action_recognition, animal_recognition, body_2d_keypoints,
|
||||
body_3d_keypoints, cartoon, cmdssl_video_embedding,
|
||||
crowd_counting, face_2d_keypoints, face_detection,
|
||||
face_generation, human_wholebody_keypoint, image_classification,
|
||||
image_color_enhance, image_colorization, image_denoise,
|
||||
image_inpainting, image_instance_segmentation, image_matching,
|
||||
image_mvs_depth_estimation, image_panoptic_segmentation,
|
||||
image_portrait_enhancement, image_reid_person,
|
||||
image_semantic_segmentation, image_to_image_generation,
|
||||
image_to_image_translation, language_guided_video_summarization,
|
||||
movie_scene_segmentation, object_detection,
|
||||
panorama_depth_estimation, pointcloud_sceneflow_estimation,
|
||||
product_retrieval_embedding, realtime_object_detection,
|
||||
referring_video_object_segmentation, salient_detection,
|
||||
shop_segmentation, super_resolution, video_frame_interpolation,
|
||||
video_object_segmentation, video_single_object_tracking,
|
||||
video_stabilization, video_summarization,
|
||||
video_super_resolution, virual_tryon, vision_middleware,
|
||||
vop_retrieval)
|
||||
image_color_enhance, image_colorization, image_defrcn_fewshot,
|
||||
image_denoise, image_inpainting, image_instance_segmentation,
|
||||
image_matching, image_mvs_depth_estimation,
|
||||
image_panoptic_segmentation, image_portrait_enhancement,
|
||||
image_reid_person, image_semantic_segmentation,
|
||||
image_to_image_generation, image_to_image_translation,
|
||||
language_guided_video_summarization, movie_scene_segmentation,
|
||||
object_detection, panorama_depth_estimation,
|
||||
pointcloud_sceneflow_estimation, product_retrieval_embedding,
|
||||
realtime_object_detection, referring_video_object_segmentation,
|
||||
salient_detection, shop_segmentation, super_resolution,
|
||||
video_frame_interpolation, video_object_segmentation,
|
||||
video_single_object_tracking, video_stabilization,
|
||||
video_summarization, video_super_resolution, virual_tryon,
|
||||
vision_middleware, vop_retrieval)
|
||||
|
||||
# yapf: enable
|
||||
|
||||
20
modelscope/models/cv/image_defrcn_fewshot/__init__.py
Normal file
20
modelscope/models/cv/image_defrcn_fewshot/__init__.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
    # Real import only for static type checkers / IDEs; at runtime the
    # submodule is loaded lazily on first attribute access (see below).
    from .defrcn_for_fewshot import DeFRCNForFewShot

else:
    # Maps submodule name -> public symbols it exports, consumed by
    # LazyImportModule to resolve attributes on demand.
    _import_structure = {'defrcn_for_fewshot': ['DeFRCNForFewShot']}

    import sys

    # Replace this module in sys.modules with a lazy proxy so heavy
    # dependencies (torch/detectron2) are only imported when actually used.
    sys.modules[__name__] = LazyImportModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )
|
||||
@@ -0,0 +1,80 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
import torch
|
||||
|
||||
from modelscope.metainfo import Models
|
||||
from modelscope.models.base.base_torch_model import TorchModel
|
||||
from modelscope.models.builder import MODELS
|
||||
from modelscope.utils.config import Config
|
||||
from modelscope.utils.constant import ModelFile, Tasks
|
||||
from modelscope.utils.logger import get_logger
|
||||
from .models.defaults_config import _C
|
||||
from .models.defrcn import DeFRCN
|
||||
from .utils.requirements_check import requires_version
|
||||
|
||||
logger = get_logger()
|
||||
__all__ = ['DeFRCNForFewShot']
|
||||
|
||||
|
||||
@MODELS.register_module(
    Tasks.image_fewshot_detection, module_name=Models.defrcn)
class DeFRCNForFewShot(TorchModel):
    """Few-shot object detection model DeFRCN.

    The model requires detectron2-0.3 and pytorch-1.11. Model config params
    come mainly from detectron2; a detectron2 config file can be used to
    initialize the model. Detailed configs can be visited on
    detectron2.config.defaults and .models.defaults_config.
    """

    def __init__(self, model_dir: str, *args, **kwargs):
        """Initialize the few-shot defrcn model from the `model_dir` path.

        Args:
            model_dir (str): the model path.
            **kwargs: optional overrides:
                config_path (str): path (relative to `model_dir`) of the
                    detectron2 yaml config, overriding `model.config_path`
                    from configuration.json.
                model_weights (str): checkpoint path written into
                    `MODEL.WEIGHTS` of the detectron2 config.
        """
        # Fail fast if the installed dependency versions are unsuitable
        # (see .utils.requirements_check).
        requires_version()

        super().__init__(model_dir, *args, **kwargs)

        self.model_dir = model_dir
        # ModelScope-level configuration (configuration.json).
        self.config = Config.from_file(
            os.path.join(self.model_dir, ModelFile.CONFIGURATION))

        # Caller-supplied config path takes precedence over the one shipped
        # in configuration.json; must be merged before the detectron2 cfg
        # below is resolved.
        if 'config_path' in kwargs:
            self.config.merge_from_dict(
                {'model.config_path': kwargs['config_path']})

        # Detectron2-level config: clone the (DeFRCN-extended) defaults and
        # overlay the model-specific yaml referenced by the ModelScope config.
        self.model_cfg = _C.clone()
        self.model_cfg.merge_from_file(
            os.path.join(model_dir, self.config.model.config_path))

        if 'model_weights' in kwargs:
            self.model_cfg.merge_from_list(
                ['MODEL.WEIGHTS', kwargs['model_weights']])

        # Make the config immutable before handing it to the network.
        self.model_cfg.freeze()

        self.model = DeFRCN(self.model_cfg)

    def forward(self, inputs) -> Any:
        """Return the result computed by the underlying DeFRCN model.

        Args:
            inputs (list): the preprocessed data (batched inputs).

        Returns:
            Any: a loss dict in training mode; post-processed detection
            results in eval mode.
        """
        if self.training:
            return self.model.forward(inputs)
        else:
            return self.model.inference(inputs)

    def inference(self, input: Dict[str, Any]) -> Any:
        """Run detection on a single preprocessed sample without gradients.

        Args:
            input (Dict[str, Any]): one preprocessed sample; wrapped into a
                batch of size 1 before being passed to the model.

        Returns:
            Any: the single per-image result, or None if the model
            returned an empty result list.
        """
        with torch.no_grad():
            results = self.model([input])
        return results[0] if len(results) > 0 else None

    def get_model_cfg(self):
        """Return the frozen detectron2 config used to build the model."""
        return self.model_cfg
|
||||
20
modelscope/models/cv/image_defrcn_fewshot/models/__init__.py
Normal file
20
modelscope/models/cv/image_defrcn_fewshot/models/__init__.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from modelscope.utils.import_utils import LazyImportModule
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .defrcn import DeFRCN
|
||||
|
||||
else:
|
||||
_import_structure = {'defrcn': ['DeFRCN']}
|
||||
|
||||
import sys
|
||||
|
||||
sys.modules[__name__] = LazyImportModule(
|
||||
__name__,
|
||||
globals()['__file__'],
|
||||
_import_structure,
|
||||
module_spec=__spec__,
|
||||
extra_objects={},
|
||||
)
|
||||
@@ -0,0 +1,38 @@
|
||||
# The implementation is adopted from er-muyue/DeFRCN
# made publicly available under the MIT License at
# https://github.com/er-muyue/DeFRCN/blob/main/defrcn/config/defaults.py

from detectron2.config.defaults import _C

# NOTE(review): `_CC` is an alias of (not a clone of) detectron2's global
# default config, so every key added below is registered directly on `_C`.
_CC = _C

# ----------- Backbone ----------- #
# FREEZE disables gradients for the whole backbone (see DeFRCN.__init__).
_CC.MODEL.BACKBONE.FREEZE = False
_CC.MODEL.BACKBONE.FREEZE_AT = 3

# ------------- RPN -------------- #
# FREEZE stops RPN parameter updates; ENABLE_DECOUPLE/BACKWARD_SCALE control
# the Gradient Decoupled Layer on the RPN branch (DeFRCN._forward_once_).
_CC.MODEL.RPN.FREEZE = False
_CC.MODEL.RPN.ENABLE_DECOUPLE = False
_CC.MODEL.RPN.BACKWARD_SCALE = 1.0

# ------------- ROI -------------- #
_CC.MODEL.ROI_HEADS.NAME = 'Res5ROIHeads'
# FREEZE_FEAT freezes the shared res5 feature extractor of the ROI heads.
_CC.MODEL.ROI_HEADS.FREEZE_FEAT = False
# Gradient decoupling on the RCNN branch, mirroring the RPN switches above.
_CC.MODEL.ROI_HEADS.ENABLE_DECOUPLE = False
_CC.MODEL.ROI_HEADS.BACKWARD_SCALE = 1.0
_CC.MODEL.ROI_HEADS.OUTPUT_LAYER = 'FastRCNNOutputLayers'
# Optional dropout before the classification score layer
# (see FastRCNNOutputLayers.forward).
_CC.MODEL.ROI_HEADS.CLS_DROPOUT = False
_CC.MODEL.ROI_HEADS.DROPOUT_RATIO = 0.8
_CC.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 7  # for faster

# ------------- TEST ------------- #
# PCB_* keys: presumably the Prototypical Calibration Block used by DeFRCN
# at test time — not exercised in the code visible here; TODO confirm.
_CC.TEST.PCB_ENABLE = False
_CC.TEST.PCB_MODELTYPE = 'resnet'  # res-like
_CC.TEST.PCB_MODELPATH = ''
_CC.TEST.PCB_ALPHA = 0.50
_CC.TEST.PCB_UPPER = 1.0
_CC.TEST.PCB_LOWER = 0.05

# ------------ Other ------------- #
_CC.SOLVER.WEIGHT_DECAY = 5e-5
_CC.MUTE_HEADER = True
|
||||
179
modelscope/models/cv/image_defrcn_fewshot/models/defrcn.py
Normal file
179
modelscope/models/cv/image_defrcn_fewshot/models/defrcn.py
Normal file
@@ -0,0 +1,179 @@
|
||||
# The implementation is adopted from er-muyue/DeFRCN
|
||||
# made publicly available under the MIT License at
|
||||
# https://github.com/er-muyue/DeFRCN/blob/main/defrcn/modeling/meta_arch/rcnn.py
|
||||
|
||||
import os
|
||||
from typing import Dict
|
||||
|
||||
import torch
|
||||
from detectron2.layers import ShapeSpec
|
||||
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
|
||||
from detectron2.modeling.backbone.resnet import build_resnet_backbone
|
||||
from detectron2.modeling.box_regression import Box2BoxTransform
|
||||
from detectron2.modeling.matcher import Matcher
|
||||
from detectron2.modeling.postprocessing import detector_postprocess
|
||||
from detectron2.modeling.proposal_generator.rpn import RPN, StandardRPNHead
|
||||
from detectron2.structures import ImageList
|
||||
from torch import nn
|
||||
|
||||
from .gdl import AffineLayer, decouple_layer
|
||||
from .roi_heads import Res5ROIHeads
|
||||
|
||||
|
||||
class DeFRCN(nn.Module):
    """Decoupled Faster R-CNN (DeFRCN) meta-architecture.

    A generalized R-CNN assembled from detectron2 components (ResNet
    backbone, RPN, Res5 ROI heads), with Gradient Decoupled Layers and
    learnable affine re-calibration inserted between the backbone and the
    two heads (see `.gdl`).
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        self.device = torch.device(cfg.MODEL.DEVICE)

        # Backbone input channels are inferred from the per-channel mean.
        self.backbone = build_resnet_backbone(
            cfg, ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
        self._SHAPE_ = self.backbone.output_shape()

        # RPN is built from an explicit kwargs dict rather than detectron2's
        # RPN.from_config — see `from_rpn_config` below.
        rpn_config = DeFRCN.from_rpn_config(cfg, self._SHAPE_)
        self.proposal_generator = RPN(**rpn_config)

        self.roi_heads = Res5ROIHeads(cfg, self._SHAPE_)
        self.normalizer = self.normalize_fn()
        # One affine layer per downstream consumer of the 'res4' features,
        # applied after gradient decoupling.
        self.affine_rpn = AffineLayer(
            num_channels=self._SHAPE_['res4'].channels, bias=True)
        self.affine_rcnn = AffineLayer(
            num_channels=self._SHAPE_['res4'].channels, bias=True)
        self.to(self.device)

        # Optionally freeze sub-networks (used in few-shot fine-tuning).
        if cfg.MODEL.BACKBONE.FREEZE:
            for p in self.backbone.parameters():
                p.requires_grad = False

        if cfg.MODEL.RPN.FREEZE:
            for p in self.proposal_generator.parameters():
                p.requires_grad = False

        if cfg.MODEL.ROI_HEADS.FREEZE_FEAT:
            for p in self.roi_heads.res5.parameters():
                p.requires_grad = False

    def forward(self, batched_inputs):
        """Training entry point; returns the dict of RPN + ROI losses.

        In eval mode this delegates to `inference`. Each element of
        `batched_inputs` must carry ground-truth 'instances' when training.
        """
        if not self.training:
            return self.inference(batched_inputs)
        assert 'instances' in batched_inputs[0]
        gt_instances = [x['instances'].to(self.device) for x in batched_inputs]
        proposal_losses, detector_losses, _, _ = self._forward_once_(
            batched_inputs, gt_instances)
        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return losses

    def inference(self, batched_inputs):
        """Run detection and rescale results to each input's original size."""
        assert not self.training
        _, _, results, image_sizes = self._forward_once_(batched_inputs, None)
        processed_results = []
        for r, input, image_size in zip(results, batched_inputs, image_sizes):
            # Fall back to the padded/resized size when the caller did not
            # record the original image height/width.
            height = input.get('height', image_size[0])
            width = input.get('width', image_size[1])
            r = detector_postprocess(r, height, width)
            processed_results.append({'instances': r})
        return processed_results

    def _forward_once_(self, batched_inputs, gt_instances=None):
        # Shared trunk for both training and inference.
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)

        # Scale gradients flowing from the RPN back into the backbone, then
        # re-calibrate the features with a learnable affine layer.
        features_de_rpn = features
        if self.cfg.MODEL.RPN.ENABLE_DECOUPLE:
            scale = self.cfg.MODEL.RPN.BACKWARD_SCALE
            features_de_rpn = {
                k: self.affine_rpn(decouple_layer(features[k], scale))
                for k in features
            }
        proposals, proposal_losses = self.proposal_generator(
            images, features_de_rpn, gt_instances)

        # Same decoupling for the ROI-head branch, with its own scale and
        # affine parameters.
        features_de_rcnn = features
        if self.cfg.MODEL.ROI_HEADS.ENABLE_DECOUPLE:
            scale = self.cfg.MODEL.ROI_HEADS.BACKWARD_SCALE
            features_de_rcnn = {
                k: self.affine_rcnn(decouple_layer(features[k], scale))
                for k in features
            }
        results, detector_losses = self.roi_heads(images, features_de_rcnn,
                                                  proposals, gt_instances)

        return proposal_losses, detector_losses, results, images.image_sizes

    def preprocess_image(self, batched_inputs):
        """Move, normalize and pad the batch into a single `ImageList`."""
        images = [x['image'].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)
        return images

    def normalize_fn(self):
        """Build a closure computing (x - pixel_mean) / pixel_std per channel."""
        assert len(self.cfg.MODEL.PIXEL_MEAN) == len(self.cfg.MODEL.PIXEL_STD)
        num_channels = len(self.cfg.MODEL.PIXEL_MEAN)
        pixel_mean = (
            torch.Tensor(self.cfg.MODEL.PIXEL_MEAN).to(self.device).view(
                num_channels, 1, 1))
        pixel_std = (
            torch.Tensor(self.cfg.MODEL.PIXEL_STD).to(self.device).view(
                num_channels, 1, 1))
        return lambda x: (x - pixel_mean) / pixel_std

    @classmethod
    def from_rpn_config(cls, cfg, input_shape: Dict[str, ShapeSpec]):
        """Assemble the kwargs needed to construct detectron2's `RPN` directly.

        Builds the anchor generator, anchor matcher, box transform and RPN
        head from `cfg`, returning a dict suitable for `RPN(**ret)`.
        """
        in_features = cfg.MODEL.RPN.IN_FEATURES
        ret = {
            'in_features':
            in_features,
            'min_box_size':
            cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE,
            'nms_thresh':
            cfg.MODEL.RPN.NMS_THRESH,
            'batch_size_per_image':
            cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE,
            'positive_fraction':
            cfg.MODEL.RPN.POSITIVE_FRACTION,
            'loss_weight': {
                'loss_rpn_cls':
                cfg.MODEL.RPN.LOSS_WEIGHT,
                'loss_rpn_loc':
                cfg.MODEL.RPN.BBOX_REG_LOSS_WEIGHT * cfg.MODEL.RPN.LOSS_WEIGHT,
            },
            'anchor_boundary_thresh':
            cfg.MODEL.RPN.BOUNDARY_THRESH,
            'box2box_transform':
            Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS),
            'box_reg_loss_type':
            cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE,
            'smooth_l1_beta':
            cfg.MODEL.RPN.SMOOTH_L1_BETA,
        }

        # Separate (train, test) top-k settings, consumed by RPN internally.
        ret['pre_nms_topk'] = (cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN,
                               cfg.MODEL.RPN.PRE_NMS_TOPK_TEST)
        ret['post_nms_topk'] = (cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN,
                                cfg.MODEL.RPN.POST_NMS_TOPK_TEST)

        # ret["anchor_generator"] = build_anchor_generator(cfg, [input_shape[f] for f in in_features])
        anchor_cfg = DefaultAnchorGenerator.from_config(
            cfg, [input_shape[f] for f in in_features])
        ret['anchor_generator'] = DefaultAnchorGenerator(**anchor_cfg)
        ret['anchor_matcher'] = Matcher(
            cfg.MODEL.RPN.IOU_THRESHOLDS,
            cfg.MODEL.RPN.IOU_LABELS,
            allow_low_quality_matches=True)
        # The RPN head uses the channel count of the first selected feature;
        # all selected features are assumed to share it.
        rpn_head_cfg = {
            'in_channels':
            [s.channels for s in [input_shape[f] for f in in_features]][0],
            'num_anchors':
            ret['anchor_generator'].num_anchors[0],
            'box_dim':
            ret['anchor_generator'].box_dim
        }

        ret['head'] = StandardRPNHead(**rpn_head_cfg)
        return ret
|
||||
274
modelscope/models/cv/image_defrcn_fewshot/models/fast_rcnn.py
Normal file
274
modelscope/models/cv/image_defrcn_fewshot/models/fast_rcnn.py
Normal file
@@ -0,0 +1,274 @@
|
||||
# The implementation is adopted from er-muyue/DeFRCN
|
||||
# made publicly available under the MIT License at
|
||||
# https://github.com/er-muyue/DeFRCN/blob/main/defrcn/modeling/meta_arch/rcnn.py
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from detectron2.layers import batched_nms, cat
|
||||
from detectron2.modeling.roi_heads.fast_rcnn import \
|
||||
fast_rcnn_inference_single_image
|
||||
from detectron2.utils.events import get_event_storage
|
||||
from fvcore.nn import smooth_l1_loss
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
|
||||
def fast_rcnn_inference(boxes, scores, image_shapes, score_thresh, nms_thresh,
                        topk_per_image):
    """Run Fast R-CNN inference image by image and regroup the results.

    Delegates each image to ``fast_rcnn_inference_single_image`` with the
    shared thresholds, then transposes the per-image result tuples into
    parallel lists (e.g. instances per image, kept indices per image).
    """
    per_image = []
    for img_scores, img_boxes, img_shape in zip(scores, boxes, image_shapes):
        per_image.append(
            fast_rcnn_inference_single_image(
                img_boxes,
                img_scores,
                img_shape,
                score_thresh,
                nms_thresh,
                topk_per_image,
            ))
    # Transpose: list of per-image tuples -> tuple of per-field lists.
    return tuple(list(group) for group in zip(*per_image))
|
||||
|
||||
|
||||
class FastRCNNOutputs(object):
    """
    A class that stores information about outputs of a Fast R-CNN head,
    and computes losses / runs inference from them.
    """

    def __init__(
        self,
        box2box_transform,
        pred_class_logits,
        pred_proposal_deltas,
        proposals,
        smooth_l1_beta,
    ):
        """
        Args:
            box2box_transform (Box2BoxTransform/Box2BoxTransformRotated):
                box2box transform instance for proposal-to-detection transformations.
            pred_class_logits (Tensor): A tensor of shape (R, K + 1) storing the predicted class
                logits for all R predicted object instances.
                Each row corresponds to a predicted object instance.
            pred_proposal_deltas (Tensor): A tensor of shape (R, K * B) or (R, B) for
                class-specific or class-agnostic regression. It stores the predicted deltas that
                transform proposals into final box detections.
                B is the box dimension (4 or 5).
                When B is 4, each row is [dx, dy, dw, dh (, ....)].
                When B is 5, each row is [dx, dy, dw, dh, da (, ....)].
            proposals (list[Instances]): A list of N Instances, where Instances i stores the
                proposals for image i, in the field "proposal_boxes".
                When training, each Instances must have ground-truth labels
                stored in the field "gt_classes" and "gt_boxes".
            smooth_l1_beta (float): The transition point between L1 and L2 loss in
                the smooth L1 loss function. When set to 0, the loss becomes L1. When
                set to +inf, the loss becomes constant 0.
        """
        self.box2box_transform = box2box_transform
        self.num_preds_per_image = [len(p) for p in proposals]
        self.pred_class_logits = pred_class_logits
        self.pred_proposal_deltas = pred_proposal_deltas
        self.smooth_l1_beta = smooth_l1_beta

        box_type = type(proposals[0].proposal_boxes)
        # cat(..., dim=0) concatenates over all images in the batch
        self.proposals = box_type.cat([p.proposal_boxes for p in proposals])
        assert (not self.proposals.tensor.requires_grad
                ), 'Proposals should not require gradients!'
        self.image_shapes = [x.image_size for x in proposals]

        # The following fields should exist only when training.
        if proposals[0].has('gt_boxes'):
            self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals])
            assert proposals[0].has('gt_classes')
            self.gt_classes = cat([p.gt_classes for p in proposals], dim=0)

    def _log_accuracy(self):
        """
        Log the accuracy metrics to EventStorage.
        """
        num_instances = self.gt_classes.numel()
        pred_classes = self.pred_class_logits.argmax(dim=1)
        # The last logit column is the background class.
        bg_class_ind = self.pred_class_logits.shape[1] - 1

        # Foreground = labeled with a real category (not background, not ignore).
        fg_inds = (self.gt_classes >= 0) & (self.gt_classes < bg_class_ind)
        num_fg = fg_inds.nonzero().numel()
        fg_gt_classes = self.gt_classes[fg_inds]
        fg_pred_classes = pred_classes[fg_inds]

        # False negatives: foreground boxes predicted as background.
        num_false_negative = ((
            fg_pred_classes == bg_class_ind).nonzero().numel())
        num_accurate = (pred_classes == self.gt_classes).nonzero().numel()
        fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel()

        storage = get_event_storage()
        storage.put_scalar('fast_rcnn/cls_accuracy',
                           num_accurate / num_instances)
        if num_fg > 0:
            storage.put_scalar('fast_rcnn/fg_cls_accuracy',
                               fg_num_accurate / num_fg)
            storage.put_scalar('fast_rcnn/false_negative',
                               num_false_negative / num_fg)

    def softmax_cross_entropy_loss(self):
        """
        Compute the softmax cross entropy loss for box classification.

        Returns:
            scalar Tensor
        """
        self._log_accuracy()
        return F.cross_entropy(
            self.pred_class_logits, self.gt_classes, reduction='mean')

    def smooth_l1_loss(self):
        """
        Compute the smooth L1 loss for box regression.

        Returns:
            scalar Tensor
        """
        gt_proposal_deltas = self.box2box_transform.get_deltas(
            self.proposals.tensor, self.gt_boxes.tensor)
        box_dim = gt_proposal_deltas.size(1)  # 4 or 5
        # Agnostic regression predicts one box per proposal (B columns)
        # instead of one box per class (K * B columns).
        cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim
        device = self.pred_proposal_deltas.device

        bg_class_ind = self.pred_class_logits.shape[1] - 1

        # Regression loss is only computed on foreground proposals.
        fg_inds = torch.nonzero((self.gt_classes >= 0)
                                & (self.gt_classes < bg_class_ind)).squeeze(1)
        if cls_agnostic_bbox_reg:
            # pred_proposal_deltas only corresponds to foreground class for agnostic
            gt_class_cols = torch.arange(box_dim, device=device)
        else:
            # Select the B delta columns belonging to each proposal's gt class.
            fg_gt_classes = self.gt_classes[fg_inds]
            gt_class_cols = box_dim * fg_gt_classes[:, None] + torch.arange(
                box_dim, device=device)

        loss_box_reg = smooth_l1_loss(
            self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
            gt_proposal_deltas[fg_inds],
            self.smooth_l1_beta,
            reduction='sum',
        )

        # Normalize by the total number of proposals (fg + bg), not just fg.
        loss_box_reg = loss_box_reg / self.gt_classes.numel()
        return loss_box_reg

    def losses(self):
        """
        Compute the default losses for box head in Fast(er) R-CNN,
        with softmax cross entropy loss and smooth L1 loss.

        Returns:
            A dict of losses (scalar tensors) containing keys "loss_cls" and "loss_box_reg".
        """
        return {
            'loss_cls': self.softmax_cross_entropy_loss(),
            'loss_box_reg': self.smooth_l1_loss(),
        }

    def predict_boxes(self):
        """
        Returns:
            list[Tensor]: A list of Tensors of predicted class-specific or class-agnostic boxes
                for each image. Element i has shape (Ri, K * B) or (Ri, B), where Ri is
                the number of predicted objects for image i and B is the box dimension (4 or 5)
        """
        num_pred = len(self.proposals)
        B = self.proposals.tensor.shape[1]
        K = self.pred_proposal_deltas.shape[1] // B
        # Apply each of the K per-class deltas to its (repeated) proposal box.
        boxes = self.box2box_transform.apply_deltas(
            self.pred_proposal_deltas.view(num_pred * K, B),
            self.proposals.tensor.unsqueeze(1).expand(num_pred, K,
                                                      B).reshape(-1, B),
        )
        # Split the flat batch back into per-image chunks.
        return boxes.view(num_pred, K * B).split(
            self.num_preds_per_image, dim=0)

    def predict_probs(self):
        """
        Returns:
            list[Tensor]: A list of Tensors of predicted class probabilities for each image.
                Element i has shape (Ri, K + 1), where Ri is the number of predicted objects
                for image i.
        """
        probs = F.softmax(self.pred_class_logits, dim=-1)
        return probs.split(self.num_preds_per_image, dim=0)

    def inference(self, score_thresh, nms_thresh, topk_per_image):
        """
        Args:
            score_thresh (float): same as fast_rcnn_inference.
            nms_thresh (float): same as fast_rcnn_inference.
            topk_per_image (int): same as fast_rcnn_inference.
        Returns:
            list[Instances]: same as fast_rcnn_inference.
            list[Tensor]: same as fast_rcnn_inference.
        """
        boxes = self.predict_boxes()
        scores = self.predict_probs()
        image_shapes = self.image_shapes

        return fast_rcnn_inference(
            boxes,
            scores,
            image_shapes,
            score_thresh,
            nms_thresh,
            topk_per_image,
        )
|
||||
|
||||
|
||||
class FastRCNNOutputLayers(nn.Module):
    """Prediction heads for Fast R-CNN.

    Two parallel linear layers on top of the pooled per-proposal features:
    (1) a classifier over ``num_classes`` foreground classes plus one
    implicit background class, and (2) a box-delta regressor, either
    class-specific or class-agnostic.
    """

    def __init__(self,
                 cfg,
                 input_size,
                 num_classes,
                 cls_agnostic_bbox_reg,
                 box_dim=4):
        """
        Args:
            cfg: config providing MODEL.ROI_HEADS.CLS_DROPOUT and
                MODEL.ROI_HEADS.DROPOUT_RATIO.
            input_size (int): channels, or (channels, height, width)
            num_classes (int): number of foreground classes
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            box_dim (int): the dimension of bounding boxes.
                Example box dimensions: 4 for regular XYXY boxes and 5 for rotated XYWHA boxes
        """
        super(FastRCNNOutputLayers, self).__init__()

        # Flatten (C, H, W)-style sizes into a single feature dimension.
        flat_size = input_size if isinstance(input_size,
                                             int) else np.prod(input_size)

        # +1 output for the implicit background class.
        self.cls_score = nn.Linear(flat_size, num_classes + 1)
        reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        self.bbox_pred = nn.Linear(flat_size, reg_classes * box_dim)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        nn.init.constant_(self.cls_score.bias, 0)
        nn.init.constant_(self.bbox_pred.bias, 0)

        self._do_cls_dropout = cfg.MODEL.ROI_HEADS.CLS_DROPOUT
        self._dropout_ratio = cfg.MODEL.ROI_HEADS.DROPOUT_RATIO

    def forward(self, x):
        """Return (classification scores, box regression deltas) for ``x``."""
        feats = torch.flatten(x, start_dim=1) if x.dim() > 2 else x
        deltas = self.bbox_pred(feats)

        # Optional dropout is applied to the classification branch only;
        # the regression branch always sees the raw features.
        if self._do_cls_dropout:
            feats = F.dropout(
                feats, self._dropout_ratio, training=self.training)
        return self.cls_score(feats), deltas
|
||||
43
modelscope/models/cv/image_defrcn_fewshot/models/gdl.py
Normal file
43
modelscope/models/cv/image_defrcn_fewshot/models/gdl.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# The implementation is adopted from er-muyue/DeFRCN
|
||||
# made publicly available under the MIT License at
|
||||
# https://github.com/er-muyue/DeFRCN/blob/main/defrcn/modeling/meta_arch/gdl.py
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.autograd import Function
|
||||
|
||||
|
||||
class GradientDecoupleLayer(Function):
    """Identity in the forward pass; scales gradients by ``_lambda`` backward.

    This is DeFRCN's Gradient Decoupled Layer: it attenuates (or blocks,
    with ``_lambda == 0``) the gradient signal flowing from a head back
    into the backbone, without changing the forward computation.
    """

    @staticmethod
    def forward(ctx, x, _lambda):
        # Stash the scale for the backward pass; data passes through as-is.
        ctx._lambda = _lambda
        return x

    @staticmethod
    def backward(ctx, grad_output):
        # Scale the incoming gradient; `_lambda` itself receives no grad.
        return grad_output * ctx._lambda, None
|
||||
|
||||
|
||||
class AffineLayer(nn.Module):
    """Per-channel learnable affine transform: ``y = x * w (+ b)``.

    Initialized to the identity (w = 1, b = 0), so the layer is a no-op
    until training updates its parameters.
    """

    def __init__(self, num_channels, bias=False):
        super(AffineLayer, self).__init__()
        # One scale per channel, broadcast over batch and spatial dims.
        self.weight = nn.Parameter(
            torch.ones(1, num_channels, 1, 1), requires_grad=True)

        if bias:
            self.bias = nn.Parameter(
                torch.zeros(1, num_channels, 1, 1), requires_grad=True)
        else:
            self.bias = None

    def forward(self, X):
        scaled = X * self.weight.expand_as(X)
        if self.bias is None:
            return scaled
        return scaled + self.bias.expand_as(X)
|
||||
|
||||
|
||||
def decouple_layer(x, _lambda):
    """Functional wrapper around :class:`GradientDecoupleLayer`.

    Forward-identity on ``x``; gradients flowing back through the returned
    tensor are multiplied by ``_lambda``.
    """
    return GradientDecoupleLayer.apply(x, _lambda)
|
||||
302
modelscope/models/cv/image_defrcn_fewshot/models/roi_heads.py
Normal file
302
modelscope/models/cv/image_defrcn_fewshot/models/roi_heads.py
Normal file
@@ -0,0 +1,302 @@
|
||||
# The implementation is adopted from er-muyue/DeFRCN
|
||||
# made publicly available under the MIT License at
|
||||
# https://github.com/er-muyue/DeFRCN/blob/main/defrcn/modeling/roi_heads/roi_heads.py
|
||||
|
||||
from typing import Dict
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from detectron2.layers import ShapeSpec
|
||||
from detectron2.modeling.backbone.resnet import BottleneckBlock, make_stage
|
||||
from detectron2.modeling.box_regression import Box2BoxTransform
|
||||
from detectron2.modeling.matcher import Matcher
|
||||
from detectron2.modeling.poolers import ROIPooler
|
||||
from detectron2.modeling.proposal_generator.proposal_utils import \
|
||||
add_ground_truth_to_proposals
|
||||
from detectron2.modeling.roi_heads import select_foreground_proposals
|
||||
from detectron2.modeling.sampling import subsample_labels
|
||||
from detectron2.structures import Boxes, Instances, pairwise_iou
|
||||
from detectron2.utils.events import get_event_storage
|
||||
from torch import nn
|
||||
|
||||
from .fast_rcnn import FastRCNNOutputLayers, FastRCNNOutputs
|
||||
|
||||
|
||||
class ROIHeads(torch.nn.Module):
    """
    ROIHeads perform all per-region computation in an R-CNN.

    It contains logic of cropping the regions, extract per-region features,
    and make per-region predictions.

    It can have many variants, implemented as subclasses of this class.
    """

    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        """Read all ROI-head hyper-parameters from ``cfg``.

        Args:
            cfg: detectron2 config node.
            input_shape: per-feature-map shape specs (stride/channels) of the
                backbone outputs this head may consume.
        """
        super(ROIHeads, self).__init__()

        # fmt: off
        self.batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
        self.positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
        self.test_score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST
        self.test_nms_thresh = cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST
        self.test_detections_per_img = cfg.TEST.DETECTIONS_PER_IMAGE
        self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        self.proposal_append_gt = cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT
        self.feature_strides = {k: v.stride for k, v in input_shape.items()}
        self.feature_channels = {k: v.channels for k, v in input_shape.items()}
        self.cls_agnostic_bbox_reg = cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG
        self.smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA
        # fmt: on

        # Matcher to assign box proposals to gt boxes
        self.proposal_matcher = Matcher(
            cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
            cfg.MODEL.ROI_HEADS.IOU_LABELS,
            allow_low_quality_matches=False,
        )

        # Box2BoxTransform for bounding box regression
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)

    def _sample_proposals(self, matched_idxs, matched_labels, gt_classes):
        """
        Based on the matching between N proposals and M groundtruth,
        sample the proposals and set their classification labels.

        Args:
            matched_idxs (Tensor): a vector of length N, each is the best-matched
                gt index in [0, M) for each proposal.
            matched_labels (Tensor): a vector of length N, the matcher's label
                (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal.
            gt_classes (Tensor): a vector of length M.

        Returns:
            Tensor: a vector of indices of sampled proposals. Each is in [0, N).
            Tensor: a vector of the same length, the classification label for
                each sampled proposal. Each sample is labeled as either a category in
                [0, num_classes) or the background (num_classes).
        """
        has_gt = gt_classes.numel() > 0
        # Get the corresponding GT for each proposal
        if has_gt:
            gt_classes = gt_classes[matched_idxs]
            # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
            gt_classes[matched_labels == 0] = self.num_classes
            # Label ignore proposals (-1 label)
            gt_classes[matched_labels == -1] = -1
        else:
            # No ground truth in the image: every proposal is background.
            gt_classes = torch.zeros_like(matched_idxs) + self.num_classes

        sampled_fg_idxs, sampled_bg_idxs = subsample_labels(
            gt_classes,
            self.batch_size_per_image,
            self.positive_sample_fraction,
            self.num_classes,
        )

        # Foreground indices first, then background, concatenated into one vector.
        sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0)
        return sampled_idxs, gt_classes[sampled_idxs]

    @torch.no_grad()
    def label_and_sample_proposals(self, proposals, targets):
        """
        Prepare some proposals to be used to train the ROI heads.
        It performs box matching between `proposals` and `targets`, and assigns
        training labels to the proposals.
        It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes,
        with a fraction of positives that is no larger than `self.positive_sample_fraction.

        Args:
            See :meth:`ROIHeads.forward`

        Returns:
            list[Instances]:
                length `N` list of `Instances`s containing the proposals
                sampled for training. Each `Instances` has the following fields:
                - proposal_boxes: the proposal boxes
                - gt_boxes: the ground-truth box that the proposal is assigned to
                  (this is only meaningful if the proposal has a label > 0; if label = 0
                  then the ground-truth box is random)
                Other fields such as "gt_classes" that's included in `targets`.
        """
        gt_boxes = [x.gt_boxes for x in targets]

        # Optionally add the GT boxes themselves as proposals, which guarantees
        # at least one positive match per annotated object.
        if self.proposal_append_gt:
            proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

        proposals_with_gt = []

        num_fg_samples = []
        num_bg_samples = []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            has_gt = len(targets_per_image) > 0
            match_quality_matrix = pairwise_iou(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
            matched_idxs, matched_labels = self.proposal_matcher(
                match_quality_matrix)
            sampled_idxs, gt_classes = self._sample_proposals(
                matched_idxs, matched_labels, targets_per_image.gt_classes)

            # Set target attributes of the sampled proposals:
            proposals_per_image = proposals_per_image[sampled_idxs]
            proposals_per_image.gt_classes = gt_classes

            # We index all the attributes of targets that start with "gt_"
            # and have not been added to proposals yet (="gt_classes").
            if has_gt:
                sampled_targets = matched_idxs[sampled_idxs]

                for (
                        trg_name,
                        trg_value,
                ) in targets_per_image.get_fields().items():
                    if trg_name.startswith(
                            'gt_') and not proposals_per_image.has(trg_name):
                        proposals_per_image.set(trg_name,
                                                trg_value[sampled_targets])
            else:
                # No GT: attach all-zero boxes so downstream code can still
                # read `gt_boxes` (values are meaningless for background).
                gt_boxes = Boxes(
                    targets_per_image.gt_boxes.tensor.new_zeros(
                        (len(sampled_idxs), 4)))
                proposals_per_image.gt_boxes = gt_boxes

            num_bg_samples.append(
                (gt_classes == self.num_classes).sum().item())
            num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1])
            proposals_with_gt.append(proposals_per_image)

        # Log the number of fg/bg samples that are selected for training ROI heads
        storage = get_event_storage()
        storage.put_scalar('roi_head/num_fg_samples', np.mean(num_fg_samples))
        storage.put_scalar('roi_head/num_bg_samples', np.mean(num_bg_samples))

        return proposals_with_gt

    def forward(self, images, features, proposals, targets=None):
        """
        Args:
            images (ImageList):
            features (dict[str: Tensor]): input data as a mapping from feature
                map name to tensor. Axis 0 represents the number of images `N` in
                the input data; axes 1-3 are channels, height, and width, which may
                vary between feature maps (e.g., if a feature pyramid is used).
            proposals (list[Instances]): length `N` list of `Instances`s. The i-th
                `Instances` contains object proposals for the i-th input image,
                with fields "proposal_boxes" and "objectness_logits".
            targets (list[Instances], optional): length `N` list of `Instances`s. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image. Specify `targets` during training only.
                It may have the following fields:
                - gt_boxes: the bounding box of each instance.
                - gt_classes: the label for each instance with a category ranging in [0, #class].

        Returns:
            results (list[Instances]): length `N` list of `Instances`s containing the
                detected instances. Returned during inference only; may be []
                during training.
            losses (dict[str: Tensor]): mapping from a named loss to a tensor
                storing the loss. Used during training only.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class Res5ROIHeads(ROIHeads):
    """
    The ROIHeads in a typical "C4" R-CNN model, where the heads share the
    cropping and the per-region feature computation by a Res5 block.
    """

    def __init__(self, cfg, input_shape):
        super().__init__(cfg, input_shape)

        # C4 heads consume exactly one backbone feature map.
        assert len(self.in_features) == 1

        # fmt: off
        pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
        pooler_scales = (1.0 / self.feature_strides[self.in_features[0]], )
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        # fmt: on

        self.pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )

        self.res5, out_channels = self._build_res5_block(cfg)
        self.box_predictor = FastRCNNOutputLayers(cfg, out_channels,
                                                  self.num_classes,
                                                  self.cls_agnostic_bbox_reg)

    def _build_res5_block(self, cfg):
        """Build the res5 stage used as the shared per-region head.

        Returns:
            (nn.Sequential, int): the res5 block and its output channel count.
        """
        # fmt: off
        stage_channel_factor = 2**3  # res5 is 8x res2
        num_groups = cfg.MODEL.RESNETS.NUM_GROUPS
        width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP
        bottleneck_channels = num_groups * width_per_group * stage_channel_factor
        out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor
        stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1
        norm = cfg.MODEL.RESNETS.NORM
        assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \
            'Deformable conv is not yet supported in res5 head.'
        # fmt: on

        blocks = make_stage(
            BottleneckBlock,
            3,
            first_stride=2,
            in_channels=out_channels // 2,
            bottleneck_channels=bottleneck_channels,
            out_channels=out_channels,
            num_groups=num_groups,
            norm=norm,
            stride_in_1x1=stride_in_1x1,
        )
        return nn.Sequential(*blocks), out_channels

    def _shared_roi_transform(self, features, boxes):
        """ROI-pool `boxes` from `features`, then run the res5 block."""
        x = self.pooler(features, boxes)
        x = self.res5(x)
        return x

    def forward(self, images, features, proposals, targets=None):
        """
        See :class:`ROIHeads.forward`.
        """
        del images  # not used by this head; freed early

        if self.training:
            proposals = self.label_and_sample_proposals(proposals, targets)
        del targets

        proposal_boxes = [x.proposal_boxes for x in proposals]
        box_features = self._shared_roi_transform(
            [features[f] for f in self.in_features], proposal_boxes)
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
        pred_class_logits, pred_proposal_deltas = self.box_predictor(
            feature_pooled)
        del feature_pooled  # free memory before loss/inference

        outputs = FastRCNNOutputs(
            self.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            self.smooth_l1_beta,
        )

        if self.training:
            del features
            losses = outputs.losses()
            return [], losses
        else:
            pred_instances, _ = outputs.inference(
                self.test_score_thresh,
                self.test_nms_thresh,
                self.test_detections_per_img,
            )
            return pred_instances, {}
|
||||
@@ -0,0 +1,81 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
|
||||
from packaging import version
|
||||
|
||||
from modelscope.utils.import_utils import _torch_available
|
||||
|
||||
if sys.version_info < (3, 8):
|
||||
import importlib_metadata
|
||||
else:
|
||||
import importlib.metadata as importlib_metadata
|
||||
|
||||
DETECTRON2_REQUIRED_VERSION = version.parse('0.3')
|
||||
|
||||
|
||||
def is_detectron2_version_available():
    """Return True iff detectron2 is installed and its major.minor version
    equals ``DETECTRON2_REQUIRED_VERSION`` (patch releases are accepted).
    """
    # Absent package: nothing to compare against.
    if importlib.util.find_spec('detectron2') is None:
        return False
    installed = version.parse(importlib_metadata.version('detectron2'))
    required = DETECTRON2_REQUIRED_VERSION
    return (installed.major, installed.minor) == (required.major,
                                                  required.minor)
|
||||
|
||||
|
||||
TORCH_REQUIRED_VERSION = version.parse('1.11')
|
||||
|
||||
|
||||
def is_torch_version_available():
    """Return True iff torch is importable and its installed major.minor
    version equals ``TORCH_REQUIRED_VERSION`` (patch releases are accepted).
    """
    if not _torch_available:
        return False
    installed = version.parse(importlib_metadata.version('torch'))
    required = TORCH_REQUIRED_VERSION
    return (installed.major, installed.minor) == (required.major,
                                                  required.minor)
|
||||
|
||||
|
||||
# Install-hint template; '{0}' is filled with the feature name
# (e.g. 'DeFRCN') by requires_version().
DETECTRON2_IMPORT_ERROR = """
{0} requires the detectron2-0.3 but it was not found in your environment.
You can install it from modelscope lib with pip:
`pip install detectron2==0.3`
"""

# Install-hint template for the pinned torch version.
TORCH_VERSION_IMPORT_ERROR = """
{0} requires the torch-1.11 but it was not found in your environment. You can install it with pip:
`pip install torch==1.11`
"""

# Maps a requirement tag to a (checker_callable, error_template) pair.
# NOTE(review): 'MAAPING' looks like a typo for 'MAPPING'; kept as-is since
# other modules may already import this public name.
REQUIREMENTS_MAAPING_VERSION = OrderedDict([
    ('detectron2-0.3', (is_detectron2_version_available,
                        DETECTRON2_IMPORT_ERROR)),
    ('torch-1.11', (is_torch_version_available, TORCH_VERSION_IMPORT_ERROR)),
])

# Requirement tags validated by requires_version(); each must be a key of
# REQUIREMENTS_MAAPING_VERSION.
REQUIREMENTS = ['detectron2-0.3', 'torch-1.11']
|
||||
|
||||
|
||||
def requires_version():
    """Verify every entry of ``REQUIREMENTS`` is satisfied.

    Raises:
        NotImplementedError: a requirement tag has no registered checker.
        ImportError: one or more requirement checks failed; the message
            concatenates the corresponding install hints.
    """
    checks = []
    for requirement in REQUIREMENTS:
        try:
            checks.append(REQUIREMENTS_MAAPING_VERSION[requirement])
        except KeyError:
            raise NotImplementedError(
                '{} do not supported check'.format(requirement))

    failed = [
        msg.format('DeFRCN') for available, msg in checks if not available()
    ]
    if failed:
        raise ImportError(''.join(failed))
|
||||
342
modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py
Normal file
342
modelscope/models/cv/image_defrcn_fewshot/utils/voc_register.py
Normal file
@@ -0,0 +1,342 @@
|
||||
# The implementation is adopted from er-muyue/DeFRCN
|
||||
# made publicly available under the MIT License at
|
||||
# https://github.com/er-muyue/DeFRCN/blob/main/defrcn/data/meta_voc.py
|
||||
|
||||
import os
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
import numpy as np
|
||||
from detectron2.data import DatasetCatalog, MetadataCatalog
|
||||
from detectron2.structures import BoxMode
|
||||
from fvcore.common.file_io import PathManager
|
||||
|
||||
# PASCAL VOC categories
|
||||
# PASCAL VOC categories
# Keyed by split id (1/2/3). For each split the list contains the 15 base
# classes first, followed by the 5 novel classes; list index defines the
# contiguous category id used in the dataset dicts.
PASCAL_VOC_ALL_CATEGORIES = {
    1: [
        'aeroplane',
        'bicycle',
        'boat',
        'bottle',
        'car',
        'cat',
        'chair',
        'diningtable',
        'dog',
        'horse',
        'person',
        'pottedplant',
        'sheep',
        'train',
        'tvmonitor',
        'bird',
        'bus',
        'cow',
        'motorbike',
        'sofa',
    ],
    2: [
        'bicycle',
        'bird',
        'boat',
        'bus',
        'car',
        'cat',
        'chair',
        'diningtable',
        'dog',
        'motorbike',
        'person',
        'pottedplant',
        'sheep',
        'train',
        'tvmonitor',
        'aeroplane',
        'bottle',
        'cow',
        'horse',
        'sofa',
    ],
    3: [
        'aeroplane',
        'bicycle',
        'bird',
        'bottle',
        'bus',
        'car',
        'chair',
        'cow',
        'diningtable',
        'dog',
        'horse',
        'person',
        'pottedplant',
        'train',
        'tvmonitor',
        'boat',
        'cat',
        'motorbike',
        'sheep',
        'sofa',
    ]
}

# The 5 held-out (novel) classes of each split, used for few-shot fine-tuning.
PASCAL_VOC_NOVEL_CATEGORIES = {
    1: ['bird', 'bus', 'cow', 'motorbike', 'sofa'],
    2: ['aeroplane', 'bottle', 'cow', 'horse', 'sofa'],
    3: ['boat', 'cat', 'motorbike', 'sheep', 'sofa']
}

# The 15 base classes of each split (ALL minus NOVEL), used for base training.
PASCAL_VOC_BASE_CATEGORIES = {
    1: [
        'aeroplane',
        'bicycle',
        'boat',
        'bottle',
        'car',
        'cat',
        'chair',
        'diningtable',
        'dog',
        'horse',
        'person',
        'pottedplant',
        'sheep',
        'train',
        'tvmonitor',
    ],
    2: [
        'bicycle',
        'bird',
        'boat',
        'bus',
        'car',
        'cat',
        'chair',
        'diningtable',
        'dog',
        'motorbike',
        'person',
        'pottedplant',
        'sheep',
        'train',
        'tvmonitor',
    ],
    3: [
        'aeroplane',
        'bicycle',
        'bird',
        'bottle',
        'bus',
        'car',
        'chair',
        'cow',
        'diningtable',
        'dog',
        'horse',
        'person',
        'pottedplant',
        'train',
        'tvmonitor',
    ]
}
|
||||
|
||||
|
||||
def load_filtered_voc_instances(name: str, root: str, dirname: str, split: str,
                                classnames: str):
    """
    Load Pascal VOC detection annotations to Detectron2 format.

    Args:
        name: registered dataset name; a name containing 'shot' (e.g.
            'voc_2007_trainval_all1_5shot_seed0') selects the few-shot branch.
        root: dataset root containing 'vocsplit' and the VOC year folders.
        dirname: Contain "Annotations", "ImageSets", "JPEGImages"
        split (str): one of "train", "test", "val", "trainval"
        classnames: ordered category names; the index in this sequence is the
            'category_id' written into each annotation.

    Returns:
        list[dict]: dataset dicts in Detectron2 format.
    """
    is_shots = 'shot' in name
    dicts = []
    if is_shots:
        # Few-shot branch: read the fixed per-class file lists for the given
        # shot count and seed from the 'vocsplit' directory.
        fileids = {}
        split_dir = os.path.join(root, 'vocsplit')
        shot = name.split('_')[-2].split('shot')[0]
        seed = int(name.split('_seed')[-1])
        split_dir = os.path.join(split_dir, 'seed{}'.format(seed))
        for cls in classnames:
            with PathManager.open(
                    os.path.join(split_dir,
                                 'box_{}shot_{}_train.txt'.format(shot,
                                                                  cls))) as f:
                # FIX: `np.str` was deprecated in NumPy 1.20 and removed in
                # 1.24; the builtin `str` is the documented, identical
                # replacement for this dtype.
                fileids_ = np.loadtxt(f, dtype=str).tolist()
                if isinstance(fileids_, str):
                    # loadtxt collapses a single-line file to a scalar.
                    fileids_ = [fileids_]
                fileids_ = [
                    fid.split('/')[-1].split('.jpg')[0] for fid in fileids_
                ]
                fileids[cls] = fileids_

        for cls, fileids_ in fileids.items():
            dicts_ = []
            for fileid in fileids_:
                # VOC2012 ids contain '_' (e.g. '2008_000001'); VOC2007 don't.
                year = '2012' if '_' in fileid else '2007'

                dir_voc = os.path.join(root, 'VOC{}'.format(year))
                anno_file = os.path.join(dir_voc, 'Annotations',
                                         fileid + '.xml')
                jpeg_file = os.path.join(dir_voc, 'JPEGImages',
                                         fileid + '.jpg')

                tree = ET.parse(anno_file)

                # One record per matching object, each holding exactly one
                # instance, so that shot counting below works per instance.
                for obj in tree.findall('object'):
                    r = {
                        'file_name': jpeg_file,
                        'image_id': fileid,
                        'height': int(tree.findall('./size/height')[0].text),
                        'width': int(tree.findall('./size/width')[0].text),
                    }
                    cls_ = obj.find('name').text
                    if cls != cls_:
                        continue
                    bbox = obj.find('bndbox')
                    bbox = [
                        float(bbox.find(x).text)
                        for x in ['xmin', 'ymin', 'xmax', 'ymax']
                    ]
                    # VOC coordinates are 1-based; shift to 0-based.
                    bbox[0] -= 1.0
                    bbox[1] -= 1.0

                    instances = [{
                        'category_id': classnames.index(cls),
                        'bbox': bbox,
                        'bbox_mode': BoxMode.XYXY_ABS,
                    }]
                    r['annotations'] = instances
                    dicts_.append(r)
            # Keep at most `shot` instances per class.
            if len(dicts_) > int(shot):
                dicts_ = np.random.choice(dicts_, int(shot), replace=False)
            dicts.extend(dicts_)
    else:
        # Full-split branch: read the image-id list of the requested split.
        with PathManager.open(
                os.path.join(root, dirname, 'ImageSets', 'Main',
                             split + '.txt')) as f:
            # FIX: `str` replaces the removed `np.str` alias (see above).
            fileids = np.loadtxt(f, dtype=str)

        for fileid in fileids:
            anno_file = os.path.join(root, dirname, 'Annotations',
                                     fileid + '.xml')
            jpeg_file = os.path.join(root, dirname, 'JPEGImages',
                                     fileid + '.jpg')

            tree = ET.parse(anno_file)

            r = {
                'file_name': jpeg_file,
                'image_id': fileid,
                'height': int(tree.findall('./size/height')[0].text),
                'width': int(tree.findall('./size/width')[0].text),
            }
            instances = []

            for obj in tree.findall('object'):
                cls = obj.find('name').text
                # Skip objects whose class is filtered out for this split.
                if not (cls in classnames):
                    continue
                bbox = obj.find('bndbox')
                bbox = [
                    float(bbox.find(x).text)
                    for x in ['xmin', 'ymin', 'xmax', 'ymax']
                ]
                # VOC coordinates are 1-based; shift to 0-based.
                bbox[0] -= 1.0
                bbox[1] -= 1.0

                instances.append({
                    'category_id': classnames.index(cls),
                    'bbox': bbox,
                    'bbox_mode': BoxMode.XYXY_ABS,
                })
            r['annotations'] = instances
            dicts.append(r)

    return dicts
|
||||
|
||||
|
||||
def register_meta_voc(name, root, dirname, split, year, keepclasses, sid):
    """Register one (meta) Pascal VOC split in Detectron2's catalogs.

    Args:
        name: dataset name to register.
        root: dataset root directory.
        dirname: VOC year folder name (e.g. 'VOC2007').
        split: split identifier passed on to the loader.
        year: 2007 or 2012, stored as metadata.
        keepclasses: which class set to keep; prefix must be 'base_novel',
            'base' or 'novel' (checked in that order, since 'base_novel'
            also starts with 'base').
        sid: split id in {1, 2, 3}.

    Raises:
        ValueError: if ``keepclasses`` has an unknown prefix.
    """
    if keepclasses.startswith('base_novel'):
        thing_classes = PASCAL_VOC_ALL_CATEGORIES[sid]
    elif keepclasses.startswith('base'):
        thing_classes = PASCAL_VOC_BASE_CATEGORIES[sid]
    elif keepclasses.startswith('novel'):
        thing_classes = PASCAL_VOC_NOVEL_CATEGORIES[sid]
    else:
        # FIX: previously an unknown prefix fell through and crashed later
        # with NameError on `thing_classes`; fail fast with a clear message.
        raise ValueError(
            "Unknown keepclasses '{}': expected prefix 'base_novel', "
            "'base' or 'novel'".format(keepclasses))

    # Lazy loader: annotations are parsed only when the dataset is used.
    DatasetCatalog.register(
        name,
        lambda: load_filtered_voc_instances(name, root, dirname, split,
                                            thing_classes),
    )

    MetadataCatalog.get(name).set(
        thing_classes=thing_classes,
        dirname=os.path.join(root, dirname),
        year=year,
        split=split,
        base_classes=PASCAL_VOC_BASE_CATEGORIES[sid],
        novel_classes=PASCAL_VOC_NOVEL_CATEGORIES[sid],
    )
|
||||
|
||||
|
||||
def register_all_voc(root='datasets'):
    """Register every base/novel/all Pascal VOC split used by DeFRCN.

    Covers the fixed trainval/test splits plus all few-shot combinations
    (prefix x split-id x shot x year x 30 seeds). Already-registered names
    are skipped so the function is safe to call more than once.

    Args:
        root: dataset root containing the VOC year folders and 'vocsplit'.
    """
    # Fixed (non-few-shot) splits: (name, dirname, split, keepclasses, sid).
    METASPLITS = [
        ('voc_2007_trainval_base1', 'VOC2007', 'trainval', 'base1', 1),
        ('voc_2007_trainval_base2', 'VOC2007', 'trainval', 'base2', 2),
        ('voc_2007_trainval_base3', 'VOC2007', 'trainval', 'base3', 3),
        ('voc_2012_trainval_base1', 'VOC2012', 'trainval', 'base1', 1),
        ('voc_2012_trainval_base2', 'VOC2012', 'trainval', 'base2', 2),
        ('voc_2012_trainval_base3', 'VOC2012', 'trainval', 'base3', 3),
        ('voc_2007_trainval_all1', 'VOC2007', 'trainval', 'base_novel_1', 1),
        ('voc_2007_trainval_all2', 'VOC2007', 'trainval', 'base_novel_2', 2),
        ('voc_2007_trainval_all3', 'VOC2007', 'trainval', 'base_novel_3', 3),
        ('voc_2012_trainval_all1', 'VOC2012', 'trainval', 'base_novel_1', 1),
        ('voc_2012_trainval_all2', 'VOC2012', 'trainval', 'base_novel_2', 2),
        ('voc_2012_trainval_all3', 'VOC2012', 'trainval', 'base_novel_3', 3),
        ('voc_2007_test_base1', 'VOC2007', 'test', 'base1', 1),
        ('voc_2007_test_base2', 'VOC2007', 'test', 'base2', 2),
        ('voc_2007_test_base3', 'VOC2007', 'test', 'base3', 3),
        ('voc_2007_test_novel1', 'VOC2007', 'test', 'novel1', 1),
        ('voc_2007_test_novel2', 'VOC2007', 'test', 'novel2', 2),
        ('voc_2007_test_novel3', 'VOC2007', 'test', 'novel3', 3),
        ('voc_2007_test_all1', 'VOC2007', 'test', 'base_novel_1', 1),
        ('voc_2007_test_all2', 'VOC2007', 'test', 'base_novel_2', 2),
        ('voc_2007_test_all3', 'VOC2007', 'test', 'base_novel_3', 3),
    ]
    # Few-shot splits: for these entries the 'split' slot carries the split
    # file id (the few-shot loader branch does not use it).
    for prefix in ['all', 'novel']:
        for sid in range(1, 4):
            for shot in [1, 2, 3, 5, 10]:
                for year in [2007, 2012]:
                    for seed in range(30):
                        seed = '_seed{}'.format(seed)
                        name = 'voc_{}_trainval_{}{}_{}shot{}'.format(
                            year, prefix, sid, shot, seed)
                        dirname = 'VOC{}'.format(year)
                        img_file = '{}_{}shot_split_{}_trainval'.format(
                            prefix, shot, sid)
                        keepclasses = ('base_novel_{}'.format(sid) if prefix
                                       == 'all' else 'novel{}'.format(sid))
                        METASPLITS.append(
                            (name, dirname, img_file, keepclasses, sid))

    for name, dirname, split, keepclasses, sid in METASPLITS:
        # Skip names that were registered by an earlier call.
        if name in DatasetCatalog:
            continue

        year = 2007 if '2007' in name else 2012
        register_meta_voc(
            name,
            root,
            dirname,
            split,
            year,
            keepclasses,
            sid,
        )
        MetadataCatalog.get(name).evaluator_type = 'pascal_voc'
|
||||
@@ -82,6 +82,8 @@ TASK_INPUTS = {
|
||||
InputType.IMAGE,
|
||||
Tasks.portrait_matting:
|
||||
InputType.IMAGE,
|
||||
Tasks.image_fewshot_detection:
|
||||
InputType.IMAGE,
|
||||
|
||||
# image editing task result for a single image
|
||||
Tasks.skin_retouching:
|
||||
|
||||
@@ -269,6 +269,9 @@ DEFAULT_MODEL_FOR_PIPELINE = {
|
||||
Tasks.image_multi_view_depth_estimation: (
|
||||
Pipelines.image_multi_view_depth_estimation,
|
||||
'damo/cv_casmvs_multi-view-depth-estimation_general'),
|
||||
Tasks.image_fewshot_detection: (
|
||||
Pipelines.image_fewshot_detection,
|
||||
'damo/cv_resnet101_detection_fewshot-defrcn'),
|
||||
Tasks.image_body_reshaping: (Pipelines.image_body_reshaping,
|
||||
'damo/cv_flow-based-body-reshaping_damo'),
|
||||
Tasks.image_face_fusion: (Pipelines.image_face_fusion,
|
||||
|
||||
@@ -83,6 +83,7 @@ if TYPE_CHECKING:
|
||||
from .image_mvs_depth_estimation_pipeline import ImageMultiViewDepthEstimationPipeline
|
||||
from .panorama_depth_estimation_pipeline import PanoramaDepthEstimationPipeline
|
||||
from .ddcolor_image_colorization_pipeline import DDColorImageColorizationPipeline
|
||||
from .image_defrcn_fewshot_pipeline import ImageDefrcnDetectionPipeline
|
||||
|
||||
else:
|
||||
_import_structure = {
|
||||
@@ -197,6 +198,7 @@ else:
|
||||
'ddcolor_image_colorization_pipeline': [
|
||||
'DDColorImageColorizationPipeline'
|
||||
],
|
||||
'image_defrcn_fewshot_pipeline': ['ImageDefrcnDetectionPipeline'],
|
||||
}
|
||||
|
||||
import sys
|
||||
|
||||
104
modelscope/pipelines/cv/image_defrcn_fewshot_pipeline.py
Normal file
104
modelscope/pipelines/cv/image_defrcn_fewshot_pipeline.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from modelscope.metainfo import Pipelines
|
||||
from modelscope.outputs import OutputKeys
|
||||
from modelscope.pipelines.base import Input, Pipeline
|
||||
from modelscope.pipelines.builder import PIPELINES
|
||||
from modelscope.preprocessors import LoadImage
|
||||
from modelscope.utils.constant import ModelFile, Tasks
|
||||
|
||||
|
||||
@PIPELINES.register_module(
    Tasks.image_fewshot_detection,
    module_name=Pipelines.image_fewshot_detection)
class ImageDefrcnDetectionPipeline(Pipeline):
    """ Image DeFRCN few-shot detection Pipeline. Given a image,
    pipeline will return the detection results on the image.
    Example:

    ```python
    >>> from modelscope.pipelines import pipeline
    >>> detector = pipeline('image-fewshot-detection', 'damo/cv_resnet101_detection_fewshot-defrcn')
    >>> detector('/Path/Image')
    {
        'scores': [0.8307567834854126, 0.1606406420469284],
        'labels': ['person', 'dog'],
        'boxes': [
            [27.391937255859375, 0.0, 353.0, 500.0],
            [64.22428131103516, 229.2884521484375, 213.90573120117188, 370.0657958984375]
        ]
    }
    >>> #
    ```
    """

    def __init__(self, model: str, **kwargs):
        """
        model: model id on modelscope hub.
        """
        super().__init__(model=model, auto_collate=False, **kwargs)

        # Load the fine-tuned weights next to the model config.
        model_path = os.path.join(self.model.model_dir,
                                  ModelFile.TORCH_MODEL_FILE)
        self.model.model = self._load_pretrained(
            self.model.model, model_path, self.model.model_cfg.MODEL.DEVICE)

    def _load_pretrained(self, net, load_path, device='cuda', strict=True):
        """Load a detectron2-style checkpoint into ``net``.

        Training-only entries (scheduler/optimizer/iteration state) are
        dropped so only the model weights are restored.

        Args:
            net: module whose state dict is populated.
            load_path: checkpoint file path.
            device: map_location for torch.load.
            strict: passed through to ``load_state_dict``.

        Returns:
            The same ``net`` with weights loaded.
        """
        load_net = torch.load(load_path, map_location=device)
        if 'scheduler' in load_net:
            del load_net['scheduler']
        if 'optimizer' in load_net:
            del load_net['optimizer']
        if 'iteration' in load_net:
            del load_net['iteration']
        net.load_state_dict(load_net['model'], strict=strict)

        return net

    def preprocess(self, input: Input) -> Dict[str, Any]:
        """Read the image and convert it to a CHW float tensor in BGR order."""
        img = LoadImage.convert_to_ndarray(input)
        # FIX: `np.float` was deprecated in NumPy 1.20 and removed in 1.24;
        # `np.float64` is exactly the dtype the removed alias referred to,
        # so the numerical behavior is unchanged.
        img = img.astype(np.float64)

        image = img[..., ::-1].copy()  # rgb to bgr
        tim = torch.Tensor(image).permute(2, 0, 1)

        result = {'image': tim}
        return result

    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
        """Run model inference on the preprocessed input."""
        outputs = self.model.inference(input)
        result = {'data': outputs}
        return result

    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Convert model instances to {scores, labels, boxes} lists."""
        # No detections: return an empty, well-formed result.
        if inputs['data'] is None:
            outputs = {
                OutputKeys.SCORES: [],
                OutputKeys.LABELS: [],
                OutputKeys.BOXES: []
            }
            return outputs

        objects = inputs['data']['instances'].get_fields()
        labels, bboxes = [], []
        for label, box in zip(objects['pred_classes'], objects['pred_boxes']):
            # Map the class index back to its human-readable name.
            labels.append(self.model.config.model.classes[label])
            bboxes.append(box.tolist())

        scores = objects['scores'].tolist()

        outputs = {
            OutputKeys.SCORES: scores,
            OutputKeys.LABELS: labels,
            OutputKeys.BOXES: bboxes
        }
        return outputs
|
||||
@@ -10,6 +10,7 @@ if TYPE_CHECKING:
|
||||
from .movie_scene_segmentation_trainer import MovieSceneSegmentationTrainer
|
||||
from .image_inpainting_trainer import ImageInpaintingTrainer
|
||||
from .referring_video_object_segmentation_trainer import ReferringVideoObjectSegmentationTrainer
|
||||
from .image_defrcn_fewshot_detection_trainer import ImageDefrcnFewshotTrainer
|
||||
|
||||
else:
|
||||
_import_structure = {
|
||||
@@ -20,7 +21,9 @@ else:
|
||||
'movie_scene_segmentation_trainer': ['MovieSceneSegmentationTrainer'],
|
||||
'image_inpainting_trainer': ['ImageInpaintingTrainer'],
|
||||
'referring_video_object_segmentation_trainer':
|
||||
['ReferringVideoObjectSegmentationTrainer']
|
||||
['ReferringVideoObjectSegmentationTrainer'],
|
||||
'image_defrcn_fewshot_detection_trainer':
|
||||
['ImageDefrcnFewshotTrainer']
|
||||
}
|
||||
|
||||
import sys
|
||||
|
||||
316
modelscope/trainers/cv/image_defrcn_fewshot_detection_trainer.py
Normal file
316
modelscope/trainers/cv/image_defrcn_fewshot_detection_trainer.py
Normal file
@@ -0,0 +1,316 @@
|
||||
# The implementation is adopted from er-muyue/DeFRCN
|
||||
# made publicly available under the MIT License at
|
||||
# https://github.com/er-muyue/DeFRCN/blob/main/defrcn/engine/defaults.py
|
||||
# https://github.com/er-muyue/DeFRCN/blob/main/tools/model_surgery.py
|
||||
|
||||
import os
|
||||
from typing import Callable, Optional, Union
|
||||
|
||||
import torch
|
||||
from detectron2.engine import SimpleTrainer, hooks
|
||||
from detectron2.evaluation import DatasetEvaluators, verify_results
|
||||
from detectron2.utils import comm
|
||||
from torch import nn
|
||||
|
||||
from modelscope.metainfo import Trainers
|
||||
from modelscope.models.base import Model, TorchModel
|
||||
from modelscope.trainers.base import BaseTrainer
|
||||
from modelscope.trainers.builder import TRAINERS
|
||||
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile
|
||||
from modelscope.utils.logger import get_logger
|
||||
|
||||
|
||||
class DefaultTrainer(SimpleTrainer):
|
||||
|
||||
    def __init__(self, model, cfg):
        """Wire up a detectron2 ``SimpleTrainer`` from a config.

        Builds the optimizer, train data loader, LR scheduler and
        checkpointer, wraps the model in DDP when running distributed,
        and registers the default hooks.

        Args:
            model: the detection model to train.
            cfg: detectron2 config node.
        """
        from collections import OrderedDict
        from fvcore.nn.precise_bn import get_bn_modules
        from torch.nn.parallel import DistributedDataParallel

        from detectron2.data.build import build_detection_train_loader, build_detection_test_loader
        from detectron2.solver.build import build_optimizer, build_lr_scheduler
        from detectron2.checkpoint.detection_checkpoint import DetectionCheckpointer
        from detectron2.utils.logger import setup_logger

        setup_logger()

        optimizer = build_optimizer(cfg, model)
        data_loader = build_detection_train_loader(cfg)

        # Multi-GPU: wrap in DDP before handing the model to SimpleTrainer.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(
                model,
                device_ids=[comm.get_local_rank()],
                broadcast_buffers=False,
                find_unused_parameters=True)
        super().__init__(model, data_loader, optimizer)

        self.scheduler = build_lr_scheduler(cfg, optimizer)

        # Checkpointer also saves/restores optimizer and scheduler state.
        self.checkpointer = DetectionCheckpointer(
            model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        # start_iter may be advanced later by resume_or_load().
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        self.register_hooks(self.build_hooks())
|
||||
|
||||
    def resume_or_load(self, resume=True):
        """Load cfg.MODEL.WEIGHTS (or the latest checkpoint when resuming)
        and set ``self.start_iter`` accordingly.

        Args:
            resume (bool): if True and a checkpoint exists in the output
                directory, resume from it instead of cfg.MODEL.WEIGHTS.
        """
        # The checkpoint stores the training iteration that just finished, thus we start
        # at the next iteration (or iter zero if there's no checkpoint).
        self.start_iter = (
            self.checkpointer.resume_or_load(
                self.cfg.MODEL.WEIGHTS, resume=resume).get('iteration', -1)
            + 1)
|
||||
|
||||
def build_hooks(self):
    """
    Build a list of default hooks, including timing, evaluation,
    checkpointing, lr scheduling, precise BN, writing events.

    Returns:
        list[HookBase]:
    """
    # Work on a mutable copy so the trainer's own cfg can stay frozen.
    cfg = self.cfg.clone()
    cfg.defrost()
    # NOTE(review): presumably set so the PreciseBN loader built below does
    # not fork extra dataloader workers — confirm.
    cfg.DATALOADER.NUM_WORKERS = 0

    ret = [
        hooks.IterationTimer(),
        hooks.LRScheduler(self.optimizer, self.scheduler),
        # PreciseBN only when it is enabled AND the model actually contains
        # BN layers; otherwise a None placeholder is appended (assumed to be
        # filtered out by register_hooks — TODO confirm).
        hooks.PreciseBN(
            cfg.TEST.EVAL_PERIOD,
            self.model,
            build_detection_train_loader(cfg),
            cfg.TEST.PRECISE_BN.NUM_ITER,
        ) if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model)
        else None,
    ]

    # Only the main process writes checkpoints.
    if comm.is_main_process():
        ret.append(
            hooks.PeriodicCheckpointer(self.checkpointer,
                                       cfg.SOLVER.CHECKPOINT_PERIOD))

    def test_and_save_results():
        # Cache the latest eval results so train() can verify/return them.
        self._last_eval_results = self.test(self.cfg, self.model)
        return self._last_eval_results

    # EvalHook is registered on every process (presumably so distributed
    # evaluation stays synchronized across ranks — confirm).
    ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results))

    # Writers (console/JSON/TensorBoard) only on the main process.
    if comm.is_main_process():
        ret.append(hooks.PeriodicWriter(self.build_writers(), period=20))
    return ret
|
||||
|
||||
def build_writers(self):
    """Create the event writers: console printer, JSON metrics, TensorBoard."""
    from detectron2.utils.events import (CommonMetricPrinter, JSONWriter,
                                         TensorboardXWriter)

    output_dir = self.cfg.OUTPUT_DIR
    console_writer = CommonMetricPrinter(self.max_iter)
    json_writer = JSONWriter(os.path.join(output_dir, 'metrics.json'))
    tensorboard_writer = TensorboardXWriter(output_dir)
    return [console_writer, json_writer, tensorboard_writer]
|
||||
|
||||
def train(self):
    """
    Run training.

    Returns:
        OrderedDict of results, if evaluation is enabled. Otherwise None.
    """
    super().train(self.start_iter, self.max_iter)
    # Only the main process verifies and returns the cached eval results;
    # if evaluation never ran, fall through and return None implicitly.
    evaluated = hasattr(self, '_last_eval_results')
    if evaluated and comm.is_main_process():
        verify_results(self.cfg, self._last_eval_results)
        return self._last_eval_results
|
||||
|
||||
@classmethod
def build_evaluator(cls, cfg, dataset_name, output_folder=None):
    """Create the evaluator(s) matching the dataset's registered type.

    Args:
        cfg: detectron2-style config (used for the default output folder).
        dataset_name: name registered in detectron2's MetadataCatalog.
        output_folder: where COCO-style results are dumped; defaults to
            ``<OUTPUT_DIR>/inference``.

    Raises:
        NotImplementedError: when no evaluator exists for the dataset type.
    """
    from detectron2.data import MetadataCatalog

    if output_folder is None:
        output_folder = os.path.join(cfg.OUTPUT_DIR, 'inference')

    evaluators = []
    eval_type = MetadataCatalog.get(dataset_name).evaluator_type
    if eval_type == 'coco':
        from detectron2.evaluation import COCOEvaluator
        evaluators.append(COCOEvaluator(dataset_name, True, output_folder))
    if eval_type == 'pascal_voc':
        from detectron2.evaluation import PascalVOCDetectionEvaluator
        # VOC evaluation needs no output folder; return it directly.
        return PascalVOCDetectionEvaluator(dataset_name)
    if not evaluators:
        raise NotImplementedError(
            'no Evaluator for the dataset {} with the type {}'.format(
                dataset_name, eval_type))
    if len(evaluators) == 1:
        return evaluators[0]
    return DatasetEvaluators(evaluators)
|
||||
|
||||
@classmethod
def test(cls, cfg, model, evaluators=None):
    """Run detectron2's standard evaluation loop using our evaluator factory.

    NOTE(review): this assignment monkey-patches ``build_evaluator`` onto
    detectron2's own DefaultTrainer class globally, which affects any other
    user of that class in the same process — confirm this is intended.
    """
    from detectron2.engine.defaults import DefaultTrainer as _DefaultTrainer
    _DefaultTrainer.build_evaluator = cls.build_evaluator

    return _DefaultTrainer.test(cfg, model, evaluators)
|
||||
|
||||
|
||||
@TRAINERS.register_module(module_name=Trainers.image_fewshot_detection)
class ImageDefrcnFewshotTrainer(BaseTrainer):
    """Trainer for DeFRCN-based few-shot object detection.

    Thin adapter that builds a ModelScope model, merges dataset/output
    overrides into the model's detectron2-style (yacs) config, registers
    the few-shot datasets, and delegates the optimization loop to the
    local ``DefaultTrainer``.
    """

    def __init__(self,
                 model: Optional[Union[TorchModel, nn.Module, str]] = None,
                 cfg_file: Optional[str] = None,
                 arg_parse_fn: Optional[Callable] = None,
                 model_revision: Optional[str] = DEFAULT_MODEL_REVISION,
                 seed: int = 0,
                 cfg_modify_fn: Optional[Callable] = None,
                 **kwargs):
        """Initialize the trainer.

        Args:
            model: a model id / local model dir, or an already-built model
                instance (TorchModel or nn.Module).
            cfg_file: path to the ModelScope configuration; required when
                ``model`` is not a model id or directory.
            arg_parse_fn: optional argument-parsing hook for BaseTrainer.
            model_revision: model-hub revision used when downloading.
            seed: accepted for trainer-interface compatibility; not used here.
            cfg_modify_fn: optional callback to rewrite ``self.cfg``.
            **kwargs: extra options, e.g. ``datasets_train``,
                ``datasets_test``, ``work_dir``, ``data_dir``, ``data_type``,
                plus options forwarded to ``build_model``.
        """
        if isinstance(model, str):
            self.model_dir = self.get_or_download_model_dir(
                model, model_revision)
            if cfg_file is None:
                cfg_file = os.path.join(self.model_dir,
                                        ModelFile.CONFIGURATION)
        else:
            assert cfg_file is not None, 'Config file should not be None if model is not from pretrained!'
            self.model_dir = os.path.dirname(cfg_file)

        super().__init__(cfg_file, arg_parse_fn)

        if cfg_modify_fn is not None:
            self.cfg = cfg_modify_fn(self.cfg)

        self.logger = get_logger(log_level=self.cfg.get('log_level', 'INFO'))

        if isinstance(model, (TorchModel, nn.Module)):
            self.model = model
        else:
            self.model = self.build_model(**kwargs)

        # Detectron2-style config carried by the model itself.
        self.model_cfg = self.model.get_model_cfg()

        # Dataset / output-dir overrides from kwargs take precedence over the
        # values baked into the model config.
        if 'datasets_train' in kwargs:
            self.model_cfg.merge_from_list(
                ['DATASETS.TRAIN', kwargs['datasets_train']])
        if 'datasets_test' in kwargs:
            self.model_cfg.merge_from_list(
                ['DATASETS.TEST', kwargs['datasets_test']])
        if 'work_dir' in kwargs:
            self.model_cfg.merge_from_list(['OUTPUT_DIR', kwargs['work_dir']])

        if not os.path.exists(self.model_cfg.OUTPUT_DIR):
            os.makedirs(self.model_cfg.OUTPUT_DIR)

        # Freeze so no later code silently mutates the training config.
        self.model_cfg.freeze()

        self.data_dir = kwargs.get('data_dir', None)
        self.data_type = kwargs.get('data_type', 'pascal_voc')

        self.register_data(self.data_type, self.data_dir)

        self.trainer = DefaultTrainer(self.model, self.model_cfg)

    def train(self, *args, **kwargs):
        """Resume from the latest checkpoint (if any) and run training."""
        self.trainer.resume_or_load()
        self.trainer.train()

    def evaluate(self, checkpoint_path: str, *args, **kwargs):
        """Evaluate the model with weights loaded from ``checkpoint_path``.

        Returns:
            Metric values produced by ``DefaultTrainer.test``.
        """
        from detectron2.checkpoint.detection_checkpoint import DetectionCheckpointer

        DetectionCheckpointer(
            self.model,
            save_dir=self.model_cfg.OUTPUT_DIR).resume_or_load(checkpoint_path)
        metric_values = DefaultTrainer.test(self.model_cfg, self.model)
        return metric_values

    def build_model(self, *args, **kwargs) -> Union[nn.Module, TorchModel]:
        """Load the model from ``self.model_dir``.

        Unwraps a ModelScope wrapper to its inner torch module when needed.
        NOTE(review): implicitly returns None when the loaded object is
        neither an nn.Module nor exposes ``.model`` — presumably never
        happens for supported checkpoints; confirm.
        """
        model = Model.from_pretrained(self.model_dir, **kwargs)
        if not isinstance(model, nn.Module) and hasattr(model, 'model'):
            return model.model
        elif isinstance(model, nn.Module):
            return model

    @classmethod
    def register_data(cls, data_type='pascal_voc', data_dir=None):
        """Register few-shot dataset splits with detectron2's catalogs.

        Args:
            data_type: only 'pascal_voc' is currently supported.
            data_dir: dataset root; when falsy the registrar's default path
                is used.

        Raises:
            NotImplementedError: for unsupported ``data_type`` values.
        """
        if data_type == 'pascal_voc':
            from modelscope.models.cv.image_defrcn_fewshot.utils.voc_register import register_all_voc
            if data_dir:
                register_all_voc(data_dir)
            else:
                register_all_voc()
        else:
            raise NotImplementedError(
                'no {} dataset was registered'.format(data_type))

    @classmethod
    def model_surgery(cls,
                      src_path,
                      save_dir,
                      data_type='pascal_voc',
                      method='remove'):
        """Prepare a base-training checkpoint for few-shot fine-tuning.

        Args:
            src_path: path of the source checkpoint (.pth).
            save_dir: directory where the modified checkpoint is written
                (created if missing).
            data_type: dataset family; only 'pascal_voc' is supported.
            method: 'remove' drops the box-predictor head parameters
                entirely; 'randinit' re-initializes them at the few-shot
                target size while keeping the pretrained base-class rows
                (and the background row of ``cls_score``).

        Raises:
            NotImplementedError: for unsupported ``data_type`` values.
        """
        assert method in ['remove',
                          'randinit'], '{} not implemented'.format(method)

        def _surgery(param_name, is_weight, tar_size, ckpt):
            # Grow one weight/bias tensor to `tar_size` rows, copying the
            # pretrained base-class rows into place.
            weight_name = param_name + ('.weight' if is_weight else '.bias')
            pretrained_weight = ckpt['model'][weight_name]
            prev_cls = pretrained_weight.size(0)
            if 'cls_score' in param_name:
                # Last row of cls_score is the background class; it is
                # copied separately below.
                prev_cls -= 1
            if is_weight:
                feat_size = pretrained_weight.size(1)
                new_weight = torch.rand((tar_size, feat_size))
                torch.nn.init.normal_(new_weight, 0, 0.01)
            else:
                new_weight = torch.zeros(tar_size)

            new_weight[:prev_cls] = pretrained_weight[:prev_cls]
            if 'cls_score' in param_name:
                new_weight[-1] = pretrained_weight[-1]  # bg class
            ckpt['model'][weight_name] = new_weight

        if data_type == 'pascal_voc':
            TAR_SIZE = 20  # number of VOC object classes
            params_name = [
                'model.roi_heads.box_predictor.cls_score',
                'model.roi_heads.box_predictor.bbox_pred'
            ]

            save_name = 'model_reset_' + ('remove' if method == 'remove' else
                                          'surgery') + '.pth'
            save_path = os.path.join(save_dir, save_name)
            os.makedirs(save_dir, exist_ok=True)

            ckpt = torch.load(src_path)

            # Strip optimizer/scheduler state so fine-tuning starts fresh.
            if 'scheduler' in ckpt:
                del ckpt['scheduler']
            if 'optimizer' in ckpt:
                del ckpt['optimizer']
            if 'iteration' in ckpt:
                ckpt['iteration'] = 0

            if method == 'remove':
                for param_name in params_name:
                    del ckpt['model'][param_name + '.weight']
                    if param_name + '.bias' in ckpt['model']:
                        del ckpt['model'][param_name + '.bias']
            else:
                # cls_score gets one extra row for background; bbox_pred
                # stores 4 box-delta rows per class.
                tar_sizes = [TAR_SIZE + 1, TAR_SIZE * 4]
                for param_name, tar_size in zip(params_name, tar_sizes):
                    _surgery(param_name, True, tar_size, ckpt)
                    _surgery(param_name, False, tar_size, ckpt)

            torch.save(ckpt, save_path)
        else:
            # Bug fix: the exception was previously constructed but never
            # raised, so unsupported data types failed silently.
            raise NotImplementedError(
                '{} dataset does not supported'.format(data_type))
|
||||
@@ -46,6 +46,7 @@ class CVTasks(object):
|
||||
|
||||
image_object_detection = 'image-object-detection'
|
||||
video_object_detection = 'video-object-detection'
|
||||
image_fewshot_detection = 'image-fewshot-detection'
|
||||
|
||||
image_segmentation = 'image-segmentation'
|
||||
semantic_segmentation = 'semantic-segmentation'
|
||||
|
||||
62
tests/pipelines/test_image_defrcn_fewshot.py
Normal file
62
tests/pipelines/test_image_defrcn_fewshot.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import subprocess
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
from modelscope.hub.snapshot_download import snapshot_download
|
||||
from modelscope.models import Model
|
||||
from modelscope.outputs import OutputKeys
|
||||
from modelscope.pipelines import pipeline
|
||||
from modelscope.utils.constant import Tasks
|
||||
from modelscope.utils.demo_utils import DemoCompatibilityCheck
|
||||
from modelscope.utils.logger import get_logger
|
||||
from modelscope.utils.test_utils import test_level
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
class ImageDefrcnFewShotTest(unittest.TestCase, DemoCompatibilityCheck):
    """Pipeline-level tests for the DeFRCN few-shot detection model."""

    def setUp(self) -> None:
        # The model needs detectron2 0.3, which is not a hard dependency of
        # the package; install it on the fly (best-effort, output suppressed).
        logger.info('start install detectron2-0.3')
        install_cmd = [
            sys.executable, '-m', 'pip', 'install', 'detectron2==0.3', '-f',
            'https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html'
        ]
        subprocess.run(
            install_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        logger.info('install detectron2-0.3 finished')

        self.task = Tasks.image_fewshot_detection
        self.model_id = 'damo/cv_resnet101_detection_fewshot-defrcn'
        self.image = 'data/test/images/image_voc2007_000001.jpg'

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_model_from_modelhub(self):
        """Build the pipeline from a pre-constructed Model instance."""
        model = Model.from_pretrained(self.model_id)
        detector = pipeline(task=self.task, model=model)
        result = detector(input=self.image)
        print(result[OutputKeys.LABELS])

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_model_name(self):
        """Build the pipeline directly from the model id."""
        detector = pipeline(task=self.task, model=self.model_id)
        result = detector(input=self.image)
        print(result[OutputKeys.LABELS])

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_run_with_default_model(self):
        """Build the pipeline with the task's default model."""
        detector = pipeline(task=self.task)
        result = detector(input=self.image)
        print(result[OutputKeys.LABELS])

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_run_by_direct_model_download(self):
        """Build the pipeline from a locally downloaded snapshot."""
        cache_path = snapshot_download(self.model_id)
        detector = pipeline(self.task, model=cache_path)
        result = detector(input=self.image)
        print(result[OutputKeys.LABELS])

    @unittest.skip('demo compatibility test is only enabled on a needed-basis')
    def test_demo_compatibility(self):
        self.compatibility_check()
|
||||
|
||||
|
||||
# Allow running this test module directly: `python <this file>`.
if __name__ == '__main__':
    unittest.main()
|
||||
@@ -49,6 +49,7 @@ isolated: # test cases that may require excessive anmount of GPU memory or run
|
||||
- test_kws_nearfield_trainer.py
|
||||
- test_gpt3_text_generation.py
|
||||
- test_ddcolor_image_colorization.py
|
||||
- test_image_defrcn_fewshot_trainer.py
|
||||
- test_image_deblur_trainer.py
|
||||
|
||||
envs:
|
||||
|
||||
70
tests/trainers/test_image_defrcn_fewshot_trainer.py
Normal file
70
tests/trainers/test_image_defrcn_fewshot_trainer.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from modelscope.hub.utils.utils import get_cache_dir
|
||||
from modelscope.metainfo import Trainers
|
||||
from modelscope.msdatasets import MsDataset
|
||||
from modelscope.trainers import build_trainer
|
||||
from modelscope.utils.constant import DownloadMode
|
||||
from modelscope.utils.test_utils import test_level
|
||||
|
||||
|
||||
class TestImageDefrcnFewShotTrainer(unittest.TestCase):
    """End-to-end smoke test for the DeFRCN few-shot detection trainer."""

    def setUp(self):
        """Install detectron2, create a work dir and fetch the VOC subset."""
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
        # detectron2 is an optional dependency pinned to 0.3; install it
        # from the modelscope wheel index (best-effort, output suppressed).
        cmd = [
            sys.executable, '-m', 'pip', 'install', 'detectron2==0.3', '-f',
            'https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html'
        ]
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # Bug fix: `tempfile.TemporaryDirectory().name` discards the object,
        # whose finalizer deletes the directory as soon as it is garbage
        # collected. mkdtemp() creates a directory that persists until
        # tearDown removes it.
        self.tmp_dir = tempfile.mkdtemp()

        self.model_id = 'damo/cv_resnet101_detection_fewshot-defrcn'

        data_voc = MsDataset.load(
            dataset_name='VOC_fewshot',
            namespace='shimin2023',
            split='train',
            download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS)
        self.data_dir = os.path.join(
            data_voc.config_kwargs['split_config']['train'], 'data')

    def tearDown(self):
        """Remove the temporary work dir (tolerate a partially-removed one)."""
        shutil.rmtree(self.tmp_dir, ignore_errors=True)
        super().tearDown()

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_trainer(self):
        """Train base split 1 and check the expected artifacts are written."""
        split = 1
        kwargs = dict(
            model=self.model_id,
            data_dir=self.data_dir,
            work_dir=self.tmp_dir,
            model_weights=os.path.join(get_cache_dir(), self.model_id,
                                       'ImageNetPretrained/MSRA/R-101.pkl'),
            data_type='pascal_voc',
            config_path='defrcn_det_r101_base{}.yaml'.format(split),
            datasets_train=('voc_2007_trainval_base{}'.format(split),
                            'voc_2012_trainval_base{}'.format(split)),
            datasets_test=('voc_2007_test_base{}'.format(split), ))
        trainer = build_trainer(
            name=Trainers.image_fewshot_detection, default_args=kwargs)
        trainer.train()

        # The detectron2 loop writes metrics.json and the final weights
        # into the work dir.
        results_files = os.listdir(self.tmp_dir)
        self.assertIn('metrics.json', results_files)
        self.assertIn('model_final.pth', results_files)
|
||||
|
||||
|
||||
# Allow running this test module directly: `python <this file>`.
if __name__ == '__main__':
    unittest.main()
|
||||
Reference in New Issue
Block a user