Fix the numpy bug for card detection correction

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14461396
* Fix the numpy bug in card detection correction

* Add outputs (score, angle, print type)
This commit is contained in:
rujiao.lrj
2023-10-27 16:51:50 +08:00
committed by wenmeng.zwm
parent 3b4a7b48ef
commit acc61da91c
3 changed files with 23 additions and 9 deletions

View File

@@ -442,8 +442,10 @@ TASK_OUTPUTS = {
Tasks.table_recognition: [OutputKeys.POLYGONS],
Tasks.lineless_table_recognition: [OutputKeys.POLYGONS, OutputKeys.BOXES],
Tasks.license_plate_detection: [OutputKeys.POLYGONS, OutputKeys.TEXT],
Tasks.card_detection_correction:
[OutputKeys.POLYGONS, OutputKeys.OUTPUT_IMGS],
Tasks.card_detection_correction: [
OutputKeys.POLYGONS, OutputKeys.SCORES, OutputKeys.OUTPUT_IMGS,
OutputKeys.LABELS, OutputKeys.LAYOUT
],
# ocr recognition result for single sample
# {
@@ -672,9 +674,8 @@ TASK_OUTPUTS = {
# np.array # 2D array containing only 0, 1
# ]
# }
Tasks.image_segmentation: [
OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS
],
Tasks.image_segmentation:
[OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS],
# video panoptic segmentation result for single sample
# "scores": [[0.8, 0.25, 0.05, 0.05], [0.9, 0.1, 0.05, 0.05]]

View File

@@ -172,13 +172,19 @@ class CardDetectionCorrection(Pipeline):
wh = output['wh']
reg = output['reg']
angle_cls = output['cls'].sigmoid_()
ftype_cls = output['ftype'].sigmoid_()
bbox, inds = bbox_decode(hm, wh, reg=reg, K=self.K)
angle_cls = decode_by_ind(
angle_cls, inds, K=self.K).detach().cpu().numpy()
ftype_cls = decode_by_ind(
ftype_cls, inds,
K=self.K).detach().cpu().numpy().astype(np.float32)
bbox = bbox.detach().cpu().numpy()
for i in range(bbox.shape[1]):
bbox[0][i][9] = angle_cls[0][i]
bbox = np.concatenate((bbox, np.expand_dims(ftype_cls, axis=-1)),
axis=-1)
bbox = nms(bbox, 0.3)
bbox = bbox_post_process(bbox.copy(), [meta['c'].cpu().numpy()],
[meta['s']], meta['out_height'],
@@ -187,6 +193,8 @@ class CardDetectionCorrection(Pipeline):
res = []
angle = []
sub_imgs = []
ftype = []
score = []
for idx, box in enumerate(bbox[0]):
if box[8] > 0.3:
angle.append(int(box[9]))
@@ -200,9 +208,14 @@ class CardDetectionCorrection(Pipeline):
if angle[-1] == 3:
sub_img = cv2.rotate(sub_img, 0)
sub_imgs.append(sub_img)
ftype.append(int(box[10]))
score.append(box[8])
result = {
OutputKeys.POLYGONS: np.array(res),
OutputKeys.OUTPUT_IMGS: np.array(sub_imgs)
OutputKeys.POLYGONS: res,
OutputKeys.SCORES: score,
OutputKeys.OUTPUT_IMGS: sub_imgs,
OutputKeys.LABELS: angle,
OutputKeys.LAYOUT: np.array(ftype)
}
return result

View File

@@ -232,13 +232,13 @@ def nms(dets, thresh):
keep = []
for i in range(len(dets)):
box = dets[i]
if box[-1] < thresh:
if box[8] < thresh:
break
max_score_index = -1
ctx = (dets[i][0] + dets[i][2] + dets[i][4] + dets[i][6]) / 4
cty = (dets[i][1] + dets[i][3] + dets[i][5] + dets[i][7]) / 4
for j in range(len(dets)):
if i == j or dets[j][-1] < thresh:
if i == j or dets[j][8] < thresh:
break
x1, y1 = dets[j][0], dets[j][1]
x2, y2 = dets[j][2], dets[j][3]