mirror of
https://github.com/modelscope/modelscope.git
synced 2025-12-25 20:49:37 +01:00
Fix the numpy bug for card correction
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14461396
* Fix the numpy bug in card detection correction
* Add outputs (score, angle, print type)
This commit is contained in:
@@ -442,8 +442,10 @@ TASK_OUTPUTS = {
|
||||
Tasks.table_recognition: [OutputKeys.POLYGONS],
|
||||
Tasks.lineless_table_recognition: [OutputKeys.POLYGONS, OutputKeys.BOXES],
|
||||
Tasks.license_plate_detection: [OutputKeys.POLYGONS, OutputKeys.TEXT],
|
||||
Tasks.card_detection_correction:
|
||||
[OutputKeys.POLYGONS, OutputKeys.OUTPUT_IMGS],
|
||||
Tasks.card_detection_correction: [
|
||||
OutputKeys.POLYGONS, OutputKeys.SCORES, OutputKeys.OUTPUT_IMGS,
|
||||
OutputKeys.LABELS, OutputKeys.LAYOUT
|
||||
],
|
||||
|
||||
# ocr recognition result for single sample
|
||||
# {
|
||||
@@ -672,9 +674,8 @@ TASK_OUTPUTS = {
|
||||
# np.array # 2D array containing only 0, 1
|
||||
# ]
|
||||
# }
|
||||
Tasks.image_segmentation: [
|
||||
OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS
|
||||
],
|
||||
Tasks.image_segmentation:
|
||||
[OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS],
|
||||
|
||||
# video panoptic segmentation result for single sample
|
||||
# "scores": [[0.8, 0.25, 0.05, 0.05], [0.9, 0.1, 0.05, 0.05]]
|
||||
|
||||
@@ -172,13 +172,19 @@ class CardDetectionCorrection(Pipeline):
|
||||
wh = output['wh']
|
||||
reg = output['reg']
|
||||
angle_cls = output['cls'].sigmoid_()
|
||||
ftype_cls = output['ftype'].sigmoid_()
|
||||
|
||||
bbox, inds = bbox_decode(hm, wh, reg=reg, K=self.K)
|
||||
angle_cls = decode_by_ind(
|
||||
angle_cls, inds, K=self.K).detach().cpu().numpy()
|
||||
ftype_cls = decode_by_ind(
|
||||
ftype_cls, inds,
|
||||
K=self.K).detach().cpu().numpy().astype(np.float32)
|
||||
bbox = bbox.detach().cpu().numpy()
|
||||
for i in range(bbox.shape[1]):
|
||||
bbox[0][i][9] = angle_cls[0][i]
|
||||
bbox = np.concatenate((bbox, np.expand_dims(ftype_cls, axis=-1)),
|
||||
axis=-1)
|
||||
bbox = nms(bbox, 0.3)
|
||||
bbox = bbox_post_process(bbox.copy(), [meta['c'].cpu().numpy()],
|
||||
[meta['s']], meta['out_height'],
|
||||
@@ -187,6 +193,8 @@ class CardDetectionCorrection(Pipeline):
|
||||
res = []
|
||||
angle = []
|
||||
sub_imgs = []
|
||||
ftype = []
|
||||
score = []
|
||||
for idx, box in enumerate(bbox[0]):
|
||||
if box[8] > 0.3:
|
||||
angle.append(int(box[9]))
|
||||
@@ -200,9 +208,14 @@ class CardDetectionCorrection(Pipeline):
|
||||
if angle[-1] == 3:
|
||||
sub_img = cv2.rotate(sub_img, 0)
|
||||
sub_imgs.append(sub_img)
|
||||
ftype.append(int(box[10]))
|
||||
score.append(box[8])
|
||||
|
||||
result = {
|
||||
OutputKeys.POLYGONS: np.array(res),
|
||||
OutputKeys.OUTPUT_IMGS: np.array(sub_imgs)
|
||||
OutputKeys.POLYGONS: res,
|
||||
OutputKeys.SCORES: score,
|
||||
OutputKeys.OUTPUT_IMGS: sub_imgs,
|
||||
OutputKeys.LABELS: angle,
|
||||
OutputKeys.LAYOUT: np.array(ftype)
|
||||
}
|
||||
return result
|
||||
|
||||
@@ -232,13 +232,13 @@ def nms(dets, thresh):
|
||||
keep = []
|
||||
for i in range(len(dets)):
|
||||
box = dets[i]
|
||||
if box[-1] < thresh:
|
||||
if box[8] < thresh:
|
||||
break
|
||||
max_score_index = -1
|
||||
ctx = (dets[i][0] + dets[i][2] + dets[i][4] + dets[i][6]) / 4
|
||||
cty = (dets[i][1] + dets[i][3] + dets[i][5] + dets[i][7]) / 4
|
||||
for j in range(len(dets)):
|
||||
if i == j or dets[j][-1] < thresh:
|
||||
if i == j or dets[j][8] < thresh:
|
||||
break
|
||||
x1, y1 = dets[j][0], dets[j][1]
|
||||
x2, y2 = dets[j][2], dets[j][3]
|
||||
|
||||
Reference in New Issue
Block a user