Fix the numpy bug for card correction

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/14461396

* Fix the numpy bug for card detection correction
* Add outputs (score, angle, print type)
2025-12-25 20:49:37 +01:00 · 2023-10-27 16:51:50 +08:00
parent 3b4a7b48ef
commit acc61da91c
3 changed files with 23 additions and 9 deletions
--- a/modelscope/outputs/outputs.py
+++ b/modelscope/outputs/outputs.py
@@ -442,8 +442,10 @@ TASK_OUTPUTS = {
    Tasks.table_recognition: [OutputKeys.POLYGONS],
    Tasks.lineless_table_recognition: [OutputKeys.POLYGONS, OutputKeys.BOXES],
    Tasks.license_plate_detection: [OutputKeys.POLYGONS, OutputKeys.TEXT],
-    Tasks.card_detection_correction:
-    [OutputKeys.POLYGONS, OutputKeys.OUTPUT_IMGS],
+    Tasks.card_detection_correction: [
+        OutputKeys.POLYGONS, OutputKeys.SCORES, OutputKeys.OUTPUT_IMGS,
+        OutputKeys.LABELS, OutputKeys.LAYOUT
+    ],

    # ocr recognition result for single sample
    # {
@@ -672,9 +674,8 @@ TASK_OUTPUTS = {
    #           np.array # 2D array containing only 0, 1
    #       ]
    #   }
-    Tasks.image_segmentation: [
-        OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS
-    ],
+    Tasks.image_segmentation:
+    [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS],

    # video panoptic segmentation result for single sample
    #         "scores": [[0.8, 0.25, 0.05, 0.05], [0.9, 0.1, 0.05, 0.05]]
--- a/modelscope/pipelines/cv/card_detection_correction_pipeline.py
+++ b/modelscope/pipelines/cv/card_detection_correction_pipeline.py
@@ -172,13 +172,19 @@ class CardDetectionCorrection(Pipeline):
        wh = output['wh']
        reg = output['reg']
        angle_cls = output['cls'].sigmoid_()
+        ftype_cls = output['ftype'].sigmoid_()

        bbox, inds = bbox_decode(hm, wh, reg=reg, K=self.K)
        angle_cls = decode_by_ind(
            angle_cls, inds, K=self.K).detach().cpu().numpy()
+        ftype_cls = decode_by_ind(
+            ftype_cls, inds,
+            K=self.K).detach().cpu().numpy().astype(np.float32)
        bbox = bbox.detach().cpu().numpy()
        for i in range(bbox.shape[1]):
            bbox[0][i][9] = angle_cls[0][i]
+        bbox = np.concatenate((bbox, np.expand_dims(ftype_cls, axis=-1)),
+                              axis=-1)
        bbox = nms(bbox, 0.3)
        bbox = bbox_post_process(bbox.copy(), [meta['c'].cpu().numpy()],
                                 [meta['s']], meta['out_height'],
@@ -187,6 +193,8 @@ class CardDetectionCorrection(Pipeline):
        res = []
        angle = []
        sub_imgs = []
+        ftype = []
+        score = []
        for idx, box in enumerate(bbox[0]):
            if box[8] > 0.3:
                angle.append(int(box[9]))
@@ -200,9 +208,14 @@ class CardDetectionCorrection(Pipeline):
                if angle[-1] == 3:
                    sub_img = cv2.rotate(sub_img, 0)
                sub_imgs.append(sub_img)
+                ftype.append(int(box[10]))
+                score.append(box[8])

        result = {
-            OutputKeys.POLYGONS: np.array(res),
-            OutputKeys.OUTPUT_IMGS: np.array(sub_imgs)
+            OutputKeys.POLYGONS: res,
+            OutputKeys.SCORES: score,
+            OutputKeys.OUTPUT_IMGS: sub_imgs,
+            OutputKeys.LABELS: angle,
+            OutputKeys.LAYOUT: np.array(ftype)
        }
        return result
--- a/modelscope/pipelines/cv/ocr_utils/table_process.py
+++ b/modelscope/pipelines/cv/ocr_utils/table_process.py
@@ -232,13 +232,13 @@ def nms(dets, thresh):
    keep = []
    for i in range(len(dets)):
        box = dets[i]
-        if box[-1] < thresh:
+        if box[8] < thresh:
            break
        max_score_index = -1
        ctx = (dets[i][0] + dets[i][2] + dets[i][4] + dets[i][6]) / 4
        cty = (dets[i][1] + dets[i][3] + dets[i][5] + dets[i][7]) / 4
        for j in range(len(dets)):
-            if i == j or dets[j][-1] < thresh:
+            if i == j or dets[j][8] < thresh:
                break
            x1, y1 = dets[j][0], dets[j][1]
            x2, y2 = dets[j][2], dets[j][3]