diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py index 368abad6..a32fc157 100644 --- a/modelscope/outputs/outputs.py +++ b/modelscope/outputs/outputs.py @@ -442,8 +442,10 @@ TASK_OUTPUTS = { Tasks.table_recognition: [OutputKeys.POLYGONS], Tasks.lineless_table_recognition: [OutputKeys.POLYGONS, OutputKeys.BOXES], Tasks.license_plate_detection: [OutputKeys.POLYGONS, OutputKeys.TEXT], - Tasks.card_detection_correction: - [OutputKeys.POLYGONS, OutputKeys.OUTPUT_IMGS], + Tasks.card_detection_correction: [ + OutputKeys.POLYGONS, OutputKeys.SCORES, OutputKeys.OUTPUT_IMGS, + OutputKeys.LABELS, OutputKeys.LAYOUT + ], # ocr recognition result for single sample # { @@ -672,9 +674,8 @@ TASK_OUTPUTS = { # np.array # 2D array containing only 0, 1 # ] # } - Tasks.image_segmentation: [ - OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS - ], + Tasks.image_segmentation: + [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.MASKS], # video panoptic segmentation result for single sample # "scores": [[0.8, 0.25, 0.05, 0.05], [0.9, 0.1, 0.05, 0.05]] diff --git a/modelscope/pipelines/cv/card_detection_correction_pipeline.py b/modelscope/pipelines/cv/card_detection_correction_pipeline.py index dac174de..c4b87d0d 100644 --- a/modelscope/pipelines/cv/card_detection_correction_pipeline.py +++ b/modelscope/pipelines/cv/card_detection_correction_pipeline.py @@ -172,13 +172,19 @@ class CardDetectionCorrection(Pipeline): wh = output['wh'] reg = output['reg'] angle_cls = output['cls'].sigmoid_() + ftype_cls = output['ftype'].sigmoid_() bbox, inds = bbox_decode(hm, wh, reg=reg, K=self.K) angle_cls = decode_by_ind( angle_cls, inds, K=self.K).detach().cpu().numpy() + ftype_cls = decode_by_ind( + ftype_cls, inds, + K=self.K).detach().cpu().numpy().astype(np.float32) bbox = bbox.detach().cpu().numpy() for i in range(bbox.shape[1]): bbox[0][i][9] = angle_cls[0][i] + bbox = np.concatenate((bbox, np.expand_dims(ftype_cls, axis=-1)), + axis=-1) bbox = nms(bbox, 0.3) bbox = bbox_post_process(bbox.copy(), [meta['c'].cpu().numpy()], [meta['s']], meta['out_height'], @@ -187,6 +193,8 @@ class CardDetectionCorrection(Pipeline): res = [] angle = [] sub_imgs = [] + ftype = [] + score = [] for idx, box in enumerate(bbox[0]): if box[8] > 0.3: angle.append(int(box[9])) @@ -200,9 +208,14 @@ class CardDetectionCorrection(Pipeline): if angle[-1] == 3: sub_img = cv2.rotate(sub_img, 0) sub_imgs.append(sub_img) + ftype.append(int(box[10])) + score.append(box[8]) result = { - OutputKeys.POLYGONS: np.array(res), - OutputKeys.OUTPUT_IMGS: np.array(sub_imgs) + OutputKeys.POLYGONS: res, + OutputKeys.SCORES: score, + OutputKeys.OUTPUT_IMGS: sub_imgs, + OutputKeys.LABELS: angle, + OutputKeys.LAYOUT: np.array(ftype) } return result diff --git a/modelscope/pipelines/cv/ocr_utils/table_process.py b/modelscope/pipelines/cv/ocr_utils/table_process.py index 3bf28e84..f67bfc72 100644 --- a/modelscope/pipelines/cv/ocr_utils/table_process.py +++ b/modelscope/pipelines/cv/ocr_utils/table_process.py @@ -232,13 +232,13 @@ def nms(dets, thresh): keep = [] for i in range(len(dets)): box = dets[i] - if box[-1] < thresh: + if box[8] < thresh: break max_score_index = -1 ctx = (dets[i][0] + dets[i][2] + dets[i][4] + dets[i][6]) / 4 cty = (dets[i][1] + dets[i][3] + dets[i][5] + dets[i][7]) / 4 for j in range(len(dets)): - if i == j or dets[j][-1] < thresh: + if i == j or dets[j][8] < thresh: break x1, y1 = dets[j][0], dets[j][1] x2, y2 = dets[j][2], dets[j][3]