Commit 8a99a985 authored by dmMaze's avatar dmMaze
Browse files

fix lines merging for ctd, close #942

parent accb8620
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ from utils.imgproc_utils import letterbox, xyxy2yolo, get_yololabel_strings, squ

from ..yolov5.yolov5_utils import non_max_suppression
from ..db_utils import SegDetectorRepresenter
from utils.textblock import TextBlock, group_output
from utils.textblock import TextBlock, group_output, mit_merge_textlines
from .textmask import refine_mask, refine_undetected_mask, REFINEMASK_INPAINT, REFINEMASK_ANNOTATION
from pathlib import Path
from typing import Union, List, Tuple, Callable
@@ -344,7 +344,9 @@ class TextDetector:
            lines = []
        else:
            lines = lines.astype(np.int64)
        blk_list = group_output(blks, lines, im_w, im_h, mask)
        blk_list = group_output([], lines, im_w, im_h, mask, canvas=img)
        # print(lines)
        # blk_list = mit_merge_textlines(lines, im_w, im_w)
        mask_refined = refine_mask(img, mask, blk_list, refine_mode=refine_mode)
        if keep_undetected_mask:
            mask_refined = refine_undetected_mask(img, mask, mask_refined, blk_list, refine_mode=refine_mode)
+16 −17
Original line number Diff line number Diff line
@@ -298,16 +298,18 @@ class TextBlock:
        xyxy = np.array(self.xyxy)
        return (xyxy[:2] + xyxy[2:]) / 2

    def unrotated_polygons(self) -> np.ndarray:
    def unrotated_polygons(self, ids=None) -> np.ndarray:
        angled = self.angle != 0
        center = self.center()
        polygons = self.lines_array().reshape(-1, 8)
        if ids is not None:
            polygons = polygons[ids]
        if angled:
            polygons = rotate_polygons(center, polygons, self.angle)
        return angled, center, polygons
    
    def min_rect(self, rotate_back=True) -> List[int]:
        angled, center, polygons = self.unrotated_polygons()
    def min_rect(self, rotate_back=True, ids=None) -> List[int]:
        angled, center, polygons = self.unrotated_polygons(ids=ids)
        min_x = polygons[:, ::2].min()
        min_y = polygons[:, 1::2].min()
        max_x = polygons[:, ::2].max()
@@ -576,10 +578,7 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) ->
    v = np.sum(vec_v, axis=0)
    h = np.sum(vec_h, axis=0)
    norm_v, norm_h = np.linalg.norm(v), np.linalg.norm(h)
    if blk.language == 'ja':
        vertical = norm_v > norm_h
    else:
        vertical = norm_v > norm_h * 2
    vertical = blk.src_is_vertical
    # calcuate distance between textlines and origin 
    if vertical:
        primary_vec, primary_norm = v, norm_v
@@ -602,7 +601,6 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) ->
    if abs(blk.angle) < 3:
        blk.angle = 0
    blk.font_size = font_size
    blk.vertical = blk.src_is_vertical = vertical
    blk.vec = primary_vec
    blk.norm = primary_norm
    if sort:
@@ -619,9 +617,9 @@ def try_merge_textline(blk: TextBlock, blk2: TextBlock, fntsize_tol=1.7, distanc
    cos_vec = vec_prod / blk.norm / blk2.norm
    # distance = blk2.distance[-1] - blk.distance[-1]
    # distance_p1 = np.linalg.norm(np.array(blk2.lines[-1][0]) - np.array(blk.lines[-1][0]))
    minrect1 = blk.min_rect()[0]
    minrect1 = blk.min_rect(ids=[-1])[0]
    xyxy1 = [*minrect1[0], *minrect1[2]]
    minrect2 = blk2.min_rect()[0]
    minrect2 = blk2.min_rect(ids=[-1])[0]
    xyxy2 = [*minrect2[0], *minrect2[2]]
    distance_x = max(xyxy1[0], xyxy2[0]) - min(xyxy1[2], xyxy2[2])
    distance_y = max(xyxy1[1], xyxy2[1]) - min(xyxy1[3], xyxy2[3])
@@ -640,9 +638,9 @@ def try_merge_textline(blk: TextBlock, blk2: TextBlock, fntsize_tol=1.7, distanc
            return False
        # if distance > distance_tol * fntsz_avg:
        #     return False
        if blk.vertical and blk2.vertical and distance_x > fntsz_avg * 1.5:
        if blk.vertical and blk2.vertical and distance_x > fntsz_avg * 0.8:
            return False
        if not blk.vertical and distance_y > fntsz_avg * 0.3:
        if not blk.vertical and distance_y > fntsz_avg * 0.5:
            return False
    # merge
    for line in blk2.lines:
@@ -703,7 +701,7 @@ def split_textblk(blk: TextBlock):
            current_blk.adjust_bbox(with_bbox=False)
    return textblock_splitted, sub_blk_list

def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[TextBlock]:
def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True, canvas=None) -> List[TextBlock]:
    blk_list: List[TextBlock] = []
    scattered_lines = {'ver': [], 'hor': []}
    for bbox, cls, conf in zip(*blks):
@@ -733,6 +731,7 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[
                if mask_score < mask_score_thresh:
                    continue
            blk = TextBlock([bx1, by1, bx2, by2], [line])
            blk.vertical = blk.src_is_vertical = is_vertical
            examine_textblk(blk, im_w, im_h, sort=False)
            if blk.vertical:
                scattered_lines['ver'].append(blk)
@@ -762,9 +761,9 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[
                textblock_splitted = True
            elif blk.vertical:
                textblock_splitted = True
        if textblock_splitted:
            textblock_splitted, sub_blk_list = split_textblk(blk)
        else:
        # if textblock_splitted:
        #     textblock_splitted, sub_blk_list = split_textblk(blk)
        # else:
        sub_blk_list = [blk]
        # modify textblock to fit its textlines
        if not textblock_splitted: