Commit 82dcdf13 authored by dmMaze's avatar dmMaze
Browse files

fix textlines postprocess

parent 19cd3012
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ def model2annotations(model_path, img_dir_list, save_dir, save_json=False):
        for blk in blk_list:
            polys += blk.lines
            blk_xyxy.append(blk.xyxy)
            blk_dict_list.append(blk.to_dict(extra_info=True))
            blk_dict_list.append(blk.to_dict())
        blk_xyxy = xyxy2yolo(blk_xyxy, im_w, im_h)
        if blk_xyxy is not None:
            cls_list = [1] * len(blk_xyxy)
+20 −15
Original line number Diff line number Diff line
@@ -181,12 +181,11 @@ class TextBlock(object):
    def __getitem__(self, idx):
        return self.lines[idx]

    def to_dict(self, extra_info=False):
    def to_dict(self):
        blk_dict = copy.deepcopy(vars(self))
        return blk_dict

    def get_transformed_region(self, img, idx, textheight, maxwidth=None) -> np.ndarray :
        im_h, im_w = img.shape[:2]
    def get_transformed_region(self, img: np.ndarray, idx: int, textheight: int, maxwidth: int = None) -> np.ndarray :
        direction = 'v' if self.vertical else 'h'
        src_pts = np.array(self.lines[idx], dtype=np.float64)

@@ -322,7 +321,7 @@ def sort_textblk_list(blk_list: List[TextBlock], im_w: int, im_h: int) -> List[T
    blk_list.sort(key=lambda blk: blk.sort_weight)
    return blk_list

def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool, sort: bool = False) -> None:
def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) -> None:
    lines = blk.lines_array()
    middle_pnts = (lines[:, [1, 2, 3, 0]] + lines) / 2
    vec_v = middle_pnts[:, 2] - middle_pnts[:, 0]   # vertical vectors of textlines
@@ -332,7 +331,10 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool
    v = np.sum(vec_v, axis=0)
    h = np.sum(vec_h, axis=0)
    norm_v, norm_h = np.linalg.norm(v), np.linalg.norm(h)
    vertical = eval_orientation and norm_v > norm_h
    if blk.language == 'ja':
        vertical = norm_v > norm_h
    else:
        vertical = norm_v > norm_h * 2
    # calculate distance between textlines and origin
    if vertical:
        primary_vec, primary_norm = v, norm_v
@@ -355,7 +357,6 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool
    if abs(blk.angle) < 3:
        blk.angle = 0
    blk.font_size = font_size
    if eval_orientation:
    blk.vertical = vertical
    blk.vec = primary_vec
    blk.norm = primary_norm
@@ -440,8 +441,10 @@ def split_textblk(blk: TextBlock):
    return textblock_splitted, sub_blk_list

def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[TextBlock]:
    blk_list, scattered_lines = [], {'ver': [], 'hor': []}
    blk_list: List[TextBlock] = []
    scattered_lines = {'ver': [], 'hor': []}
    for bbox, cls, conf in zip(*blks):
        # cls could give a wrong result
        blk_list.append(TextBlock(bbox, language=LANG_LIST[cls]))

    # step1: filter & assign lines to textblocks
@@ -473,7 +476,7 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[

    # step2: filter textblocks, sort & split textlines
    final_blk_list = []
    for ii, blk in enumerate(blk_list):
    for blk in blk_list:
        # filter textblocks 
        if len(blk.lines) == 0:
            bx1, by1, bx2, by2 = blk.xyxy
@@ -483,11 +486,15 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[
                    continue
            xywh = np.array([[bx1, by1, bx2-bx1, by2-by1]])
            blk.lines = xywh2xyxypoly(xywh).reshape(-1, 4, 2).tolist()
        eval_orientation = blk.language != 'eng'
        examine_textblk(blk, im_w, im_h, eval_orientation, sort=True)
        examine_textblk(blk, im_w, im_h, sort=True)
        
        # split manga text if there is a distance gap
        textblock_splitted = blk.language == 'ja' and len(blk.lines) > 1
        textblock_splitted = False
        if len(blk.lines) > 1:
            if blk.language == 'ja':
                textblock_splitted = True
            elif blk.vertical:
                textblock_splitted = True
        if textblock_splitted:
            textblock_splitted, sub_blk_list = split_textblk(blk)
        else:
@@ -505,13 +512,11 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[
        final_blk_list = sort_textblk_list(final_blk_list, im_w, im_h)

    for blk in final_blk_list:
        if blk.language == 'eng':
            blk.vertical = False
        if blk.language == 'eng' and not blk.vertical:
            num_lines = len(blk.lines)
            if num_lines == 0:
                continue
            # blk.line_spacing = blk.bounding_rect()[3] / num_lines / blk.font_size
            resize_ratio = 1.1
            expand_size = max(int(blk.font_size * 0.1), 2)
            rad = np.deg2rad(blk.angle)
            shifted_vec = np.array([[[-1, -1],[1, -1],[1, 1],[-1, 1]]])