Loading modules/textdetector/ctd/inference.py +4 −2 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ from utils.imgproc_utils import letterbox, xyxy2yolo, get_yololabel_strings, squ from ..yolov5.yolov5_utils import non_max_suppression from ..db_utils import SegDetectorRepresenter from utils.textblock import TextBlock, group_output from utils.textblock import TextBlock, group_output, mit_merge_textlines from .textmask import refine_mask, refine_undetected_mask, REFINEMASK_INPAINT, REFINEMASK_ANNOTATION from pathlib import Path from typing import Union, List, Tuple, Callable Loading Loading @@ -344,7 +344,9 @@ class TextDetector: lines = [] else: lines = lines.astype(np.int64) blk_list = group_output(blks, lines, im_w, im_h, mask) blk_list = group_output([], lines, im_w, im_h, mask, canvas=img) # print(lines) # blk_list = mit_merge_textlines(lines, im_w, im_w) mask_refined = refine_mask(img, mask, blk_list, refine_mode=refine_mode) if keep_undetected_mask: mask_refined = refine_undetected_mask(img, mask, mask_refined, blk_list, refine_mode=refine_mode) Loading utils/textblock.py +16 −17 Original line number Diff line number Diff line Loading @@ -298,16 +298,18 @@ class TextBlock: xyxy = np.array(self.xyxy) return (xyxy[:2] + xyxy[2:]) / 2 def unrotated_polygons(self) -> np.ndarray: def unrotated_polygons(self, ids=None) -> np.ndarray: angled = self.angle != 0 center = self.center() polygons = self.lines_array().reshape(-1, 8) if ids is not None: polygons = polygons[ids] if angled: polygons = rotate_polygons(center, polygons, self.angle) return angled, center, polygons def min_rect(self, rotate_back=True) -> List[int]: angled, center, polygons = self.unrotated_polygons() def min_rect(self, rotate_back=True, ids=None) -> List[int]: angled, center, polygons = self.unrotated_polygons(ids=ids) min_x = polygons[:, ::2].min() min_y = polygons[:, 1::2].min() max_x = polygons[:, ::2].max() Loading Loading @@ -576,10 +578,7 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) -> v = np.sum(vec_v, axis=0) h = np.sum(vec_h, axis=0) norm_v, norm_h = np.linalg.norm(v), np.linalg.norm(h) if blk.language == 'ja': vertical = norm_v > norm_h else: vertical = norm_v > norm_h * 2 vertical = blk.src_is_vertical # calcuate distance between textlines and origin if vertical: primary_vec, primary_norm = v, norm_v Loading @@ -602,7 +601,6 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) -> if abs(blk.angle) < 3: blk.angle = 0 blk.font_size = font_size blk.vertical = blk.src_is_vertical = vertical blk.vec = primary_vec blk.norm = primary_norm if sort: Loading @@ -619,9 +617,9 @@ def try_merge_textline(blk: TextBlock, blk2: TextBlock, fntsize_tol=1.7, distanc cos_vec = vec_prod / blk.norm / blk2.norm # distance = blk2.distance[-1] - blk.distance[-1] # distance_p1 = np.linalg.norm(np.array(blk2.lines[-1][0]) - np.array(blk.lines[-1][0])) minrect1 = blk.min_rect()[0] minrect1 = blk.min_rect(ids=[-1])[0] xyxy1 = [*minrect1[0], *minrect1[2]] minrect2 = blk2.min_rect()[0] minrect2 = blk2.min_rect(ids=[-1])[0] xyxy2 = [*minrect2[0], *minrect2[2]] distance_x = max(xyxy1[0], xyxy2[0]) - min(xyxy1[2], xyxy2[2]) distance_y = max(xyxy1[1], xyxy2[1]) - min(xyxy1[3], xyxy2[3]) Loading @@ -640,9 +638,9 @@ def try_merge_textline(blk: TextBlock, blk2: TextBlock, fntsize_tol=1.7, distanc return False # if distance > distance_tol * fntsz_avg: # return False if blk.vertical and blk2.vertical and distance_x > fntsz_avg * 1.5: if blk.vertical and blk2.vertical and distance_x > fntsz_avg * 0.8: return False if not blk.vertical and distance_y > fntsz_avg * 0.3: if not blk.vertical and distance_y > fntsz_avg * 0.5: return False # merge for line in blk2.lines: Loading Loading @@ -703,7 +701,7 @@ def split_textblk(blk: TextBlock): current_blk.adjust_bbox(with_bbox=False) return textblock_splitted, sub_blk_list def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[TextBlock]: def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True, canvas=None) -> List[TextBlock]: blk_list: List[TextBlock] = [] scattered_lines = {'ver': [], 'hor': []} for bbox, cls, conf in zip(*blks): Loading Loading @@ -733,6 +731,7 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ if mask_score < mask_score_thresh: continue blk = TextBlock([bx1, by1, bx2, by2], [line]) blk.vertical = blk.src_is_vertical = is_vertical examine_textblk(blk, im_w, im_h, sort=False) if blk.vertical: scattered_lines['ver'].append(blk) Loading Loading @@ -762,9 +761,9 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ textblock_splitted = True elif blk.vertical: textblock_splitted = True if textblock_splitted: textblock_splitted, sub_blk_list = split_textblk(blk) else: # if textblock_splitted: # textblock_splitted, sub_blk_list = split_textblk(blk) # else: sub_blk_list = [blk] # modify textblock to fit its textlines if not textblock_splitted: Loading Loading
modules/textdetector/ctd/inference.py +4 −2 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ from utils.imgproc_utils import letterbox, xyxy2yolo, get_yololabel_strings, squ from ..yolov5.yolov5_utils import non_max_suppression from ..db_utils import SegDetectorRepresenter from utils.textblock import TextBlock, group_output from utils.textblock import TextBlock, group_output, mit_merge_textlines from .textmask import refine_mask, refine_undetected_mask, REFINEMASK_INPAINT, REFINEMASK_ANNOTATION from pathlib import Path from typing import Union, List, Tuple, Callable Loading Loading @@ -344,7 +344,9 @@ class TextDetector: lines = [] else: lines = lines.astype(np.int64) blk_list = group_output(blks, lines, im_w, im_h, mask) blk_list = group_output([], lines, im_w, im_h, mask, canvas=img) # print(lines) # blk_list = mit_merge_textlines(lines, im_w, im_w) mask_refined = refine_mask(img, mask, blk_list, refine_mode=refine_mode) if keep_undetected_mask: mask_refined = refine_undetected_mask(img, mask, mask_refined, blk_list, refine_mode=refine_mode) Loading
utils/textblock.py +16 −17 Original line number Diff line number Diff line Loading @@ -298,16 +298,18 @@ class TextBlock: xyxy = np.array(self.xyxy) return (xyxy[:2] + xyxy[2:]) / 2 def unrotated_polygons(self) -> np.ndarray: def unrotated_polygons(self, ids=None) -> np.ndarray: angled = self.angle != 0 center = self.center() polygons = self.lines_array().reshape(-1, 8) if ids is not None: polygons = polygons[ids] if angled: polygons = rotate_polygons(center, polygons, self.angle) return angled, center, polygons def min_rect(self, rotate_back=True) -> List[int]: angled, center, polygons = self.unrotated_polygons() def min_rect(self, rotate_back=True, ids=None) -> List[int]: angled, center, polygons = self.unrotated_polygons(ids=ids) min_x = polygons[:, ::2].min() min_y = polygons[:, 1::2].min() max_x = polygons[:, ::2].max() Loading Loading @@ -576,10 +578,7 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) -> v = np.sum(vec_v, axis=0) h = np.sum(vec_h, axis=0) norm_v, norm_h = np.linalg.norm(v), np.linalg.norm(h) if blk.language == 'ja': vertical = norm_v > norm_h else: vertical = norm_v > norm_h * 2 vertical = blk.src_is_vertical # calcuate distance between textlines and origin if vertical: primary_vec, primary_norm = v, norm_v Loading @@ -602,7 +601,6 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) -> if abs(blk.angle) < 3: blk.angle = 0 blk.font_size = font_size blk.vertical = blk.src_is_vertical = vertical blk.vec = primary_vec blk.norm = primary_norm if sort: Loading @@ -619,9 +617,9 @@ def try_merge_textline(blk: TextBlock, blk2: TextBlock, fntsize_tol=1.7, distanc cos_vec = vec_prod / blk.norm / blk2.norm # distance = blk2.distance[-1] - blk.distance[-1] # distance_p1 = np.linalg.norm(np.array(blk2.lines[-1][0]) - np.array(blk.lines[-1][0])) minrect1 = blk.min_rect()[0] minrect1 = blk.min_rect(ids=[-1])[0] xyxy1 = [*minrect1[0], *minrect1[2]] minrect2 = blk2.min_rect()[0] minrect2 = blk2.min_rect(ids=[-1])[0] xyxy2 = [*minrect2[0], *minrect2[2]] distance_x = max(xyxy1[0], xyxy2[0]) - min(xyxy1[2], xyxy2[2]) distance_y = max(xyxy1[1], xyxy2[1]) - min(xyxy1[3], xyxy2[3]) Loading @@ -640,9 +638,9 @@ def try_merge_textline(blk: TextBlock, blk2: TextBlock, fntsize_tol=1.7, distanc return False # if distance > distance_tol * fntsz_avg: # return False if blk.vertical and blk2.vertical and distance_x > fntsz_avg * 1.5: if blk.vertical and blk2.vertical and distance_x > fntsz_avg * 0.8: return False if not blk.vertical and distance_y > fntsz_avg * 0.3: if not blk.vertical and distance_y > fntsz_avg * 0.5: return False # merge for line in blk2.lines: Loading Loading @@ -703,7 +701,7 @@ def split_textblk(blk: TextBlock): current_blk.adjust_bbox(with_bbox=False) return textblock_splitted, sub_blk_list def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[TextBlock]: def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True, canvas=None) -> List[TextBlock]: blk_list: List[TextBlock] = [] scattered_lines = {'ver': [], 'hor': []} for bbox, cls, conf in zip(*blks): Loading Loading @@ -733,6 +731,7 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ if mask_score < mask_score_thresh: continue blk = TextBlock([bx1, by1, bx2, by2], [line]) blk.vertical = blk.src_is_vertical = is_vertical examine_textblk(blk, im_w, im_h, sort=False) if blk.vertical: scattered_lines['ver'].append(blk) Loading Loading @@ -762,9 +761,9 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ textblock_splitted = True elif blk.vertical: textblock_splitted = True if textblock_splitted: textblock_splitted, sub_blk_list = split_textblk(blk) else: # if textblock_splitted: # textblock_splitted, sub_blk_list = split_textblk(blk) # else: sub_blk_list = [blk] # modify textblock to fit its textlines if not textblock_splitted: Loading