Loading ballontranslator/dl/textdetector/ctd/inference.py +1 −1 Original line number Diff line number Diff line Loading @@ -43,7 +43,7 @@ def model2annotations(model_path, img_dir_list, save_dir, save_json=False): for blk in blk_list: polys += blk.lines blk_xyxy.append(blk.xyxy) blk_dict_list.append(blk.to_dict(extra_info=True)) blk_dict_list.append(blk.to_dict()) blk_xyxy = xyxy2yolo(blk_xyxy, im_w, im_h) if blk_xyxy is not None: cls_list = [1] * len(blk_xyxy) Loading ballontranslator/dl/textdetector/textblock.py +20 −15 Original line number Diff line number Diff line Loading @@ -181,12 +181,11 @@ class TextBlock(object): def __getitem__(self, idx): return self.lines[idx] def to_dict(self, extra_info=False): def to_dict(self): blk_dict = copy.deepcopy(vars(self)) return blk_dict def get_transformed_region(self, img, idx, textheight, maxwidth=None) -> np.ndarray : im_h, im_w = img.shape[:2] def get_transformed_region(self, img: np.ndarray, idx: int, textheight: int, maxwidth: int = None) -> np.ndarray : direction = 'v' if self.vertical else 'h' src_pts = np.array(self.lines[idx], dtype=np.float64) Loading Loading @@ -322,7 +321,7 @@ def sort_textblk_list(blk_list: List[TextBlock], im_w: int, im_h: int) -> List[T blk_list.sort(key=lambda blk: blk.sort_weight) return blk_list def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool, sort: bool = False) -> None: def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) -> None: lines = blk.lines_array() middle_pnts = (lines[:, [1, 2, 3, 0]] + lines) / 2 vec_v = middle_pnts[:, 2] - middle_pnts[:, 0] # vertical vectors of textlines Loading @@ -332,7 +331,10 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool v = np.sum(vec_v, axis=0) h = np.sum(vec_h, axis=0) norm_v, norm_h = np.linalg.norm(v), np.linalg.norm(h) vertical = eval_orientation and norm_v > norm_h if blk.language == 'ja': vertical = norm_v > norm_h else: vertical = norm_v > norm_h * 2 # calcuate distance between textlines and origin if vertical: primary_vec, primary_norm = v, norm_v Loading @@ -355,7 +357,6 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool if abs(blk.angle) < 3: blk.angle = 0 blk.font_size = font_size if eval_orientation: blk.vertical = vertical blk.vec = primary_vec blk.norm = primary_norm Loading Loading @@ -440,8 +441,10 @@ def split_textblk(blk: TextBlock): return textblock_splitted, sub_blk_list def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[TextBlock]: blk_list, scattered_lines = [], {'ver': [], 'hor': []} blk_list: List[TextBlock] = [] scattered_lines = {'ver': [], 'hor': []} for bbox, cls, conf in zip(*blks): # cls could give wrong result blk_list.append(TextBlock(bbox, language=LANG_LIST[cls])) # step1: filter & assign lines to textblocks Loading Loading @@ -473,7 +476,7 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ # step2: filter textblocks, sort & split textlines final_blk_list = [] for ii, blk in enumerate(blk_list): for blk in blk_list: # filter textblocks if len(blk.lines) == 0: bx1, by1, bx2, by2 = blk.xyxy Loading @@ -483,11 +486,15 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ continue xywh = np.array([[bx1, by1, bx2-bx1, by2-by1]]) blk.lines = xywh2xyxypoly(xywh).reshape(-1, 4, 2).tolist() eval_orientation = blk.language != 'eng' examine_textblk(blk, im_w, im_h, eval_orientation, sort=True) examine_textblk(blk, im_w, im_h, sort=True) # split manga text if there is a distance gap textblock_splitted = blk.language == 'ja' and len(blk.lines) > 1 textblock_splitted = False if len(blk.lines) > 1: if blk.language == 'ja': textblock_splitted = True elif blk.vertical: textblock_splitted = True if textblock_splitted: textblock_splitted, sub_blk_list = split_textblk(blk) else: Loading @@ -505,13 +512,11 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ final_blk_list = sort_textblk_list(final_blk_list, im_w, im_h) for blk in final_blk_list: if blk.language == 'eng': blk.vertical = False if blk.language == 'eng' and not blk.vertical: num_lines = len(blk.lines) if num_lines == 0: continue # blk.line_spacing = blk.bounding_rect()[3] / num_lines / blk.font_size resize_ratio = 1.1 expand_size = max(int(blk.font_size * 0.1), 2) rad = np.deg2rad(blk.angle) shifted_vec = np.array([[[-1, -1],[1, -1],[1, 1],[-1, 1]]]) Loading Loading
ballontranslator/dl/textdetector/ctd/inference.py +1 −1 Original line number Diff line number Diff line Loading @@ -43,7 +43,7 @@ def model2annotations(model_path, img_dir_list, save_dir, save_json=False): for blk in blk_list: polys += blk.lines blk_xyxy.append(blk.xyxy) blk_dict_list.append(blk.to_dict(extra_info=True)) blk_dict_list.append(blk.to_dict()) blk_xyxy = xyxy2yolo(blk_xyxy, im_w, im_h) if blk_xyxy is not None: cls_list = [1] * len(blk_xyxy) Loading
ballontranslator/dl/textdetector/textblock.py +20 −15 Original line number Diff line number Diff line Loading @@ -181,12 +181,11 @@ class TextBlock(object): def __getitem__(self, idx): return self.lines[idx] def to_dict(self, extra_info=False): def to_dict(self): blk_dict = copy.deepcopy(vars(self)) return blk_dict def get_transformed_region(self, img, idx, textheight, maxwidth=None) -> np.ndarray : im_h, im_w = img.shape[:2] def get_transformed_region(self, img: np.ndarray, idx: int, textheight: int, maxwidth: int = None) -> np.ndarray : direction = 'v' if self.vertical else 'h' src_pts = np.array(self.lines[idx], dtype=np.float64) Loading Loading @@ -322,7 +321,7 @@ def sort_textblk_list(blk_list: List[TextBlock], im_w: int, im_h: int) -> List[T blk_list.sort(key=lambda blk: blk.sort_weight) return blk_list def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool, sort: bool = False) -> None: def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) -> None: lines = blk.lines_array() middle_pnts = (lines[:, [1, 2, 3, 0]] + lines) / 2 vec_v = middle_pnts[:, 2] - middle_pnts[:, 0] # vertical vectors of textlines Loading @@ -332,7 +331,10 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool v = np.sum(vec_v, axis=0) h = np.sum(vec_h, axis=0) norm_v, norm_h = np.linalg.norm(v), np.linalg.norm(h) vertical = eval_orientation and norm_v > norm_h if blk.language == 'ja': vertical = norm_v > norm_h else: vertical = norm_v > norm_h * 2 # calcuate distance between textlines and origin if vertical: primary_vec, primary_norm = v, norm_v Loading @@ -355,7 +357,6 @@ def examine_textblk(blk: TextBlock, im_w: int, im_h: int, eval_orientation: bool if abs(blk.angle) < 3: blk.angle = 0 blk.font_size = font_size if eval_orientation: blk.vertical = vertical blk.vec = primary_vec blk.norm = primary_norm Loading Loading @@ -440,8 +441,10 @@ def split_textblk(blk: TextBlock): return textblock_splitted, sub_blk_list def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[TextBlock]: blk_list, scattered_lines = [], {'ver': [], 'hor': []} blk_list: List[TextBlock] = [] scattered_lines = {'ver': [], 'hor': []} for bbox, cls, conf in zip(*blks): # cls could give wrong result blk_list.append(TextBlock(bbox, language=LANG_LIST[cls])) # step1: filter & assign lines to textblocks Loading Loading @@ -473,7 +476,7 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ # step2: filter textblocks, sort & split textlines final_blk_list = [] for ii, blk in enumerate(blk_list): for blk in blk_list: # filter textblocks if len(blk.lines) == 0: bx1, by1, bx2, by2 = blk.xyxy Loading @@ -483,11 +486,15 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ continue xywh = np.array([[bx1, by1, bx2-bx1, by2-by1]]) blk.lines = xywh2xyxypoly(xywh).reshape(-1, 4, 2).tolist() eval_orientation = blk.language != 'eng' examine_textblk(blk, im_w, im_h, eval_orientation, sort=True) examine_textblk(blk, im_w, im_h, sort=True) # split manga text if there is a distance gap textblock_splitted = blk.language == 'ja' and len(blk.lines) > 1 textblock_splitted = False if len(blk.lines) > 1: if blk.language == 'ja': textblock_splitted = True elif blk.vertical: textblock_splitted = True if textblock_splitted: textblock_splitted, sub_blk_list = split_textblk(blk) else: Loading @@ -505,13 +512,11 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ final_blk_list = sort_textblk_list(final_blk_list, im_w, im_h) for blk in final_blk_list: if blk.language == 'eng': blk.vertical = False if blk.language == 'eng' and not blk.vertical: num_lines = len(blk.lines) if num_lines == 0: continue # blk.line_spacing = blk.bounding_rect()[3] / num_lines / blk.font_size resize_ratio = 1.1 expand_size = max(int(blk.font_size * 0.1), 2) rad = np.deg2rad(blk.angle) shifted_vec = np.array([[[-1, -1],[1, -1],[1, 1],[-1, 1]]]) Loading