Loading ballontranslator/dl/textdetector/textblock.py +18 −15 Original line number Diff line number Diff line Loading @@ -116,16 +116,20 @@ class TextBlock(object): norm_h = np.linalg.norm(middle_pnts[:, 1] - middle_pnts[:, 3]) return norm_v / norm_h def center(self): def center(self) -> np.ndarray: xyxy = np.array(self.xyxy) return (xyxy[:2] + xyxy[2:]) / 2 def min_rect(self, rotate_back=True): def unrotated_polygons(self) -> np.ndarray: angled = self.angle != 0 center = self.center() polygons = self.lines_array().reshape(-1, 8) if angled: polygons = rotate_polygons(center, polygons, self.angle) return angled, center, polygons def min_rect(self, rotate_back=True) -> List[int]: angled, center, polygons = self.unrotated_polygons() min_x = polygons[:, ::2].min() min_y = polygons[:, 1::2].min() max_x = polygons[:, ::2].max() Loading @@ -135,8 +139,17 @@ class TextBlock(object): min_bbox = rotate_polygons(center, min_bbox, -self.angle) return min_bbox.reshape(-1, 4, 2) def normalizd_width_list(self) -> List[float]: angled, center, polygons = self.unrotated_polygons() width_list = [] for polygon in polygons: width_list.append((polygon[[2, 4]] - polygon[[0, 6]]).sum()) width_list = np.array(width_list) width_list = width_list / np.sum(width_list) return width_list.tolist() # equivalent to qt's boundingRect, ignore angle def bounding_rect(self): def bounding_rect(self) -> List[int]: if self._bounding_rect is None: # if True: min_bbox = self.min_rect(rotate_back=False)[0] Loading Loading @@ -166,13 +179,6 @@ class TextBlock(object): direction = 'v' if self.vertical else 'h' src_pts = np.array(self.lines[idx], dtype=np.float64) # if self.language == 'eng' or (self.language == 'unknown' and not self.vertical): # e_size = self.font_size / 3 # src_pts[..., 0] += np.array([-e_size, e_size, e_size, -e_size]) # src_pts[..., 1] += np.array([-e_size, -e_size, e_size, e_size]) # src_pts[..., 0] = np.clip(src_pts[..., 0], 0, im_w) # src_pts[..., 1] = np.clip(src_pts[..., 1], 0, im_h) middle_pnt = (src_pts[[1, 2, 3, 0]] + src_pts) / 2 vec_v = middle_pnt[2] - middle_pnt[0] # vertical vectors of textlines vec_h = middle_pnt[1] - middle_pnt[3] # horizontal vectors of textlines Loading Loading @@ -240,10 +246,7 @@ class TextBlock(object): lines = self.lines_array() if len(lines) == 1: return 1 angled = self.angle != 0 polygons = lines.reshape(-1, 8) if angled: polygons = rotate_polygons((0, 0), polygons, self.angle) angled, center, polygons = self.unrotated_polygons() polygons = polygons.reshape(-1, 4, 2) left_std = np.std(polygons[:, 0, 0]) Loading ballontranslator/ui/constants.py +0 −7 Original line number Diff line number Diff line Loading @@ -21,13 +21,6 @@ CONFIG_COMBOBOX_LONG = 700 LDPI = 96. DPI = 188.75 LANG_SUPPORT_VERTICAL = [ '简体中文', '繁體中文', '日本語', '한국어' ] DEFAULT_FONT_FAMILY = 'Arial' WINDOW_BORDER_WIDTH = 4 Loading ballontranslator/ui/scenetext_manager.py +90 −31 Original line number Diff line number Diff line Loading @@ -19,7 +19,7 @@ from .fontformatpanel import set_textblk_fontsize from .misc import FontFormat, ProgramConfig, pt2px from utils.imgproc_utils import extract_ballon_region from utils.text_processing import seg_text, is_logogram from utils.text_processing import seg_text, is_cjk from utils.text_layout import layout_text class MoveBlkItemsCommand(QUndoCommand): Loading Loading @@ -445,12 +445,20 @@ class SceneTextManager(QObject): fmt = blkitem.get_fontformat() text_size_func = lambda text: get_text_size(QFontMetrics(blk_font), text) src_is_cjk = is_cjk(self.config.dl.translate_source) tgt_is_cjk = is_cjk(self.config.dl.translate_target) if mask is None: bounding_rect = blkitem.absBoundingRect() enlarge_ratio = min(max(bounding_rect[2] / bounding_rect[3], bounding_rect[3] / bounding_rect[2]) * 1.5, 3) if tgt_is_cjk: max_enlarge_ratio = 2.5 else: max_enlarge_ratio = 3 enlarge_ratio = min(max(bounding_rect[2] / bounding_rect[3], bounding_rect[3] / bounding_rect[2]) * 1.5, max_enlarge_ratio) mask, ballon_area, mask_xyxy, region_rect = extract_ballon_region(img, bounding_rect, enlarge_ratio=enlarge_ratio, cal_region_rect=True) else: mask_xyxy = [bounding_rect[0], bounding_rect[1], bounding_rect[0]+bounding_rect[2], bounding_rect[1]+bounding_rect[3]] region_x, region_y, region_w, region_h = region_rect restore_charfmts = False if text is None: Loading @@ -462,55 +470,106 @@ class SceneTextManager(QObject): words, delimiter = seg_text(text, self.config.dl.translate_target) if len(words) == 0: return tgt_is_logoram = is_logogram(self.config.dl.translate_target) src_is_logoram = is_logogram(self.config.dl.translate_source) wl_list = get_words_length_list(QFontMetrics(blk_font), words) w, h = text_size_func(text) line_height = int(round(fmt.line_spacing * h)) text_w, text_h = text_size_func(text) text_area = text_w * text_h line_height = int(round(fmt.line_spacing * text_h)) delimiter_len = text_size_func(delimiter)[0] adaptive_fntsize = False if self.auto_textlayout_flag and self.config.let_fntsize_flag == 0: if not tgt_is_cjk: adaptive_fntsize = True resize_ratio = 1 if adaptive_fntsize: area_ratio = ballon_area / (w * h) area_ratio = ballon_area / text_area ballon_area_thresh = 1.7 downscale_constraint = 0.6 # downscale the font size if textarea exceeds the balloon_area / ballon_area_thresh # or the longest word exceeds the region_width resize_ratio = np.clip(min(area_ratio / ballon_area_thresh, max(wl_list) / region_rect[2], blkitem.blk.font_size / line_height), downscale_constraint, 1.0) if resize_ratio < 1: max_central_width = np.inf if tgt_is_cjk: if blkitem.blk.text: _, _, brw, brh = blkitem.blk.bounding_rect() br_area = brw * brh if src_is_cjk: resize_ratio = np.sqrt(region_h * region_w / br_area) else: resize_ratio = np.clip(max(np.sqrt(br_area / text_area) * 0.8, np.sqrt(ballon_area / text_area ) * 0.7), 1, 1.1) if len(blkitem.blk) > 1: normalized_width_list = blkitem.blk.normalizd_width_list() max_central_width = max(normalized_width_list) else: resize_ratio = 1.1 if resize_ratio != 1: new_font_size = blk_font.pointSizeF() * resize_ratio blk_font.setPointSizeF(new_font_size) wl_list = (np.array(wl_list, np.float64) * resize_ratio).astype(np.int32).tolist() line_height = int(line_height * resize_ratio) text_w = int(text_w * resize_ratio) delimiter_len = int(delimiter_len * resize_ratio) spacing = 0 if tgt_is_logoram: spacing = line_height if max_central_width != np.inf: max_central_width = int(max_central_width * text_w) padding = pt2px(blk_font.pointSize()) + 20 # dummpy padding variable new_text, xywh = layout_text(mask, mask_xyxy, region_rect, words, wl_list, delimiter, delimiter_len, blkitem.blk.angle, line_height, fmt.alignment, fmt.vertical, spacing, padding) if fmt.alignment == 1: if len(blkitem.blk) > 0: centroid = blkitem.blk.center().astype(np.int64).tolist() centroid[0] -= mask_xyxy[0] centroid[1] -= mask_xyxy[1] else: centroid = [bounding_rect[2] // 2, bounding_rect[3] // 2] else: max_central_width = np.inf centroid = [0, 0] abs_centroid = [bounding_rect[0], bounding_rect[1]] if len(blkitem.blk) > 0: blkitem.blk.lines[0] abs_centroid = blkitem.blk.lines[0][0] centroid[0] = int(abs_centroid[0] - mask_xyxy[0]) centroid[1] = int(abs_centroid[1] - mask_xyxy[1]) new_text, xywh = layout_text(mask, mask_xyxy, centroid, words, wl_list, delimiter, delimiter_len, blkitem.blk.angle, line_height, fmt.alignment, fmt.vertical, 0, padding, max_central_width) # font size post adjustment post_resize_ratio = 1 if adaptive_fntsize: downscale_constraint = 0.5 w = xywh[2] - padding * 2 post_resize_ratio = max(region_rect[2] / w, downscale_constraint) if post_resize_ratio < 1: post_resize_ratio = np.clip(max(region_rect[2] / w, downscale_constraint), 0, 1) resize_ratio *= post_resize_ratio if tgt_is_cjk: resize_ratio = 1 post_resize_ratio = 1 / resize_ratio if post_resize_ratio != 1: cx, cy = xywh[0] + xywh[2] / 2, xywh[1] + xywh[3] / 2 w, h = xywh[2] * post_resize_ratio, xywh[3] * post_resize_ratio xywh = [int(cx - w / 2), int(cy - h / 2), int(w), int(h)] if resize_ratio < 1: if resize_ratio != 1: new_font_size = blkitem.font().pointSizeF() * resize_ratio blkitem.textCursor().clearSelection() set_textblk_fontsize(blkitem, new_font_size) scale = blkitem.scale() if scale != 1: xywh = (np.array(xywh, np.float64) * blkitem.scale()).astype(np.int32).tolist() if scale != 1 and not fmt.alignment == 0: xywh = (np.array(xywh, np.float64) * scale).astype(np.int32).tolist() if fmt.alignment == 0: x_shift = (scale - 1) * xywh[2] // 2 + xywh[0] * scale y_shift = (scale - 1) * xywh[3] // 2 + xywh[1] * scale xywh[0] = int(abs_centroid[0] * scale) + x_shift xywh[1] = int(abs_centroid[1] * scale) + y_shift if restore_charfmts: char_fmts = blkitem.get_char_fmts() Loading ballontranslator/utils/text_layout.py +89 −15 Original line number Diff line number Diff line Loading @@ -40,22 +40,24 @@ class Line: self.pos_x += self.spacing self.spacing = 0 def layout_lines_with_mask( def layout_lines_aligncenter( mask: np.ndarray, words: List[str], region_rect: List[int], centroid: List[int], wl_list: List[int], delimiter_len: int, line_height: int, spacing: int = 0, alignment: int = 0, vertical: bool = False, delimiter: str = ' ', max_central_width: float = np.inf, word_break: bool = False)->List[Line]: region_x, region_y, region_w, region_h = region_rect centroid_x = region_x + region_w // 2 centroid_y = region_y + region_h // 2 centroid_x, centroid_y = centroid # rbgmsk = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR) # cv2.circle(rbgmsk, (centroid_x, centroid_y), 10, (255, 0, 0)) # cv2.imshow('mask', rbgmsk) # cv2.waitKey(0) # m = cv2.moments(mask) mask = 255 - mask Loading Loading @@ -125,6 +127,9 @@ def layout_lines_with_mask( central_line.append_right(wlst_right.pop(0), len_right[0] + delimiter_len, delimiter) sum_right -= len_right.pop(0) central_line.pos_x = new_x_r if central_line.length > max_central_width: break central_line.strip_spacing() lines = [central_line] Loading Loading @@ -196,10 +201,53 @@ def layout_lines_with_mask( return lines def layout_lines_alignleft( mask: np.ndarray, words: List[str], origin: List[int], wl_list: List[int], delimiter_len: int, line_height: int, spacing: int = 0, delimiter: str = ' ', word_break: bool = False)->List[Line]: ox, oy = origin bh, bw = mask.shape[:2] mask = 255 - mask num_words = len(words) lines = [] if num_words > 0: sum_right = np.array(wl_list).sum() w, wl = words.pop(0), wl_list.pop(0) line = Line(w, ox, oy, wl) lines.append(line) sum_right -= wl line_bottom = oy + line_height pos_y = oy while sum_right > 0: w, wl = words.pop(0), wl_list.pop(0) sum_right -= wl new_len = line.length + wl + delimiter_len new_r = ox + new_len line_valid = False if new_r < bw: if mask[pos_y: line_bottom, new_r].sum()==0: line_valid = True if line_valid: line.append_right(w, wl+delimiter_len, delimiter) else: pos_y = line_bottom line_bottom += line_height line = Line(w, ox, pos_y, wl) lines.append(line) return lines def layout_text( mask: np.ndarray, mask_xyxy: List, region_rect: List, centroid: List, words: List[str], wl_list: List[int], delimiter: str, Loading @@ -209,20 +257,41 @@ def layout_text( alignment: int, vertical: bool, spacing: int = 0, padding: float = 0) -> Tuple[str, List]: padding: float = 0, max_central_width=np.inf) -> Tuple[str, List]: num_words = len(words) if num_words == 0: return [] centroid_x, centroid_y = centroid center_x = mask_xyxy[0] + centroid_x center_y = mask_xyxy[1] + centroid_y shifted_x, shifted_y = 0, 0 if abs(angle) > 0: old_h, old_w = mask.shape[:2] old_origin = (old_w // 2, old_h // 2) rel_cx, rel_cy = centroid[0] - old_origin[0], centroid[1] - old_origin[1] mask = rotate_image(mask, angle) rad = np.deg2rad(angle) r_sin, r_cos = np.sin(rad), np.cos(rad) new_rel_cy = -rel_cx * r_sin + rel_cy * r_cos new_rel_cx = rel_cy * r_sin + rel_cx * r_cos lines = layout_lines_with_mask(mask, words, region_rect, wl_list, delimiter_len, line_height, spacing, alignment, vertical, delimiter) shifted_x, shifted_y = new_rel_cx - rel_cx, new_rel_cy - rel_cy new_h, new_w = mask.shape[:2] new_origin = (new_w // 2, new_h // 2) new_cx, new_cy = new_origin[0] + new_rel_cx, new_origin[1] + new_rel_cy centroid = [int(new_cx), int(new_cy)] if alignment == 1: lines = layout_lines_aligncenter(mask, words, centroid, wl_list, delimiter_len, line_height, spacing, delimiter, max_central_width) else: lines = layout_lines_alignleft(mask, words, centroid, wl_list, delimiter_len, line_height, spacing, delimiter) region_x, region_y, region_w, region_h = region_rect center_x = mask_xyxy[0] + region_x + region_w // 2 center_y = mask_xyxy[1] + region_y + region_h // 2 concated_text = [] pos_x_lst, pos_right_lst = [], [] Loading @@ -239,7 +308,12 @@ def layout_text( canvas_h = int(canvas_b - canvas_t) canvas_w = int(canvas_r - canvas_l) if alignment == 1: abs_x = int(round(center_x - canvas_w / 2)) abs_y = int(round(center_y - canvas_h / 2)) else: abs_x = shifted_x abs_y = shifted_y return concated_text, [abs_x, abs_y, canvas_w, canvas_h] No newline at end of file ballontranslator/utils/text_processing.py +8 −9 Original line number Diff line number Diff line Loading @@ -6,8 +6,9 @@ WIDE_MAP = {i: i + 0xFEE0 for i in range(0x21, 0x7F)} WIDE_MAP[0x20] = 0x3000 FULL2HALF = dict((i + 0xFEE0, i) for i in range(0x21, 0x7F)) FULL2HALF[0x3000] = 0x20 FULL2HALF[0x3002] = 0x2E LOGORAMS = {'简体中文', '繁体中文', '日本語', '한국어'} LANGSET_CJK = {'简体中文', '繁体中文', '日本語', '한국어'} LANGSET_CH = {'简体中文', '繁体中文'} PUNSET_RIGHT_ENG = {'.', '?', '!', ':', ';', ')', '}', '\'', "\""} Loading Loading @@ -124,19 +125,19 @@ def _seg_ch_pkg(text: str) -> List[str]: word_next, tag_next = segments[ii + 1] len_next = len(word_next) next_valid = True if tag_next != 'w': if tag_next != 'w' and word_next != '.': # somehow pkgseg take '.' as 'n' score_next = PKUSEGSCORES[tag][tag_next] if ii > 0: word_prev, tag_prev = words[-1], segments[ii - 1][1] len_prev = len(word_prev) prev_valid = True if tag_prev != 'w': if tag_prev != 'w' and word_prev[-1] != '.': score_prev = PKUSEGSCORES[tag_prev][tag] append_prev, append_next = False, False if tag == 'w': # puntuation if tag == 'w' or word == '.': # puntuation if word in PUNCTUATION_L: append_next = next_valid elif len_word <= 1: Loading Loading @@ -209,17 +210,15 @@ def seg_text(text: str, lang: str) -> Tuple[List, str]: delimiter = '' if lang in LANGSET_CH: words = seg_ch_pkg(text) elif lang in LOGORAMS: elif lang in LANGSET_CJK: words = seg_to_chars(text) else: words = seg_eng(text) delimiter = ' ' return words, delimiter def is_logogram(lang: str) -> bool: return lang in LOGORAMS def is_cjk(lang: str) -> bool: return lang in LANGSET_CJK Loading
ballontranslator/dl/textdetector/textblock.py +18 −15 Original line number Diff line number Diff line Loading @@ -116,16 +116,20 @@ class TextBlock(object): norm_h = np.linalg.norm(middle_pnts[:, 1] - middle_pnts[:, 3]) return norm_v / norm_h def center(self): def center(self) -> np.ndarray: xyxy = np.array(self.xyxy) return (xyxy[:2] + xyxy[2:]) / 2 def min_rect(self, rotate_back=True): def unrotated_polygons(self) -> np.ndarray: angled = self.angle != 0 center = self.center() polygons = self.lines_array().reshape(-1, 8) if angled: polygons = rotate_polygons(center, polygons, self.angle) return angled, center, polygons def min_rect(self, rotate_back=True) -> List[int]: angled, center, polygons = self.unrotated_polygons() min_x = polygons[:, ::2].min() min_y = polygons[:, 1::2].min() max_x = polygons[:, ::2].max() Loading @@ -135,8 +139,17 @@ class TextBlock(object): min_bbox = rotate_polygons(center, min_bbox, -self.angle) return min_bbox.reshape(-1, 4, 2) def normalizd_width_list(self) -> List[float]: angled, center, polygons = self.unrotated_polygons() width_list = [] for polygon in polygons: width_list.append((polygon[[2, 4]] - polygon[[0, 6]]).sum()) width_list = np.array(width_list) width_list = width_list / np.sum(width_list) return width_list.tolist() # equivalent to qt's boundingRect, ignore angle def bounding_rect(self): def bounding_rect(self) -> List[int]: if self._bounding_rect is None: # if True: min_bbox = self.min_rect(rotate_back=False)[0] Loading Loading @@ -166,13 +179,6 @@ class TextBlock(object): direction = 'v' if self.vertical else 'h' src_pts = np.array(self.lines[idx], dtype=np.float64) # if self.language == 'eng' or (self.language == 'unknown' and not self.vertical): # e_size = self.font_size / 3 # src_pts[..., 0] += np.array([-e_size, e_size, e_size, -e_size]) # src_pts[..., 1] += np.array([-e_size, -e_size, e_size, e_size]) # src_pts[..., 0] = np.clip(src_pts[..., 0], 0, im_w) # src_pts[..., 1] = np.clip(src_pts[..., 1], 0, im_h) middle_pnt = (src_pts[[1, 2, 3, 0]] + src_pts) / 2 vec_v = middle_pnt[2] - middle_pnt[0] # vertical vectors of textlines vec_h = middle_pnt[1] - middle_pnt[3] # horizontal vectors of textlines Loading Loading @@ -240,10 +246,7 @@ class TextBlock(object): lines = self.lines_array() if len(lines) == 1: return 1 angled = self.angle != 0 polygons = lines.reshape(-1, 8) if angled: polygons = rotate_polygons((0, 0), polygons, self.angle) angled, center, polygons = self.unrotated_polygons() polygons = polygons.reshape(-1, 4, 2) left_std = np.std(polygons[:, 0, 0]) Loading
ballontranslator/ui/constants.py +0 −7 Original line number Diff line number Diff line Loading @@ -21,13 +21,6 @@ CONFIG_COMBOBOX_LONG = 700 LDPI = 96. DPI = 188.75 LANG_SUPPORT_VERTICAL = [ '简体中文', '繁體中文', '日本語', '한국어' ] DEFAULT_FONT_FAMILY = 'Arial' WINDOW_BORDER_WIDTH = 4 Loading
ballontranslator/ui/scenetext_manager.py +90 −31 Original line number Diff line number Diff line Loading @@ -19,7 +19,7 @@ from .fontformatpanel import set_textblk_fontsize from .misc import FontFormat, ProgramConfig, pt2px from utils.imgproc_utils import extract_ballon_region from utils.text_processing import seg_text, is_logogram from utils.text_processing import seg_text, is_cjk from utils.text_layout import layout_text class MoveBlkItemsCommand(QUndoCommand): Loading Loading @@ -445,12 +445,20 @@ class SceneTextManager(QObject): fmt = blkitem.get_fontformat() text_size_func = lambda text: get_text_size(QFontMetrics(blk_font), text) src_is_cjk = is_cjk(self.config.dl.translate_source) tgt_is_cjk = is_cjk(self.config.dl.translate_target) if mask is None: bounding_rect = blkitem.absBoundingRect() enlarge_ratio = min(max(bounding_rect[2] / bounding_rect[3], bounding_rect[3] / bounding_rect[2]) * 1.5, 3) if tgt_is_cjk: max_enlarge_ratio = 2.5 else: max_enlarge_ratio = 3 enlarge_ratio = min(max(bounding_rect[2] / bounding_rect[3], bounding_rect[3] / bounding_rect[2]) * 1.5, max_enlarge_ratio) mask, ballon_area, mask_xyxy, region_rect = extract_ballon_region(img, bounding_rect, enlarge_ratio=enlarge_ratio, cal_region_rect=True) else: mask_xyxy = [bounding_rect[0], bounding_rect[1], bounding_rect[0]+bounding_rect[2], bounding_rect[1]+bounding_rect[3]] region_x, region_y, region_w, region_h = region_rect restore_charfmts = False if text is None: Loading @@ -462,55 +470,106 @@ class SceneTextManager(QObject): words, delimiter = seg_text(text, self.config.dl.translate_target) if len(words) == 0: return tgt_is_logoram = is_logogram(self.config.dl.translate_target) src_is_logoram = is_logogram(self.config.dl.translate_source) wl_list = get_words_length_list(QFontMetrics(blk_font), words) w, h = text_size_func(text) line_height = int(round(fmt.line_spacing * h)) text_w, text_h = text_size_func(text) text_area = text_w * text_h line_height = int(round(fmt.line_spacing * text_h)) delimiter_len = text_size_func(delimiter)[0] adaptive_fntsize = False if self.auto_textlayout_flag and self.config.let_fntsize_flag == 0: if not tgt_is_cjk: adaptive_fntsize = True resize_ratio = 1 if adaptive_fntsize: area_ratio = ballon_area / (w * h) area_ratio = ballon_area / text_area ballon_area_thresh = 1.7 downscale_constraint = 0.6 # downscale the font size if textarea exceeds the balloon_area / ballon_area_thresh # or the longest word exceeds the region_width resize_ratio = np.clip(min(area_ratio / ballon_area_thresh, max(wl_list) / region_rect[2], blkitem.blk.font_size / line_height), downscale_constraint, 1.0) if resize_ratio < 1: max_central_width = np.inf if tgt_is_cjk: if blkitem.blk.text: _, _, brw, brh = blkitem.blk.bounding_rect() br_area = brw * brh if src_is_cjk: resize_ratio = np.sqrt(region_h * region_w / br_area) else: resize_ratio = np.clip(max(np.sqrt(br_area / text_area) * 0.8, np.sqrt(ballon_area / text_area ) * 0.7), 1, 1.1) if len(blkitem.blk) > 1: normalized_width_list = blkitem.blk.normalizd_width_list() max_central_width = max(normalized_width_list) else: resize_ratio = 1.1 if resize_ratio != 1: new_font_size = blk_font.pointSizeF() * resize_ratio blk_font.setPointSizeF(new_font_size) wl_list = (np.array(wl_list, np.float64) * resize_ratio).astype(np.int32).tolist() line_height = int(line_height * resize_ratio) text_w = int(text_w * resize_ratio) delimiter_len = int(delimiter_len * resize_ratio) spacing = 0 if tgt_is_logoram: spacing = line_height if max_central_width != np.inf: max_central_width = int(max_central_width * text_w) padding = pt2px(blk_font.pointSize()) + 20 # dummpy padding variable new_text, xywh = layout_text(mask, mask_xyxy, region_rect, words, wl_list, delimiter, delimiter_len, blkitem.blk.angle, line_height, fmt.alignment, fmt.vertical, spacing, padding) if fmt.alignment == 1: if len(blkitem.blk) > 0: centroid = blkitem.blk.center().astype(np.int64).tolist() centroid[0] -= mask_xyxy[0] centroid[1] -= mask_xyxy[1] else: centroid = [bounding_rect[2] // 2, bounding_rect[3] // 2] else: max_central_width = np.inf centroid = [0, 0] abs_centroid = [bounding_rect[0], bounding_rect[1]] if len(blkitem.blk) > 0: blkitem.blk.lines[0] abs_centroid = blkitem.blk.lines[0][0] centroid[0] = int(abs_centroid[0] - mask_xyxy[0]) centroid[1] = int(abs_centroid[1] - mask_xyxy[1]) new_text, xywh = layout_text(mask, mask_xyxy, centroid, words, wl_list, delimiter, delimiter_len, blkitem.blk.angle, line_height, fmt.alignment, fmt.vertical, 0, padding, max_central_width) # font size post adjustment post_resize_ratio = 1 if adaptive_fntsize: downscale_constraint = 0.5 w = xywh[2] - padding * 2 post_resize_ratio = max(region_rect[2] / w, downscale_constraint) if post_resize_ratio < 1: post_resize_ratio = np.clip(max(region_rect[2] / w, downscale_constraint), 0, 1) resize_ratio *= post_resize_ratio if tgt_is_cjk: resize_ratio = 1 post_resize_ratio = 1 / resize_ratio if post_resize_ratio != 1: cx, cy = xywh[0] + xywh[2] / 2, xywh[1] + xywh[3] / 2 w, h = xywh[2] * post_resize_ratio, xywh[3] * post_resize_ratio xywh = [int(cx - w / 2), int(cy - h / 2), int(w), int(h)] if resize_ratio < 1: if resize_ratio != 1: new_font_size = blkitem.font().pointSizeF() * resize_ratio blkitem.textCursor().clearSelection() set_textblk_fontsize(blkitem, new_font_size) scale = blkitem.scale() if scale != 1: xywh = (np.array(xywh, np.float64) * blkitem.scale()).astype(np.int32).tolist() if scale != 1 and not fmt.alignment == 0: xywh = (np.array(xywh, np.float64) * scale).astype(np.int32).tolist() if fmt.alignment == 0: x_shift = (scale - 1) * xywh[2] // 2 + xywh[0] * scale y_shift = (scale - 1) * xywh[3] // 2 + xywh[1] * scale xywh[0] = int(abs_centroid[0] * scale) + x_shift xywh[1] = int(abs_centroid[1] * scale) + y_shift if restore_charfmts: char_fmts = blkitem.get_char_fmts() Loading
ballontranslator/utils/text_layout.py +89 −15 Original line number Diff line number Diff line Loading @@ -40,22 +40,24 @@ class Line: self.pos_x += self.spacing self.spacing = 0 def layout_lines_with_mask( def layout_lines_aligncenter( mask: np.ndarray, words: List[str], region_rect: List[int], centroid: List[int], wl_list: List[int], delimiter_len: int, line_height: int, spacing: int = 0, alignment: int = 0, vertical: bool = False, delimiter: str = ' ', max_central_width: float = np.inf, word_break: bool = False)->List[Line]: region_x, region_y, region_w, region_h = region_rect centroid_x = region_x + region_w // 2 centroid_y = region_y + region_h // 2 centroid_x, centroid_y = centroid # rbgmsk = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR) # cv2.circle(rbgmsk, (centroid_x, centroid_y), 10, (255, 0, 0)) # cv2.imshow('mask', rbgmsk) # cv2.waitKey(0) # m = cv2.moments(mask) mask = 255 - mask Loading Loading @@ -125,6 +127,9 @@ def layout_lines_with_mask( central_line.append_right(wlst_right.pop(0), len_right[0] + delimiter_len, delimiter) sum_right -= len_right.pop(0) central_line.pos_x = new_x_r if central_line.length > max_central_width: break central_line.strip_spacing() lines = [central_line] Loading Loading @@ -196,10 +201,53 @@ def layout_lines_with_mask( return lines def layout_lines_alignleft( mask: np.ndarray, words: List[str], origin: List[int], wl_list: List[int], delimiter_len: int, line_height: int, spacing: int = 0, delimiter: str = ' ', word_break: bool = False)->List[Line]: ox, oy = origin bh, bw = mask.shape[:2] mask = 255 - mask num_words = len(words) lines = [] if num_words > 0: sum_right = np.array(wl_list).sum() w, wl = words.pop(0), wl_list.pop(0) line = Line(w, ox, oy, wl) lines.append(line) sum_right -= wl line_bottom = oy + line_height pos_y = oy while sum_right > 0: w, wl = words.pop(0), wl_list.pop(0) sum_right -= wl new_len = line.length + wl + delimiter_len new_r = ox + new_len line_valid = False if new_r < bw: if mask[pos_y: line_bottom, new_r].sum()==0: line_valid = True if line_valid: line.append_right(w, wl+delimiter_len, delimiter) else: pos_y = line_bottom line_bottom += line_height line = Line(w, ox, pos_y, wl) lines.append(line) return lines def layout_text( mask: np.ndarray, mask_xyxy: List, region_rect: List, centroid: List, words: List[str], wl_list: List[int], delimiter: str, Loading @@ -209,20 +257,41 @@ def layout_text( alignment: int, vertical: bool, spacing: int = 0, padding: float = 0) -> Tuple[str, List]: padding: float = 0, max_central_width=np.inf) -> Tuple[str, List]: num_words = len(words) if num_words == 0: return [] centroid_x, centroid_y = centroid center_x = mask_xyxy[0] + centroid_x center_y = mask_xyxy[1] + centroid_y shifted_x, shifted_y = 0, 0 if abs(angle) > 0: old_h, old_w = mask.shape[:2] old_origin = (old_w // 2, old_h // 2) rel_cx, rel_cy = centroid[0] - old_origin[0], centroid[1] - old_origin[1] mask = rotate_image(mask, angle) rad = np.deg2rad(angle) r_sin, r_cos = np.sin(rad), np.cos(rad) new_rel_cy = -rel_cx * r_sin + rel_cy * r_cos new_rel_cx = rel_cy * r_sin + rel_cx * r_cos lines = layout_lines_with_mask(mask, words, region_rect, wl_list, delimiter_len, line_height, spacing, alignment, vertical, delimiter) shifted_x, shifted_y = new_rel_cx - rel_cx, new_rel_cy - rel_cy new_h, new_w = mask.shape[:2] new_origin = (new_w // 2, new_h // 2) new_cx, new_cy = new_origin[0] + new_rel_cx, new_origin[1] + new_rel_cy centroid = [int(new_cx), int(new_cy)] if alignment == 1: lines = layout_lines_aligncenter(mask, words, centroid, wl_list, delimiter_len, line_height, spacing, delimiter, max_central_width) else: lines = layout_lines_alignleft(mask, words, centroid, wl_list, delimiter_len, line_height, spacing, delimiter) region_x, region_y, region_w, region_h = region_rect center_x = mask_xyxy[0] + region_x + region_w // 2 center_y = mask_xyxy[1] + region_y + region_h // 2 concated_text = [] pos_x_lst, pos_right_lst = [], [] Loading @@ -239,7 +308,12 @@ def layout_text( canvas_h = int(canvas_b - canvas_t) canvas_w = int(canvas_r - canvas_l) if alignment == 1: abs_x = int(round(center_x - canvas_w / 2)) abs_y = int(round(center_y - canvas_h / 2)) else: abs_x = shifted_x abs_y = shifted_y return concated_text, [abs_x, abs_y, canvas_w, canvas_h] No newline at end of file
ballontranslator/utils/text_processing.py +8 −9 Original line number Diff line number Diff line Loading @@ -6,8 +6,9 @@ WIDE_MAP = {i: i + 0xFEE0 for i in range(0x21, 0x7F)} WIDE_MAP[0x20] = 0x3000 FULL2HALF = dict((i + 0xFEE0, i) for i in range(0x21, 0x7F)) FULL2HALF[0x3000] = 0x20 FULL2HALF[0x3002] = 0x2E LOGORAMS = {'简体中文', '繁体中文', '日本語', '한국어'} LANGSET_CJK = {'简体中文', '繁体中文', '日本語', '한국어'} LANGSET_CH = {'简体中文', '繁体中文'} PUNSET_RIGHT_ENG = {'.', '?', '!', ':', ';', ')', '}', '\'', "\""} Loading Loading @@ -124,19 +125,19 @@ def _seg_ch_pkg(text: str) -> List[str]: word_next, tag_next = segments[ii + 1] len_next = len(word_next) next_valid = True if tag_next != 'w': if tag_next != 'w' and word_next != '.': # somehow pkgseg take '.' as 'n' score_next = PKUSEGSCORES[tag][tag_next] if ii > 0: word_prev, tag_prev = words[-1], segments[ii - 1][1] len_prev = len(word_prev) prev_valid = True if tag_prev != 'w': if tag_prev != 'w' and word_prev[-1] != '.': score_prev = PKUSEGSCORES[tag_prev][tag] append_prev, append_next = False, False if tag == 'w': # puntuation if tag == 'w' or word == '.': # puntuation if word in PUNCTUATION_L: append_next = next_valid elif len_word <= 1: Loading Loading @@ -209,17 +210,15 @@ def seg_text(text: str, lang: str) -> Tuple[List, str]: delimiter = '' if lang in LANGSET_CH: words = seg_ch_pkg(text) elif lang in LOGORAMS: elif lang in LANGSET_CJK: words = seg_to_chars(text) else: words = seg_eng(text) delimiter = ' ' return words, delimiter def is_logogram(lang: str) -> bool: return lang in LOGORAMS def is_cjk(lang: str) -> bool: return lang in LANGSET_CJK