Loading ballontranslator/dl/textdetector/textblock.py +24 −7 Original line number Diff line number Diff line Loading @@ -166,12 +166,12 @@ class TextBlock(object): direction = 'v' if self.vertical else 'h' src_pts = np.array(self.lines[idx], dtype=np.float64) if self.language == 'eng' or (self.language == 'unknown' and not self.vertical): e_size = self.font_size / 3 src_pts[..., 0] += np.array([-e_size, e_size, e_size, -e_size]) src_pts[..., 1] += np.array([-e_size, -e_size, e_size, e_size]) src_pts[..., 0] = np.clip(src_pts[..., 0], 0, im_w) src_pts[..., 1] = np.clip(src_pts[..., 1], 0, im_h) # if self.language == 'eng' or (self.language == 'unknown' and not self.vertical): # e_size = self.font_size / 3 # src_pts[..., 0] += np.array([-e_size, e_size, e_size, -e_size]) # src_pts[..., 1] += np.array([-e_size, -e_size, e_size, e_size]) # src_pts[..., 0] = np.clip(src_pts[..., 0], 0, im_w) # src_pts[..., 1] = np.clip(src_pts[..., 1], 0, im_h) middle_pnt = (src_pts[[1, 2, 3, 0]] + src_pts) / 2 vec_v = middle_pnt[2] - middle_pnt[0] # vertical vectors of textlines Loading Loading @@ -260,7 +260,7 @@ class TextBlock(object): @property def stroke_width(self): diff = color_difference(*self.get_font_colors()) if diff > 20: if diff > 15: return self.default_stroke_width return 0 Loading Loading @@ -479,6 +479,23 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ final_blk_list += merge_textlines(scattered_lines['ver']) if sort_blklist: final_blk_list = sort_textblk_list(final_blk_list, im_w, im_h) for blk in final_blk_list: if blk.language == 'eng': num_lines = len(blk.lines) if num_lines == 0: continue # blk.line_spacing = blk.bounding_rect()[3] / num_lines / blk.font_size resize_ratio = 1.1 rad = np.deg2rad(blk.angle) shifted_vec = np.array([[[-1, -1],[1, -1],[1, 1],[-1, 1]]]) shifted_vec = shifted_vec * np.array([[[np.sin(rad), np.cos(rad)]]]) * blk.font_size * (resize_ratio - 1) lines = blk.lines_array() + shifted_vec lines[..., 0] = np.clip(lines[..., 0], 0, im_w-1) lines[..., 1] = np.clip(lines[..., 1], 0, im_h-1) blk.lines = lines.astype(np.int64).tolist() blk.font_size = int(resize_ratio * blk.font_size) return final_blk_list def visualize_textblocks(canvas, blk_list: List[TextBlock]): Loading ballontranslator/ui/scenetext_manager.py +10 −5 Original line number Diff line number Diff line Loading @@ -19,7 +19,7 @@ from .fontformatpanel import set_textblk_fontsize from .misc import FontFormat, ProgramConfig, pt2px from utils.imgproc_utils import extract_ballon_region from utils.text_processing import seg_text from utils.text_processing import seg_text, is_logogram from utils.text_layout import layout_text class MoveBlkItemsCommand(QUndoCommand): Loading Loading @@ -462,6 +462,8 @@ class SceneTextManager(QObject): words, delimiter = seg_text(text, self.config.dl.translate_target) if len(words) == 0: return tgt_is_logoram = is_logogram(self.config.dl.translate_target) src_is_logoram = is_logogram(self.config.dl.translate_source) wl_list = get_words_length_list(QFontMetrics(blk_font), words) w, h = text_size_func(text) Loading @@ -473,11 +475,11 @@ class SceneTextManager(QObject): adaptive_fntsize = True if adaptive_fntsize: area_ratio = ballon_area / (w * h) ballon_area_thresh = 1.8 ballon_area_thresh = 1.7 downscale_constraint = 0.6 # downscale the font size if textarea exceeds the balloon_area / ballon_area_thresh # or the longest word exceeds the region_width resize_ratio = np.clip(min(area_ratio / ballon_area_thresh, max(wl_list) / region_rect[2]), downscale_constraint, 1.0) resize_ratio = np.clip(min(area_ratio / ballon_area_thresh, max(wl_list) / region_rect[2], blkitem.blk.font_size / line_height), downscale_constraint, 1.0) if resize_ratio < 1: new_font_size = blk_font.pointSizeF() * resize_ratio blk_font.setPointSizeF(new_font_size) Loading @@ -485,12 +487,15 @@ class SceneTextManager(QObject): line_height = int(line_height * resize_ratio) delimiter_len = int(delimiter_len * resize_ratio) spacing = 0 if tgt_is_logoram: spacing = line_height padding = pt2px(blk_font.pointSize()) + 20 # dummpy padding variable new_text, xywh = layout_text(mask, mask_xyxy, region_rect, words, wl_list, delimiter, delimiter_len, blkitem.blk.angle, line_height, fmt.alignment, fmt.vertical, padding) new_text, xywh = layout_text(mask, mask_xyxy, region_rect, words, wl_list, delimiter, delimiter_len, blkitem.blk.angle, line_height, fmt.alignment, fmt.vertical, spacing, padding) # font size post adjustment if adaptive_fntsize: downscale_constraint = 0.6 downscale_constraint = 0.5 w = xywh[2] - padding * 2 post_resize_ratio = max(region_rect[2] / w, downscale_constraint) if post_resize_ratio < 1: Loading ballontranslator/utils/text_layout.py +24 −8 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ from .imgproc_utils import extract_ballon_region, rotate_image class Line: def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0) -> None: def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0, spacing: int = 0) -> None: self.text = text self.pos_x = pos_x self.pos_y = pos_y Loading @@ -15,6 +15,8 @@ class Line: self.num_words = 0 if text: self.num_words += 1 self.spacing = 0 self.add_spacing(spacing) def append_right(self, word: str, w_len: int, delimiter: str = ''): self.text = self.text + delimiter + word Loading @@ -28,6 +30,16 @@ class Line: self.num_words += 1 self.length += w_len def add_spacing(self, spacing: int): self.spacing = spacing self.pos_x -= spacing self.length += 2 * spacing def strip_spacing(self): self.length -= self.spacing * 2 self.pos_x += self.spacing self.spacing = 0 def layout_lines_with_mask( mask: np.ndarray, words: List[str], Loading @@ -35,6 +47,7 @@ def layout_lines_with_mask( wl_list: List[int], delimiter_len: int, line_height: int, spacing: int = 0, alignment: int = 0, vertical: bool = False, delimiter: str = ' ', Loading Loading @@ -75,7 +88,7 @@ def layout_lines_with_mask( pos_x = centroid_x - wl_list[central_index] // 2 bh, bw = mask.shape[:2] central_line = Line(words[central_index], pos_x, pos_y, wl_list[central_index]) central_line = Line(words[central_index], pos_x, pos_y, wl_list[central_index], spacing) line_bottom = pos_y + line_height while sum_left > 0 or sum_right > 0: left_valid, right_valid = False, False Loading Loading @@ -112,7 +125,7 @@ def layout_lines_with_mask( central_line.append_right(wlst_right.pop(0), len_right[0] + delimiter_len, delimiter) sum_right -= len_right.pop(0) central_line.pos_x = new_x_r central_line.strip_spacing() lines = [central_line] # layout bottom half Loading @@ -121,7 +134,7 @@ def layout_lines_with_mask( pos_x = centroid_x - wl // 2 pos_y = centroid_y + line_height // 2 line_bottom = pos_y + line_height line = Line(w, pos_x, pos_y, wl) line = Line(w, pos_x, pos_y, wl, spacing) lines.append(line) sum_right -= wl while sum_right > 0: Loading @@ -144,7 +157,8 @@ def layout_lines_with_mask( pos_x = centroid_x - wl // 2 pos_y = line_bottom line_bottom += line_height line = Line(w, pos_x, pos_y, wl) line.strip_spacing() line = Line(w, pos_x, pos_y, wl, spacing) lines.append(line) # layout top half Loading @@ -153,7 +167,7 @@ def layout_lines_with_mask( pos_x = centroid_x - wl // 2 pos_y = centroid_y - line_height // 2 - line_height line_bottom = pos_y + line_height line = Line(w, pos_x, pos_y, wl) line = Line(w, pos_x, pos_y, wl, spacing) lines.insert(0, line) sum_left -= wl while sum_left > 0: Loading @@ -176,7 +190,8 @@ def layout_lines_with_mask( pos_x = centroid_x - wl // 2 pos_y -= line_height line_bottom = pos_y + line_height line = Line(w, pos_x, pos_y, wl) line.strip_spacing() line = Line(w, pos_x, pos_y, wl, spacing) lines.insert(0, line) return lines Loading @@ -193,6 +208,7 @@ def layout_text( line_height: int, alignment: int, vertical: bool, spacing: int = 0, padding: float = 0) -> Tuple[str, List]: num_words = len(words) Loading @@ -202,7 +218,7 @@ def layout_text( if abs(angle) > 0: mask = rotate_image(mask, angle) lines = layout_lines_with_mask(mask, words, region_rect, wl_list, delimiter_len, line_height, alignment, vertical, delimiter) lines = layout_lines_with_mask(mask, words, region_rect, wl_list, delimiter_len, line_height, spacing, alignment, vertical, delimiter) region_x, region_y, region_w, region_h = region_rect center_x = mask_xyxy[0] + region_x + region_w // 2 Loading ballontranslator/utils/text_processing.py +31 −3 Original line number Diff line number Diff line Loading @@ -38,10 +38,33 @@ def seg_eng(text: str) -> List[str]: if skip_next: skip_next = False continue if len(word) < 3: append_left, append_right = False, False len_word, len_next, len_prev = len(word), -1, -1 if ii < word_num - 1: if len(word) == 1 or len(word_list[ii + 1]) == 1: len_next = len(word_list[ii + 1]) if ii > 0: len_prev = len(word_list[ii - 1]) cond_next = (len_word == 2 and len_next <= 4) or len_word == 1 cond_prev = (len_word == 2 and len_prev <= 4) or len_word == 1 if len_next > 0 and len_prev > 0: if len_next < len_prev: append_right = cond_next else: append_left = cond_prev elif len_next > 0: append_right = cond_next elif len_prev: append_left = cond_prev if append_left: words[-1] = words[-1] + ' ' + word elif append_right: words.append(word + ' ' + word_list[ii + 1]) skip_next = True word = word + ' ' + word_list[ii + 1] else: words.append(word) continue words.append(word) return words Loading @@ -55,3 +78,8 @@ def seg_text(text: str, lang: str) -> Tuple[List, str]: words = seg_eng(text) delimiter = ' ' return words, delimiter LOGORAMS = ['简体中文', '繁体中文', '日本語', '한국어'] def is_logogram(lang: str) -> bool: return lang in LOGORAMS Loading
ballontranslator/dl/textdetector/textblock.py +24 −7 Original line number Diff line number Diff line Loading @@ -166,12 +166,12 @@ class TextBlock(object): direction = 'v' if self.vertical else 'h' src_pts = np.array(self.lines[idx], dtype=np.float64) if self.language == 'eng' or (self.language == 'unknown' and not self.vertical): e_size = self.font_size / 3 src_pts[..., 0] += np.array([-e_size, e_size, e_size, -e_size]) src_pts[..., 1] += np.array([-e_size, -e_size, e_size, e_size]) src_pts[..., 0] = np.clip(src_pts[..., 0], 0, im_w) src_pts[..., 1] = np.clip(src_pts[..., 1], 0, im_h) # if self.language == 'eng' or (self.language == 'unknown' and not self.vertical): # e_size = self.font_size / 3 # src_pts[..., 0] += np.array([-e_size, e_size, e_size, -e_size]) # src_pts[..., 1] += np.array([-e_size, -e_size, e_size, e_size]) # src_pts[..., 0] = np.clip(src_pts[..., 0], 0, im_w) # src_pts[..., 1] = np.clip(src_pts[..., 1], 0, im_h) middle_pnt = (src_pts[[1, 2, 3, 0]] + src_pts) / 2 vec_v = middle_pnt[2] - middle_pnt[0] # vertical vectors of textlines Loading Loading @@ -260,7 +260,7 @@ class TextBlock(object): @property def stroke_width(self): diff = color_difference(*self.get_font_colors()) if diff > 20: if diff > 15: return self.default_stroke_width return 0 Loading Loading @@ -479,6 +479,23 @@ def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True) -> List[ final_blk_list += merge_textlines(scattered_lines['ver']) if sort_blklist: final_blk_list = sort_textblk_list(final_blk_list, im_w, im_h) for blk in final_blk_list: if blk.language == 'eng': num_lines = len(blk.lines) if num_lines == 0: continue # blk.line_spacing = blk.bounding_rect()[3] / num_lines / blk.font_size resize_ratio = 1.1 rad = np.deg2rad(blk.angle) shifted_vec = np.array([[[-1, -1],[1, -1],[1, 1],[-1, 1]]]) shifted_vec = shifted_vec * np.array([[[np.sin(rad), np.cos(rad)]]]) * blk.font_size * (resize_ratio - 1) lines = blk.lines_array() + shifted_vec lines[..., 0] = np.clip(lines[..., 0], 0, im_w-1) lines[..., 1] = np.clip(lines[..., 1], 0, im_h-1) blk.lines = lines.astype(np.int64).tolist() blk.font_size = int(resize_ratio * blk.font_size) return final_blk_list def visualize_textblocks(canvas, blk_list: List[TextBlock]): Loading
ballontranslator/ui/scenetext_manager.py +10 −5 Original line number Diff line number Diff line Loading @@ -19,7 +19,7 @@ from .fontformatpanel import set_textblk_fontsize from .misc import FontFormat, ProgramConfig, pt2px from utils.imgproc_utils import extract_ballon_region from utils.text_processing import seg_text from utils.text_processing import seg_text, is_logogram from utils.text_layout import layout_text class MoveBlkItemsCommand(QUndoCommand): Loading Loading @@ -462,6 +462,8 @@ class SceneTextManager(QObject): words, delimiter = seg_text(text, self.config.dl.translate_target) if len(words) == 0: return tgt_is_logoram = is_logogram(self.config.dl.translate_target) src_is_logoram = is_logogram(self.config.dl.translate_source) wl_list = get_words_length_list(QFontMetrics(blk_font), words) w, h = text_size_func(text) Loading @@ -473,11 +475,11 @@ class SceneTextManager(QObject): adaptive_fntsize = True if adaptive_fntsize: area_ratio = ballon_area / (w * h) ballon_area_thresh = 1.8 ballon_area_thresh = 1.7 downscale_constraint = 0.6 # downscale the font size if textarea exceeds the balloon_area / ballon_area_thresh # or the longest word exceeds the region_width resize_ratio = np.clip(min(area_ratio / ballon_area_thresh, max(wl_list) / region_rect[2]), downscale_constraint, 1.0) resize_ratio = np.clip(min(area_ratio / ballon_area_thresh, max(wl_list) / region_rect[2], blkitem.blk.font_size / line_height), downscale_constraint, 1.0) if resize_ratio < 1: new_font_size = blk_font.pointSizeF() * resize_ratio blk_font.setPointSizeF(new_font_size) Loading @@ -485,12 +487,15 @@ class SceneTextManager(QObject): line_height = int(line_height * resize_ratio) delimiter_len = int(delimiter_len * resize_ratio) spacing = 0 if tgt_is_logoram: spacing = line_height padding = pt2px(blk_font.pointSize()) + 20 # dummpy padding variable new_text, xywh = layout_text(mask, mask_xyxy, region_rect, words, wl_list, delimiter, delimiter_len, blkitem.blk.angle, line_height, fmt.alignment, fmt.vertical, padding) new_text, xywh = layout_text(mask, mask_xyxy, region_rect, words, wl_list, delimiter, delimiter_len, blkitem.blk.angle, line_height, fmt.alignment, fmt.vertical, spacing, padding) # font size post adjustment if adaptive_fntsize: downscale_constraint = 0.6 downscale_constraint = 0.5 w = xywh[2] - padding * 2 post_resize_ratio = max(region_rect[2] / w, downscale_constraint) if post_resize_ratio < 1: Loading
ballontranslator/utils/text_layout.py +24 −8 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ from .imgproc_utils import extract_ballon_region, rotate_image class Line: def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0) -> None: def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0, spacing: int = 0) -> None: self.text = text self.pos_x = pos_x self.pos_y = pos_y Loading @@ -15,6 +15,8 @@ class Line: self.num_words = 0 if text: self.num_words += 1 self.spacing = 0 self.add_spacing(spacing) def append_right(self, word: str, w_len: int, delimiter: str = ''): self.text = self.text + delimiter + word Loading @@ -28,6 +30,16 @@ class Line: self.num_words += 1 self.length += w_len def add_spacing(self, spacing: int): self.spacing = spacing self.pos_x -= spacing self.length += 2 * spacing def strip_spacing(self): self.length -= self.spacing * 2 self.pos_x += self.spacing self.spacing = 0 def layout_lines_with_mask( mask: np.ndarray, words: List[str], Loading @@ -35,6 +47,7 @@ def layout_lines_with_mask( wl_list: List[int], delimiter_len: int, line_height: int, spacing: int = 0, alignment: int = 0, vertical: bool = False, delimiter: str = ' ', Loading Loading @@ -75,7 +88,7 @@ def layout_lines_with_mask( pos_x = centroid_x - wl_list[central_index] // 2 bh, bw = mask.shape[:2] central_line = Line(words[central_index], pos_x, pos_y, wl_list[central_index]) central_line = Line(words[central_index], pos_x, pos_y, wl_list[central_index], spacing) line_bottom = pos_y + line_height while sum_left > 0 or sum_right > 0: left_valid, right_valid = False, False Loading Loading @@ -112,7 +125,7 @@ def layout_lines_with_mask( central_line.append_right(wlst_right.pop(0), len_right[0] + delimiter_len, delimiter) sum_right -= len_right.pop(0) central_line.pos_x = new_x_r central_line.strip_spacing() lines = [central_line] # layout bottom half Loading @@ -121,7 +134,7 @@ def layout_lines_with_mask( pos_x = centroid_x - wl // 2 pos_y = centroid_y + line_height // 2 line_bottom = pos_y + line_height line = Line(w, pos_x, pos_y, wl) line = Line(w, pos_x, pos_y, wl, spacing) lines.append(line) sum_right -= wl while sum_right > 0: Loading @@ -144,7 +157,8 @@ def layout_lines_with_mask( pos_x = centroid_x - wl // 2 pos_y = line_bottom line_bottom += line_height line = Line(w, pos_x, pos_y, wl) line.strip_spacing() line = Line(w, pos_x, pos_y, wl, spacing) lines.append(line) # layout top half Loading @@ -153,7 +167,7 @@ def layout_lines_with_mask( pos_x = centroid_x - wl // 2 pos_y = centroid_y - line_height // 2 - line_height line_bottom = pos_y + line_height line = Line(w, pos_x, pos_y, wl) line = Line(w, pos_x, pos_y, wl, spacing) lines.insert(0, line) sum_left -= wl while sum_left > 0: Loading @@ -176,7 +190,8 @@ def layout_lines_with_mask( pos_x = centroid_x - wl // 2 pos_y -= line_height line_bottom = pos_y + line_height line = Line(w, pos_x, pos_y, wl) line.strip_spacing() line = Line(w, pos_x, pos_y, wl, spacing) lines.insert(0, line) return lines Loading @@ -193,6 +208,7 @@ def layout_text( line_height: int, alignment: int, vertical: bool, spacing: int = 0, padding: float = 0) -> Tuple[str, List]: num_words = len(words) Loading @@ -202,7 +218,7 @@ def layout_text( if abs(angle) > 0: mask = rotate_image(mask, angle) lines = layout_lines_with_mask(mask, words, region_rect, wl_list, delimiter_len, line_height, alignment, vertical, delimiter) lines = layout_lines_with_mask(mask, words, region_rect, wl_list, delimiter_len, line_height, spacing, alignment, vertical, delimiter) region_x, region_y, region_w, region_h = region_rect center_x = mask_xyxy[0] + region_x + region_w // 2 Loading
ballontranslator/utils/text_processing.py +31 −3 Original line number Diff line number Diff line Loading @@ -38,10 +38,33 @@ def seg_eng(text: str) -> List[str]: if skip_next: skip_next = False continue if len(word) < 3: append_left, append_right = False, False len_word, len_next, len_prev = len(word), -1, -1 if ii < word_num - 1: if len(word) == 1 or len(word_list[ii + 1]) == 1: len_next = len(word_list[ii + 1]) if ii > 0: len_prev = len(word_list[ii - 1]) cond_next = (len_word == 2 and len_next <= 4) or len_word == 1 cond_prev = (len_word == 2 and len_prev <= 4) or len_word == 1 if len_next > 0 and len_prev > 0: if len_next < len_prev: append_right = cond_next else: append_left = cond_prev elif len_next > 0: append_right = cond_next elif len_prev: append_left = cond_prev if append_left: words[-1] = words[-1] + ' ' + word elif append_right: words.append(word + ' ' + word_list[ii + 1]) skip_next = True word = word + ' ' + word_list[ii + 1] else: words.append(word) continue words.append(word) return words Loading @@ -55,3 +78,8 @@ def seg_text(text: str, lang: str) -> Tuple[List, str]: words = seg_eng(text) delimiter = ' ' return words, delimiter LOGORAMS = ['简体中文', '繁体中文', '日本語', '한국어'] def is_logogram(lang: str) -> bool: return lang in LOGORAMS