# NOTE: this span is a collapsed diff view covering three files.  The code
# below is the re-formatted content of that view, one section per file.

# ---------------------------------------------------------------------------
# ballontranslator/utils/imgproc_utils.py  (+87 −1)
# Only the changed hunks are visible; unchanged parts of the file are elided
# and kept below as comments.
# ---------------------------------------------------------------------------
import numpy as np
import cv2
import random
from typing import List
from typing import List, Tuple, Union

# def hex2bgr(hex):
#     gmask = 254 << 8
#     ... (remainder of the unchanged code is not shown by the diff) ...
#
# @@ -209,3 +209,89 @@ def draw_connected_labels(num_labels, labels, stats, centroids, names="draw_conn
#     cv2.imshow(names, labdraw)
#     return labdraw


def color_difference(rgb1: List, rgb2: List) -> float:
    """Return the distance between two RGB colours measured in LAB space.

    Reference: https://en.wikipedia.org/wiki/Color_difference#CIE76
    """
    def _to_lab(rgb):
        pixel = np.array(rgb, dtype=np.uint8).reshape(1, 1, 3)
        return cv2.cvtColor(pixel, cv2.COLOR_RGB2LAB).astype(np.float64)

    delta = _to_lab(rgb1) - _to_lab(rgb2)
    # NOTE(review): presumably rescales OpenCV's 8-bit L channel (0..255)
    # back to the 0..100 range used by CIE76 - confirm.
    delta[..., 0] *= 0.392
    return np.linalg.norm(delta, axis=2).item()


def extract_ballon_region(img: np.ndarray, ballon_rect: List, show_process=False, enlarge_ratio=2.0) -> Tuple[np.ndarray, int, List]:
    """Segment the balloon that surrounds the centre of a rectangle.

    Args:
        img: image to crop from.
        ballon_rect: rectangle given as ``[x, y, width, height]``.
        show_process: when true, display the mask and the crop with
            ``cv2.imshow`` and block on ``cv2.waitKey(0)``.
        enlarge_ratio: when greater than 1 the rectangle is grown with
            ``enlarge_window`` before cropping.

    Returns:
        ``(mask, area, [x1, y1, x2, y2])``: the mask of the cropped window,
        the number of positive mask pixels and the window that was cropped.
    """
    WHITE = (255, 255, 255)
    BLACK = (0, 0, 0)

    x1, y1 = ballon_rect[0], ballon_rect[1]
    x2, y2 = ballon_rect[2] + ballon_rect[0], ballon_rect[3] + ballon_rect[1]
    if enlarge_ratio > 1:
        x1, y1, x2, y2 = enlarge_window([x1, y1, x2, y2], img.shape[1], img.shape[0], enlarge_ratio, aspect_ratio=ballon_rect[3] / ballon_rect[2])

    img = img[y1:y2, x1:x2].copy()

    kernel = np.ones((3, 3), np.uint8)
    orih, oriw = img.shape[0], img.shape[1]

    # Work at a normalised scale: shrink large crops, enlarge small ones.
    if orih > 300 and oriw > 300:
        scale = 0.6
    elif orih < 120 or oriw < 120:
        scale = 1.4
    else:
        scale = 1

    if scale != 1:
        full_res = np.copy(img)
        img = cv2.resize(img, (int(oriw * scale), int(orih * scale)), interpolation=cv2.INTER_AREA)
    h, w = img.shape[0], img.shape[1]
    img_area = h * w
    corner = (w - 1, h - 1)

    # NOTE(review): the third positional argument of GaussianBlur is sigmaX,
    # so the border constant is used as the sigma here - confirm intent.
    blurred = cv2.GaussianBlur(img, (3, 3), cv2.BORDER_DEFAULT)
    detected_edges = cv2.Canny(blurred, 70, 140, L2gradient=True, apertureSize=3)

    # A temporary white frame is drawn on the edge map while contours are
    # extracted and painted black again afterwards.
    cv2.rectangle(detected_edges, (0, 0), corner, WHITE, 1, cv2.LINE_8)
    contours, _ = cv2.findContours(detected_edges, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
    cv2.rectangle(detected_edges, (0, 0), corner, BLACK, 1, cv2.LINE_8)

    ballon_mask = np.zeros((h, w), np.uint8)
    mask = np.zeros((h, w), np.uint8)
    smallest_fill = np.inf
    difres = 10
    tolerance = (difres, difres, difres)
    seedpnt = (int(w / 2), int(h / 2))
    min_fill = img_area * 0.3

    for idx, contour in enumerate(contours):
        box = cv2.boundingRect(contour)
        if box[2] * box[3] < img_area * 0.4:
            # contour is too small to be the balloon outline
            continue

        mask = cv2.drawContours(mask, contours, idx, (255), 2)
        candidate = np.copy(mask)
        cv2.rectangle(mask, (0, 0), corner, WHITE, 1, cv2.LINE_8)
        filled = cv2.floodFill(candidate, mask=None, seedPoint=seedpnt, flags=4, newVal=(127), loDiff=tolerance, upDiff=tolerance)[0]

        if filled <= min_fill:
            # the contour squeezes the seed region too much: erase it again
            mask = cv2.drawContours(mask, contours, idx, (0), 2)
        if filled < smallest_fill and filled > min_fill:
            # keep the tightest fill that is still large enough
            smallest_fill = filled
            ballon_mask = candidate

    # uint8 arithmetic: flood-filled pixels (127) become 0, every other value
    # becomes non-zero.
    ballon_mask = 127 - ballon_mask
    ballon_mask = cv2.dilate(ballon_mask, kernel, iterations=1)
    ballon_area = cv2.floodFill(ballon_mask, mask=None, seedPoint=seedpnt, flags=4, newVal=(30), loDiff=tolerance, upDiff=tolerance)[0]
    # The region reached from the seed (30) becomes 0, the rest is
    # thresholded to 255, then the mask is inverted.
    ballon_mask = 30 - ballon_mask
    _, ballon_mask = cv2.threshold(ballon_mask, 1, 255, cv2.THRESH_BINARY)
    ballon_mask = cv2.bitwise_not(ballon_mask, ballon_mask)

    # Dilate then erode with a kernel that grows with the balloon size.
    box_size = int(np.sqrt(ballon_area) / 30)
    if box_size > 1:
        box_kernel = np.ones((box_size, box_size), np.uint8)
        ballon_mask = cv2.dilate(ballon_mask, box_kernel, iterations=1)
        ballon_mask = cv2.erode(ballon_mask, box_kernel, iterations=1)

    if scale != 1:
        img = full_res
        # NOTE(review): default interpolation may leave intermediate grey
        # values along the mask edge - confirm callers expect that.
        ballon_mask = cv2.resize(ballon_mask, (oriw, orih))

    if show_process:
        cv2.imshow('ballon_mask', ballon_mask)
        cv2.imshow('img', img)
        cv2.waitKey(0)

    return ballon_mask, (ballon_mask > 0).sum(), [x1, y1, x2, y2]


# ---------------------------------------------------------------------------
# ballontranslator/utils/text_layout.py  (new file, +190 −0)
# ---------------------------------------------------------------------------
from typing import List
import numpy as np
import cv2

from .text_processing import seg_ch, seg_eng, seg_to_chars


class Line:
    """One line of laid-out text together with its position and width."""

    def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0) -> None:
        self.text = text
        self.pos_x = pos_x          # left edge of the line
        self.pos_y = pos_y          # top edge of the line
        self.length = int(length)   # accumulated width of the line
        self.num_words = 1 if text else 0

    def __repr__(self) -> str:
        return (f"{type(self).__name__}(text={self.text!r}, pos_x={self.pos_x}, "
                f"pos_y={self.pos_y}, length={self.length})")

    def append_right(self, word: str, w_len: int, delimiter: str = ''):
        """Add *word* at the end of the line and grow the width by *w_len*."""
        self.text = self.text + delimiter + word
        if word:
            self.num_words += 1
        self.length += w_len

    def append_left(self, word: str, w_len: int, delimiter: str = ''):
        """Add *word* at the start of the line and grow the width by *w_len*."""
        self.text = word + delimiter + self.text
        if word:
            self.num_words += 1
        self.length += w_len


def _span_is_free(blocked: np.ndarray, top: int, bottom: int, left: int, right: int) -> bool:
    """Tell whether a line reaching from column *left* to *right* fits.

    The span fits when both end columns lie strictly inside the image and
    neither of them touches a non-zero pixel of *blocked* between the rows
    *top* (inclusive) and *bottom* (exclusive).
    """
    if left <= 0 or right >= blocked.shape[1]:
        return False
    # numpy reads negative bounds as offsets from the last row, so clamp
    # them; rows above the image simply contribute nothing.
    top, bottom = max(top, 0), max(bottom, 0)
    rows = blocked[top:bottom]
    return not (rows[:, left].any() or rows[:, right].any())


def layout_lines_with_mask(
        mask: np.ndarray,
        words: List[str],
        wl_list: List[int],
        delimiter_len: int,
        line_height: int,
        delimiter: str = ' ',
        word_break: bool = False) -> List[Line]:
    """Break *words* into centred lines that stay inside a balloon mask.

    The word containing the midpoint of the text seeds a central line placed
    on the centroid of the mask.  That line is widened word by word while its
    ends stay inside the balloon; the words left over on the right flow into
    lines below it and the words left over on the left into lines above it.

    Args:
        mask: balloon mask; assumes an 8-bit image whose value 255 marks the
            balloon interior (it is inverted with ``255 - mask``) - confirm
            against the caller.
        words: words in reading order.
        wl_list: width of each word, parallel to *words*.
        delimiter_len: width added for the delimiter between two words.
        line_height: height of one line.
        delimiter: string inserted between words of the same line.
        word_break: currently unused.

    Returns:
        The lines ordered from top to bottom; an empty list when *words* is
        empty.
    """
    num_words = len(words)
    if num_words == 0:
        return []

    bh, bw = mask.shape[:2]
    m = cv2.moments(mask)
    if m['m00'] != 0:
        centroid_x = int(m['m10'] / m['m00'])
        centroid_y = int(m['m01'] / m['m00'])
    else:
        # Empty mask: there is no centroid, fall back to the image centre.
        centroid_x, centroid_y = bw // 2, bh // 2
    blocked = 255 - mask    # non-zero now means "outside the balloon"

    # Pick the central word: the one that contains the midpoint of the text.
    central_index = 0
    if num_words > 1:
        wl_cumsums = np.cumsum(np.array(wl_list, dtype=np.float64))
        wl_cumsums -= wl_cumsums[-1] / 2
        central_index = int(np.argmin(np.abs(wl_cumsums)))
        if wl_cumsums[central_index] < 0:
            # The closest word still ends before the midpoint, so the
            # midpoint falls inside the next word.
            central_index = min(central_index + 1, num_words - 1)
    wlst_left, len_left = list(words[:central_index]), list(wl_list[:central_index])
    wlst_right, len_right = list(words[central_index + 1:]), list(wl_list[central_index + 1:])
    sum_left, sum_right = sum(len_left), sum(len_right)

    central_width = wl_list[central_index]
    pos_y = centroid_y - line_height // 2
    line_bottom = pos_y + line_height
    central_line = Line(words[central_index], centroid_x - central_width // 2, pos_y, central_width)

    # Widen the central line while at least one neighbouring word still fits.
    while sum_left > 0 or sum_right > 0:
        left_valid = right_valid = False
        if sum_left > 0:
            new_len_l = central_line.length + len_left[-1] + delimiter_len
            new_x_l = centroid_x - new_len_l // 2
            left_valid = _span_is_free(blocked, pos_y, line_bottom, new_x_l, new_x_l + new_len_l)
        if sum_right > 0:
            new_len_r = central_line.length + len_right[0] + delimiter_len
            new_x_r = centroid_x - new_len_r // 2
            right_valid = _span_is_free(blocked, pos_y, line_bottom, new_x_r, new_x_r + new_len_r)

        if left_valid and right_valid:
            # Both fit: take from the side that still has more text left.
            insert_left = sum_left > sum_right
        elif left_valid:
            insert_left = True
        elif right_valid:
            insert_left = False
        else:
            break

        if insert_left:
            wl = len_left.pop()
            central_line.append_left(wlst_left.pop(), wl + delimiter_len, delimiter)
            sum_left -= wl
            central_line.pos_x = new_x_l
        else:
            wl = len_right.pop(0)
            central_line.append_right(wlst_right.pop(0), wl + delimiter_len, delimiter)
            sum_right -= wl
            central_line.pos_x = new_x_r

    lines = [central_line]

    # Bottom half: the remaining right-hand words flow downwards.
    if sum_right > 0:
        w, wl = wlst_right.pop(0), len_right.pop(0)
        pos_y = centroid_y + line_height // 2
        line_bottom = pos_y + line_height
        line = Line(w, centroid_x - wl // 2, pos_y, wl)
        lines.append(line)
        sum_right -= wl
        while sum_right > 0:
            w, wl = wlst_right.pop(0), len_right.pop(0)
            sum_right -= wl
            new_len = line.length + wl + delimiter_len
            new_x = centroid_x - new_len // 2
            if _span_is_free(blocked, pos_y, line_bottom, new_x, new_x + new_len):
                line.append_right(w, wl + delimiter_len, delimiter)
                line.pos_x = new_x
            else:
                # Word does not fit any more: open a new line underneath.
                pos_y = line_bottom
                line_bottom += line_height
                line = Line(w, centroid_x - wl // 2, pos_y, wl)
                lines.append(line)

    # Top half: the remaining left-hand words flow upwards.
    if sum_left > 0:
        w, wl = wlst_left.pop(), len_left.pop()
        pos_y = centroid_y - line_height // 2 - line_height
        line_bottom = pos_y + line_height
        line = Line(w, centroid_x - wl // 2, pos_y, wl)
        lines.insert(0, line)
        sum_left -= wl
        while sum_left > 0:
            w, wl = wlst_left.pop(), len_left.pop()
            sum_left -= wl
            new_len = line.length + wl + delimiter_len
            new_x = centroid_x - new_len // 2
            if _span_is_free(blocked, pos_y, line_bottom, new_x, new_x + new_len):
                line.append_left(w, wl + delimiter_len, delimiter)
                line.pos_x = new_x
            else:
                # Word does not fit any more: open a new line above.
                pos_y -= line_height
                line_bottom = pos_y + line_height
                line = Line(w, centroid_x - wl // 2, pos_y, wl)
                lines.insert(0, line)

    return lines


def layout_text(text: str, lang: str, text_size_func) -> List[Line]:
    """Segment *text* into words according to *lang*.

    NOTE(review): only the preprocessing step is present.  For non-empty
    input the function falls through and returns ``None``, and
    ``text_size_func`` is not used yet - confirm this is work in progress.
    """
    # preprocessing
    if lang in ('简体中文', '繁体中文'):
        words_list = seg_ch(text)
    elif lang in ('日本語', '한국어'):
        words_list = seg_to_chars(text)
    else:
        words_list = seg_eng(text)

    if not words_list:
        return []


# ---------------------------------------------------------------------------
# ballontranslator/utils/text_processing.py  (new file, +51 −0)
# ---------------------------------------------------------------------------
from typing import List

# pkuseg segmenter, created on the first seg_ch() call and then reused.
CHSEG = None


def seg_to_chars(text: str) -> List[str]:
    """Split *text* into single characters, dropping newlines."""
    return list(text.replace('\n', ''))


def seg_ch(text: str) -> List[str]:
    """Segment Chinese *text* into words with pkuseg, dropping newlines."""
    global CHSEG
    text = text.replace('\n', '')
    if CHSEG is None:
        # pkuseg is imported on first use only.
        import pkuseg
        CHSEG = pkuseg.pkuseg()
    return CHSEG.cut(text)


def seg_eng(text: str) -> List[str]:
    """Split English *text* into upper-cased layout words.

    Runs of spaces are collapsed, a space is inserted after ``.``, ``?`` or
    ``!`` when a letter or digit follows directly, and a one-character word
    is glued to its neighbour so it never stands alone.

    Returns an empty list for empty or blank input; the result never
    contains empty strings.
    """
    text = text.upper()
    # Collapse runs of spaces first so that a stray " ." is always attached
    # to the preceding word by the replacement below.
    while '  ' in text:
        text = text.replace('  ', ' ')
    text = text.replace(' .', '.').replace('\n', ' ')

    # dumb way to ensure spaces between words
    pieces = []
    last = len(text) - 1
    for ii, c in enumerate(text):
        pieces.append(c)
        if c in '.?!' and ii < last:
            next_c = text[ii + 1]
            if next_c.isalpha() or next_c.isnumeric():
                pieces.append(' ')

    # Newlines may have produced new double spaces: drop the empty tokens.
    word_list = [w for w in ''.join(pieces).split(' ') if w]

    words = []
    word_num = len(word_list)
    ii = 0
    while ii < word_num:
        word = word_list[ii]
        if ii + 1 < word_num and (len(word) == 1 or len(word_list[ii + 1]) == 1):
            # merge one-character words with their neighbour
            word = word + ' ' + word_list[ii + 1]
            ii += 1
        words.append(word)
        ii += 1
    return words
# ballontranslator/utils/imgproc_utils.py  (+87 −1)
# Recovered from a collapsed diff view: only the changed hunks are visible,
# unchanged parts of the file are elided and kept below as comments.
import numpy as np
import cv2
import random
from typing import List
from typing import List, Tuple, Union

# def hex2bgr(hex):
#     gmask = 254 << 8
#     ... (remainder of the unchanged code is not shown by the diff) ...
#
# @@ -209,3 +209,89 @@ def draw_connected_labels(num_labels, labels, stats, centroids, names="draw_conn
#     cv2.imshow(names, labdraw)
#     return labdraw


def color_difference(rgb1: List, rgb2: List) -> float:
    """Return the distance between two RGB colours measured in LAB space.

    Reference: https://en.wikipedia.org/wiki/Color_difference#CIE76
    """
    def _to_lab(rgb):
        pixel = np.array(rgb, dtype=np.uint8).reshape(1, 1, 3)
        return cv2.cvtColor(pixel, cv2.COLOR_RGB2LAB).astype(np.float64)

    delta = _to_lab(rgb1) - _to_lab(rgb2)
    # NOTE(review): presumably rescales OpenCV's 8-bit L channel (0..255)
    # back to the 0..100 range used by CIE76 - confirm.
    delta[..., 0] *= 0.392
    return np.linalg.norm(delta, axis=2).item()


def extract_ballon_region(img: np.ndarray, ballon_rect: List, show_process=False, enlarge_ratio=2.0) -> Tuple[np.ndarray, int, List]:
    """Segment the balloon that surrounds the centre of a rectangle.

    Args:
        img: image to crop from.
        ballon_rect: rectangle given as ``[x, y, width, height]``.
        show_process: when true, display the mask and the crop with
            ``cv2.imshow`` and block on ``cv2.waitKey(0)``.
        enlarge_ratio: when greater than 1 the rectangle is grown with
            ``enlarge_window`` before cropping.

    Returns:
        ``(mask, area, [x1, y1, x2, y2])``: the mask of the cropped window,
        the number of positive mask pixels and the window that was cropped.
    """
    WHITE = (255, 255, 255)
    BLACK = (0, 0, 0)

    x1, y1 = ballon_rect[0], ballon_rect[1]
    x2, y2 = ballon_rect[2] + ballon_rect[0], ballon_rect[3] + ballon_rect[1]
    if enlarge_ratio > 1:
        x1, y1, x2, y2 = enlarge_window([x1, y1, x2, y2], img.shape[1], img.shape[0], enlarge_ratio, aspect_ratio=ballon_rect[3] / ballon_rect[2])

    img = img[y1:y2, x1:x2].copy()

    kernel = np.ones((3, 3), np.uint8)
    orih, oriw = img.shape[0], img.shape[1]

    # Work at a normalised scale: shrink large crops, enlarge small ones.
    if orih > 300 and oriw > 300:
        scale = 0.6
    elif orih < 120 or oriw < 120:
        scale = 1.4
    else:
        scale = 1

    if scale != 1:
        full_res = np.copy(img)
        img = cv2.resize(img, (int(oriw * scale), int(orih * scale)), interpolation=cv2.INTER_AREA)
    h, w = img.shape[0], img.shape[1]
    img_area = h * w
    corner = (w - 1, h - 1)

    # NOTE(review): the third positional argument of GaussianBlur is sigmaX,
    # so the border constant is used as the sigma here - confirm intent.
    blurred = cv2.GaussianBlur(img, (3, 3), cv2.BORDER_DEFAULT)
    detected_edges = cv2.Canny(blurred, 70, 140, L2gradient=True, apertureSize=3)

    # A temporary white frame is drawn on the edge map while contours are
    # extracted and painted black again afterwards.
    cv2.rectangle(detected_edges, (0, 0), corner, WHITE, 1, cv2.LINE_8)
    contours, _ = cv2.findContours(detected_edges, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
    cv2.rectangle(detected_edges, (0, 0), corner, BLACK, 1, cv2.LINE_8)

    ballon_mask = np.zeros((h, w), np.uint8)
    mask = np.zeros((h, w), np.uint8)
    smallest_fill = np.inf
    difres = 10
    tolerance = (difres, difres, difres)
    seedpnt = (int(w / 2), int(h / 2))
    min_fill = img_area * 0.3

    for idx, contour in enumerate(contours):
        box = cv2.boundingRect(contour)
        if box[2] * box[3] < img_area * 0.4:
            # contour is too small to be the balloon outline
            continue

        mask = cv2.drawContours(mask, contours, idx, (255), 2)
        candidate = np.copy(mask)
        cv2.rectangle(mask, (0, 0), corner, WHITE, 1, cv2.LINE_8)
        filled = cv2.floodFill(candidate, mask=None, seedPoint=seedpnt, flags=4, newVal=(127), loDiff=tolerance, upDiff=tolerance)[0]

        if filled <= min_fill:
            # the contour squeezes the seed region too much: erase it again
            mask = cv2.drawContours(mask, contours, idx, (0), 2)
        if filled < smallest_fill and filled > min_fill:
            # keep the tightest fill that is still large enough
            smallest_fill = filled
            ballon_mask = candidate

    # uint8 arithmetic: flood-filled pixels (127) become 0, every other value
    # becomes non-zero.
    ballon_mask = 127 - ballon_mask
    ballon_mask = cv2.dilate(ballon_mask, kernel, iterations=1)
    ballon_area = cv2.floodFill(ballon_mask, mask=None, seedPoint=seedpnt, flags=4, newVal=(30), loDiff=tolerance, upDiff=tolerance)[0]
    # The region reached from the seed (30) becomes 0, the rest is
    # thresholded to 255, then the mask is inverted.
    ballon_mask = 30 - ballon_mask
    _, ballon_mask = cv2.threshold(ballon_mask, 1, 255, cv2.THRESH_BINARY)
    ballon_mask = cv2.bitwise_not(ballon_mask, ballon_mask)

    # Dilate then erode with a kernel that grows with the balloon size.
    box_size = int(np.sqrt(ballon_area) / 30)
    if box_size > 1:
        box_kernel = np.ones((box_size, box_size), np.uint8)
        ballon_mask = cv2.dilate(ballon_mask, box_kernel, iterations=1)
        ballon_mask = cv2.erode(ballon_mask, box_kernel, iterations=1)

    if scale != 1:
        img = full_res
        # NOTE(review): default interpolation may leave intermediate grey
        # values along the mask edge - confirm callers expect that.
        ballon_mask = cv2.resize(ballon_mask, (oriw, orih))

    if show_process:
        cv2.imshow('ballon_mask', ballon_mask)
        cv2.imshow('img', img)
        cv2.waitKey(0)

    return ballon_mask, (ballon_mask > 0).sum(), [x1, y1, x2, y2]
# ballontranslator/utils/text_layout.py  (new file, +190 −0)
from typing import List
import numpy as np
import cv2

from .text_processing import seg_ch, seg_eng, seg_to_chars


class Line:
    """One line of laid-out text together with its position and width."""

    def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0) -> None:
        self.text = text
        self.pos_x = pos_x          # left edge of the line
        self.pos_y = pos_y          # top edge of the line
        self.length = int(length)   # accumulated width of the line
        self.num_words = 1 if text else 0

    def __repr__(self) -> str:
        return (f"{type(self).__name__}(text={self.text!r}, pos_x={self.pos_x}, "
                f"pos_y={self.pos_y}, length={self.length})")

    def append_right(self, word: str, w_len: int, delimiter: str = ''):
        """Add *word* at the end of the line and grow the width by *w_len*."""
        self.text = self.text + delimiter + word
        if word:
            self.num_words += 1
        self.length += w_len

    def append_left(self, word: str, w_len: int, delimiter: str = ''):
        """Add *word* at the start of the line and grow the width by *w_len*."""
        self.text = word + delimiter + self.text
        if word:
            self.num_words += 1
        self.length += w_len


def _span_is_free(blocked: np.ndarray, top: int, bottom: int, left: int, right: int) -> bool:
    """Tell whether a line reaching from column *left* to *right* fits.

    The span fits when both end columns lie strictly inside the image and
    neither of them touches a non-zero pixel of *blocked* between the rows
    *top* (inclusive) and *bottom* (exclusive).
    """
    if left <= 0 or right >= blocked.shape[1]:
        return False
    # numpy reads negative bounds as offsets from the last row, so clamp
    # them; rows above the image simply contribute nothing.
    top, bottom = max(top, 0), max(bottom, 0)
    rows = blocked[top:bottom]
    return not (rows[:, left].any() or rows[:, right].any())


def layout_lines_with_mask(
        mask: np.ndarray,
        words: List[str],
        wl_list: List[int],
        delimiter_len: int,
        line_height: int,
        delimiter: str = ' ',
        word_break: bool = False) -> List[Line]:
    """Break *words* into centred lines that stay inside a balloon mask.

    The word containing the midpoint of the text seeds a central line placed
    on the centroid of the mask.  That line is widened word by word while its
    ends stay inside the balloon; the words left over on the right flow into
    lines below it and the words left over on the left into lines above it.

    Args:
        mask: balloon mask; assumes an 8-bit image whose value 255 marks the
            balloon interior (it is inverted with ``255 - mask``) - confirm
            against the caller.
        words: words in reading order.
        wl_list: width of each word, parallel to *words*.
        delimiter_len: width added for the delimiter between two words.
        line_height: height of one line.
        delimiter: string inserted between words of the same line.
        word_break: currently unused.

    Returns:
        The lines ordered from top to bottom; an empty list when *words* is
        empty.
    """
    num_words = len(words)
    if num_words == 0:
        return []

    bh, bw = mask.shape[:2]
    m = cv2.moments(mask)
    if m['m00'] != 0:
        centroid_x = int(m['m10'] / m['m00'])
        centroid_y = int(m['m01'] / m['m00'])
    else:
        # Empty mask: there is no centroid, fall back to the image centre.
        centroid_x, centroid_y = bw // 2, bh // 2
    blocked = 255 - mask    # non-zero now means "outside the balloon"

    # Pick the central word: the one that contains the midpoint of the text.
    central_index = 0
    if num_words > 1:
        wl_cumsums = np.cumsum(np.array(wl_list, dtype=np.float64))
        wl_cumsums -= wl_cumsums[-1] / 2
        central_index = int(np.argmin(np.abs(wl_cumsums)))
        if wl_cumsums[central_index] < 0:
            # The closest word still ends before the midpoint, so the
            # midpoint falls inside the next word.
            central_index = min(central_index + 1, num_words - 1)
    wlst_left, len_left = list(words[:central_index]), list(wl_list[:central_index])
    wlst_right, len_right = list(words[central_index + 1:]), list(wl_list[central_index + 1:])
    sum_left, sum_right = sum(len_left), sum(len_right)

    central_width = wl_list[central_index]
    pos_y = centroid_y - line_height // 2
    line_bottom = pos_y + line_height
    central_line = Line(words[central_index], centroid_x - central_width // 2, pos_y, central_width)

    # Widen the central line while at least one neighbouring word still fits.
    while sum_left > 0 or sum_right > 0:
        left_valid = right_valid = False
        if sum_left > 0:
            new_len_l = central_line.length + len_left[-1] + delimiter_len
            new_x_l = centroid_x - new_len_l // 2
            left_valid = _span_is_free(blocked, pos_y, line_bottom, new_x_l, new_x_l + new_len_l)
        if sum_right > 0:
            new_len_r = central_line.length + len_right[0] + delimiter_len
            new_x_r = centroid_x - new_len_r // 2
            right_valid = _span_is_free(blocked, pos_y, line_bottom, new_x_r, new_x_r + new_len_r)

        if left_valid and right_valid:
            # Both fit: take from the side that still has more text left.
            insert_left = sum_left > sum_right
        elif left_valid:
            insert_left = True
        elif right_valid:
            insert_left = False
        else:
            break

        if insert_left:
            wl = len_left.pop()
            central_line.append_left(wlst_left.pop(), wl + delimiter_len, delimiter)
            sum_left -= wl
            central_line.pos_x = new_x_l
        else:
            wl = len_right.pop(0)
            central_line.append_right(wlst_right.pop(0), wl + delimiter_len, delimiter)
            sum_right -= wl
            central_line.pos_x = new_x_r

    lines = [central_line]

    # Bottom half: the remaining right-hand words flow downwards.
    if sum_right > 0:
        w, wl = wlst_right.pop(0), len_right.pop(0)
        pos_y = centroid_y + line_height // 2
        line_bottom = pos_y + line_height
        line = Line(w, centroid_x - wl // 2, pos_y, wl)
        lines.append(line)
        sum_right -= wl
        while sum_right > 0:
            w, wl = wlst_right.pop(0), len_right.pop(0)
            sum_right -= wl
            new_len = line.length + wl + delimiter_len
            new_x = centroid_x - new_len // 2
            if _span_is_free(blocked, pos_y, line_bottom, new_x, new_x + new_len):
                line.append_right(w, wl + delimiter_len, delimiter)
                line.pos_x = new_x
            else:
                # Word does not fit any more: open a new line underneath.
                pos_y = line_bottom
                line_bottom += line_height
                line = Line(w, centroid_x - wl // 2, pos_y, wl)
                lines.append(line)

    # Top half: the remaining left-hand words flow upwards.
    if sum_left > 0:
        w, wl = wlst_left.pop(), len_left.pop()
        pos_y = centroid_y - line_height // 2 - line_height
        line_bottom = pos_y + line_height
        line = Line(w, centroid_x - wl // 2, pos_y, wl)
        lines.insert(0, line)
        sum_left -= wl
        while sum_left > 0:
            w, wl = wlst_left.pop(), len_left.pop()
            sum_left -= wl
            new_len = line.length + wl + delimiter_len
            new_x = centroid_x - new_len // 2
            if _span_is_free(blocked, pos_y, line_bottom, new_x, new_x + new_len):
                line.append_left(w, wl + delimiter_len, delimiter)
                line.pos_x = new_x
            else:
                # Word does not fit any more: open a new line above.
                pos_y -= line_height
                line_bottom = pos_y + line_height
                line = Line(w, centroid_x - wl // 2, pos_y, wl)
                lines.insert(0, line)

    return lines


def layout_text(text: str, lang: str, text_size_func) -> List[Line]:
    """Segment *text* into words according to *lang*.

    NOTE(review): only the preprocessing step is present.  For non-empty
    input the function falls through and returns ``None``, and
    ``text_size_func`` is not used yet - confirm this is work in progress.
    """
    # preprocessing
    if lang in ('简体中文', '繁体中文'):
        words_list = seg_ch(text)
    elif lang in ('日本語', '한국어'):
        words_list = seg_to_chars(text)
    else:
        words_list = seg_eng(text)

    if not words_list:
        return []
# ballontranslator/utils/text_processing.py  (new file, +51 −0)
from typing import List

# pkuseg segmenter, created on the first seg_ch() call and then reused.
CHSEG = None


def seg_to_chars(text: str) -> List[str]:
    """Split *text* into single characters, dropping newlines."""
    return list(text.replace('\n', ''))


def seg_ch(text: str) -> List[str]:
    """Segment Chinese *text* into words with pkuseg, dropping newlines."""
    global CHSEG
    text = text.replace('\n', '')
    if CHSEG is None:
        # pkuseg is imported on first use only.
        import pkuseg
        CHSEG = pkuseg.pkuseg()
    return CHSEG.cut(text)


def seg_eng(text: str) -> List[str]:
    """Split English *text* into upper-cased layout words.

    Runs of spaces are collapsed, a space is inserted after ``.``, ``?`` or
    ``!`` when a letter or digit follows directly, and a one-character word
    is glued to its neighbour so it never stands alone.

    Returns an empty list for empty or blank input; the result never
    contains empty strings.
    """
    text = text.upper()
    # Collapse runs of spaces first so that a stray " ." is always attached
    # to the preceding word by the replacement below.
    while '  ' in text:
        text = text.replace('  ', ' ')
    text = text.replace(' .', '.').replace('\n', ' ')

    # dumb way to ensure spaces between words
    pieces = []
    last = len(text) - 1
    for ii, c in enumerate(text):
        pieces.append(c)
        if c in '.?!' and ii < last:
            next_c = text[ii + 1]
            if next_c.isalpha() or next_c.isnumeric():
                pieces.append(' ')

    # Newlines may have produced new double spaces: drop the empty tokens.
    word_list = [w for w in ''.join(pieces).split(' ') if w]

    words = []
    word_num = len(word_list)
    ii = 0
    while ii < word_num:
        word = word_list[ii]
        if ii + 1 < word_num and (len(word) == 1 or len(word_list[ii + 1]) == 1):
            # merge one-character words with their neighbour
            word = word + ' ' + word_list[ii + 1]
            ii += 1
        words.append(word)
        ii += 1
    return words