# ui/mainwindow.py +0 −2   @@ -453,8 +453,6 @@ class MainWindow(FrameLessWindow):
# Diff context only (fragments of two methods, not a complete definition):
#             size.height() - msg_size.height()))
#         self.dl_manager.progress_msgbox.move(p)
#
#     def on_closebtn_clicked(self):
#         if self.imsave_thread.isRunning():
#             self.imsave_thread.finished.connect(self.close)

# ---------------------------------------------------------------------------
# utils/stroke_width_calculator.py 0 → 100644 +132 −0
# NOTE(review): block nesting was reconstructed from a whitespace-collapsed
# listing; confirm the placement of the inner `break`s against the repository.
# ---------------------------------------------------------------------------
import cv2
import os
import time

import numpy as np


class Ray():
    """Segment from a start point to an end point, with its Euclidean length."""

    def __init__(self, start_x=None, start_y=None):
        self.start_x = start_x
        self.start_y = start_y
        # End point and length stay None until set_endpnt() is called.
        self.end_x = None
        self.end_y = None
        self.length = None

    def set_endpnt(self, end_x, end_y):
        """Store the end point and set ``length`` to the start-end distance."""
        self.end_x = end_x
        self.end_y = end_y
        self.length = np.sqrt((self.start_x-self.end_x)**2 + (self.start_y-self.end_y)**2)


def calculate_derivatives(gx, gy):
    """Normalise the gradient ``(gx, gy)`` to unit length.

    Returns:
        ``(True, dx, dy)`` with the unit vector, or ``(False, -1, -1)`` when
        the gradient magnitude is exactly zero.
    """
    mag = np.sqrt(gx*gx + gy*gy)
    if mag == 0:
        return False, -1, -1
    return True, gx / mag, gy / mag


def sw_calculator(mask, canny_img, gradient_x, gradient_y, show_process=False):
    """Cast rays along the gradient from sampled pixels and measure their length.

    Start points are the pixels where both ``canny_img`` and ``mask`` are
    non-zero; at most about 150 of them are sampled. Each ray walks along the
    normalised gradient until it reaches a pixel where ``canny_img`` is zero.
    The ray is kept only if the gradient there points against the ray.

    Args:
        mask: 2-D array; non-zero pixels are candidate start points.
        canny_img: 2-D array; zero pixels terminate a ray.
        gradient_x, gradient_y: per-pixel gradient components, indexed [y][x].
        show_process: when true, draw the rays and block on ``cv2.waitKey``.

    Returns:
        List of ``[start_x, start_y, end_x, end_y, length]``. It is sorted by
        length only when ``show_process`` is true.
    """
    height, width = canny_img.shape[0], canny_img.shape[1]
    if show_process:
        drawborder = np.zeros((height, width, 3), dtype=np.uint8)

    pnts = np.where(np.logical_and(canny_img != 0, mask != 0))
    total_pnt_num = pnts[0].shape[0]
    sample_pnt_num = 150
    sample_step = total_pnt_num / sample_pnt_num if total_pnt_num > sample_pnt_num else 1
    cur_pnt_ind = 0
    ray_list = []
    t0 = time.time()
    while cur_pnt_ind < total_pnt_num:
        start_x, start_y = pnts[1][cur_pnt_ind], pnts[0][cur_pnt_ind]
        ray_arr = [start_x, start_y, -1, -1, -1]
        valid, dx, dy = calculate_derivatives(gradient_x[start_y][start_x], gradient_y[start_y][start_x])
        if valid:
            inc = 0.2
            cur_x, cur_y = start_x + inc * dx, start_y + inc * dy
            while True:
                tmp_curx, tmp_cury = int(cur_x), int(cur_y)
                # Row 0 is a valid pixel row; the previous `tmp_cury <= 0`
                # test was asymmetric with the x test and dropped it.
                if tmp_curx < 0 or tmp_curx >= width or tmp_cury < 0 or tmp_cury >= height:
                    break
                if canny_img[tmp_cury][tmp_curx] == 0:
                    valid, dx_t, dy_t = calculate_derivatives(gradient_x[tmp_cury][tmp_curx], gradient_y[tmp_cury][tmp_curx])
                    if not valid:
                        break
                    # Angle between the reversed ray direction and the end
                    # gradient is below 90 degrees iff their dot product is
                    # positive. A sign test avoids arccos() returning NaN when
                    # rounding pushes the dot product slightly outside [-1, 1].
                    if (dx * dx_t + dy * dy_t) < 0:
                        ray_arr[2] = tmp_curx
                        ray_arr[3] = tmp_cury
                        ray_arr[4] = np.sqrt((start_x - tmp_curx)**2 + (start_y - tmp_cury)**2)
                    break
                cur_x += dx
                cur_y += dy
            if ray_arr[2] != -1:
                ray_list.append(ray_arr)
                if show_process:
                    drawborder = cv2.arrowedLine(drawborder, (int(ray_arr[0]), int(ray_arr[1])), (ray_arr[2], ray_arr[3]), (0, 255, 0), 1)
        cur_pnt_ind += sample_step
        cur_pnt_ind = int(round(cur_pnt_ind))

    if show_process and len(ray_list) != 0:
        ray_list.sort(key=lambda x: x[4])
        print(f"cost time: {time.time() - t0}, {total_pnt_num}, {ray_list[len(ray_list) // 2][4]}")
        cv2.imshow("border", drawborder)
        cv2.imshow("cannyimg", canny_img)
        cv2.waitKey(0)
    return ray_list


def strokewidth_check(text_mask, labels, num_labels, stats, debug_type=0):
    """Erase connected components whose stroke width and area are both outliers.

    For every label except 0 whose area exceeds 0.2% of the image, the median
    ray length from ``sw_calculator`` is taken as its stroke width. A label is
    removed from ``text_mask`` when its width exceeds twice the mean width and
    its area exceeds twice the mean area of the measured labels.

    Args:
        text_mask: 2-D uint8 mask; modified in place and also returned.
        labels, num_labels, stats: presumably the outputs of
            ``cv2.connectedComponentsWithStats`` (``stats[i][4]`` is read as
            the area, ``stats[i][0:4]`` as x, y, w, h) — confirm with caller.
        debug_type: values above 0 enable the visual debug output.

    Returns:
        The same ``text_mask`` array with the outlier labels zeroed.
    """
    rays_width = []
    height, width = text_mask.shape[0], text_mask.shape[1]
    # NOTE(review): the tuple (3,3) is passed as the kernel and BORDER_DEFAULT
    # lands in the third positional slot of dilate(); kept as-is — confirm intent.
    blur_img = cv2.dilate(text_mask, (3, 3), cv2.BORDER_DEFAULT)
    _, canny_img = cv2.threshold(text_mask, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
    blur2 = blur_img.astype(float) / 255
    # NOTE(review): the third positional argument of GaussianBlur is sigmaX, so
    # BORDER_DEFAULT acts as the sigma here; kept as-is — confirm intent.
    gradient_x = cv2.Scharr(blur2, ddepth=-1, dx=1, dy=0)
    gradient_x = cv2.GaussianBlur(gradient_x, (3, 3), cv2.BORDER_DEFAULT)
    gradient_y = cv2.Scharr(blur2, ddepth=-1, dx=0, dy=1)
    gradient_y = cv2.GaussianBlur(gradient_y, (3, 3), cv2.BORDER_DEFAULT)
    img_area = height * width
    show_process = debug_type > 0

    for lab in range(1, num_labels):
        stat = stats[lab]
        if not stat[4] > img_area * 0.002:
            continue
        # Bounding box padded by 2 px and clamped to the image.
        x1, y1, x2, y2 = stat[0] - 2, stat[1] - 2, stat[0] + stat[2] + 2, stat[1] + stat[3] + 2
        x1, x2 = max(x1, 0), min(x2, width)
        y1, y2 = max(y1, 0), min(y2, height)
        labcord = np.where(labels == lab)
        labcord2 = (labcord[0] - y1, labcord[1] - x1)
        text_roi = np.zeros((y2-y1, x2-x1), dtype=np.uint8)
        text_roi[labcord2] = 255
        text_roi = cv2.GaussianBlur(text_roi, (3, 3), cv2.BORDER_DEFAULT)
        ray_list = sw_calculator(text_roi, canny_img[y1: y2, x1: x2], gradient_x[y1: y2, x1: x2], gradient_y[y1: y2, x1: x2], show_process=show_process)
        if len(ray_list) != 0:
            ray_list.sort(key=lambda x: x[4])
            rays_width.append([int(lab), ray_list[len(ray_list) // 2][4]])

    if len(rays_width) != 0:
        rays_width = np.array(rays_width)
        mean_width = np.mean(rays_width[:, 1])
        # np.int0 / np.int no longer exist in current NumPy; use explicit types.
        ma = rays_width[:, 0].astype(np.intp)
        mean_area = np.mean(stats[ma][:, 4])
        false_labels = np.where(rays_width[:, 1] > 2*mean_width)[0]
        false_labels = rays_width[false_labels, 0].astype(int)
        for fl in false_labels:
            if stats[fl][4] > 2 * mean_area:
                text_mask[np.where(labels == fl)] = 0
    return text_mask


# ---------------------------------------------------------------------------
# utils/textblock_mask.py 0 → 100644 +349 −0
# NOTE(review): block nesting was reconstructed from a whitespace-collapsed
# listing; confirm if/else pairing and loop extents against the repository.
# ---------------------------------------------------------------------------
import cv2
import numpy as np
from .imgproc_utils import draw_connected_labels
from .stroke_width_calculator import strokewidth_check


def opencv_inpaint(img, mask):
    """Default inpainter: ``cv2.inpaint`` with radius 3 and ``INPAINT_NS``."""
    return cv2.inpaint(img, mask, 3, cv2.INPAINT_NS)


def show_img_by_dict(imgdicts):
    """Show every image in ``{window_name: image}`` and block until a key press."""
    for keyname, image in imgdicts.items():
        cv2.imshow(keyname, image)
    cv2.waitKey(0)


# Compute the mean BGR of the text.
def letter_calculator(img, mask, bground_bgr, show_process=False):
    """Estimate the text colour inside ``mask`` by Otsu thresholding.

    Args:
        img: BGR image (it is passed to ``cv2.cvtColor(..., COLOR_BGR2GRAY)``).
        mask: uint8 mask and-ed with the thresholded image.
        bground_bgr: background colour as (b, g, r); decides the polarity.
        show_process: when true, show debug windows (does not call waitKey).

    Returns:
        ``(letter_bgr, threshed)``: ``letter_bgr`` is the mean colour as a list
        of ints, or ``[-1, -1, -1]`` when no pixel is selected. ``threshed`` is
        the binary text mask.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Luma of the background colour (BGR channel order).
    aver_bground_bgr = 0.114 * bground_bgr[0] + 0.587 * bground_bgr[1] + 0.299 * bground_bgr[2]
    thresh_low = 127
    retval, threshed = cv2.threshold(gray, 127, 255, cv2.THRESH_OTSU)
    # Text must end up white: invert unless the background is dark.
    if not aver_bground_bgr < thresh_low:
        threshed = 255 - threshed
    threshed = cv2.bitwise_and(threshed, mask)

    le_region = np.where(threshed == 255)
    mat_region = img[le_region]
    if mat_region.shape[0] == 0:
        return [-1, -1, -1], threshed

    letter_bgr = np.mean(mat_region, axis=0).astype(int).tolist()
    if show_process:
        cv2.imshow("thresh", threshed)
        imgcp = np.full_like(img, 127)
        imgcp[le_region] = letter_bgr
        cv2.imshow("letter_img", imgcp)
    return letter_bgr, threshed


# Preprocessing that makes the text colour extraction a bit more accurate.
def usm(src):
    """Return an unsharp-masked copy of ``src`` (1.5*src - 0.5*blur, sigma 5)."""
    blur_img = cv2.GaussianBlur(src, (0, 0), 5)
    # The former side-by-side debug buffer was never used and forced a
    # 3-channel input; it has been dropped.
    return cv2.addWeighted(src, 1.5, blur_img, -0.5, 0)


# Second method for the mean text BGR; a median might work better than a mean.
def textbgr_calculator(img, text_mask, show_process=False):
    """Return the mean colour of the sharpened image over ``text_mask == 255``.

    Args:
        img: image to sample (sharpened with ``usm`` first).
        text_mask: uint8 mask; eroded once before sampling.
        show_process: when true, show debug windows (does not call waitKey).

    Returns:
        Mean colour as a list of ints.
    """
    # NOTE(review): the tuple (3, 3) is passed as the erode kernel; kept as-is.
    text_mask = cv2.erode(text_mask, (3, 3), iterations=1)
    usm_img = usm(img)
    text_region = np.where(text_mask == 255)
    # NOTE(review): an empty region yields NaN here — confirm callers never
    # pass an empty mask.
    overall_meanbgr = np.mean(usm_img[text_region], axis=0)
    if show_process:
        colored_text_board = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8) + 127
        colored_text_board[text_region] = overall_meanbgr
        cv2.imshow("usm", usm_img)
        cv2.imshow("textcolor", colored_text_board)
    # np.int was removed from NumPy; the builtin int is the documented replacement.
    return overall_meanbgr.astype(int).tolist()


# Compute the mean BGR and the spread of the background.
def bground_calculator(buble_img, back_ground_mask, dilate=True):
    """Measure the background colour where ``back_ground_mask`` is zero.

    Args:
        buble_img: BGR image.
        back_ground_mask: uint8 mask; zero pixels count as background.
        dilate: dilate the mask once with a 3x3 kernel before sampling.

    Returns:
        ``(bground_aver, bground_region, sd)``: mean colour as an int array
        (``[-1, -1, -1]`` when there is no background pixel), the
        ``np.where`` index tuple of the background, and the mean squared
        deviation of the grey values. No square root is taken, so ``sd`` is a
        variance; it is ``-1`` when there is no background pixel.
    """
    kernel = np.ones((3, 3), np.uint8)
    if dilate:
        back_ground_mask = cv2.dilate(back_ground_mask, kernel, iterations=1)
    bground_region = np.where(back_ground_mask == 0)
    if len(bground_region[0]) == 0:
        return np.array([-1, -1, -1]), bground_region, -1

    bground_aver = np.mean(buble_img[bground_region], axis=0).astype(int)
    gray = cv2.cvtColor(buble_img, cv2.COLOR_BGR2GRAY)
    gray_pixarray = gray[bground_region]
    deviation = gray_pixarray - np.mean(gray_pixarray)
    sd = np.mean(np.power(deviation, 2))
    return bground_aver, bground_region, sd


# Input: text block ROI. Segments the text mask, computes the text/background
# colour statistics from it, and chooses between solid fill and inpainting.
def canny_flood(img, show_process=False, inpaint_sdthresh=10, inpaint=opencv_inpaint):
    """Segment text in a bubble ROI with Canny edges and flood fills, then erase it.

    Args:
        img: BGR ROI. NOTE(review): when no rescale happens and the solid-fill
            branch is taken, this array is modified in place.
        show_process: when true, print and show debug output (blocks on a key).
        inpaint_sdthresh: background spread below which a solid fill is used.
        inpaint: callable ``(img, mask) -> image`` used otherwise.

    Returns:
        ``(threshed, paint_res, bub_dict)``; ``bub_dict`` has the keys
        ``"bgr"``, ``"bground_bgr"`` and ``"inner_rect"``.
    """
    WHITE = (255, 255, 255)
    BLACK = (0, 0, 0)
    kernel = np.ones((3, 3), np.uint8)

    orih, oriw = img.shape[0], img.shape[1]
    # Shrink large ROIs and enlarge small ones before edge detection.
    scaleR = 1
    if orih > 300 and oriw > 300:
        scaleR = 0.6
    elif orih < 120 or oriw < 120:
        scaleR = 1.4
    if scaleR != 1:
        orimg = np.copy(img)
        img = cv2.resize(img, (int(oriw*scaleR), int(orih*scaleR)), interpolation=cv2.INTER_AREA)
    h, w = img.shape[0], img.shape[1]
    img_area = h * w

    cpimg = cv2.GaussianBlur(img, (3, 3), cv2.BORDER_DEFAULT)
    detected_edges = cv2.Canny(cpimg, 70, 140, L2gradient=True, apertureSize=3)
    # Temporary white frame so contours touching the border are closed.
    cv2.rectangle(detected_edges, (0, 0), (w-1, h-1), WHITE, 1, cv2.LINE_8)
    cons, hiers = cv2.findContours(detected_edges, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
    cv2.rectangle(detected_edges, (0, 0), (w-1, h-1), BLACK, 1, cv2.LINE_8)

    outer_msk = np.zeros((h, w), np.uint8)
    min_retval = np.inf
    mask = np.zeros((h, w), np.uint8)
    difres = 10
    flood_diff = (difres, difres, difres)
    seedpnt = (int(w/2), int(h/2))
    # Among large contours, keep the one whose flood fill from the centre is
    # the smallest that still covers more than 30% of the image.
    for ii, contour in enumerate(cons):
        rect = cv2.boundingRect(contour)
        if rect[2]*rect[3] < img_area*0.4:
            continue

        mask = cv2.drawContours(mask, cons, ii, (255), 2)
        cpmask = np.copy(mask)
        cv2.rectangle(mask, (0, 0), (w-1, h-1), WHITE, 1, cv2.LINE_8)
        retval, _, _, rect = cv2.floodFill(cpmask, mask=None, seedPoint=seedpnt, flags=4, newVal=(127), loDiff=flood_diff, upDiff=flood_diff)

        if retval <= img_area * 0.3:
            mask = cv2.drawContours(mask, cons, ii, (0), 2)
        if retval < min_retval and retval > img_area * 0.3:
            min_retval = retval
            outer_msk = cpmask

    outer_msk = 127 - outer_msk
    outer_msk = cv2.dilate(outer_msk, kernel, iterations=1)
    outer_area, _, _, rect = cv2.floodFill(outer_msk, mask=None, seedPoint=seedpnt, flags=4, newVal=(30), loDiff=flood_diff, upDiff=flood_diff)
    outer_msk = 30 - outer_msk
    retval, outer_msk = cv2.threshold(outer_msk, 1, 255, cv2.THRESH_BINARY)
    outer_msk = cv2.bitwise_not(outer_msk, outer_msk)

    detected_edges = cv2.dilate(detected_edges, kernel, iterations=1)
    # At most two passes; the outer mask is eroded after each pass.
    for ii in range(2):
        detected_edges = cv2.bitwise_and(detected_edges, outer_msk)
        mask = np.copy(detected_edges)
        bgarea1, _, _, rect = cv2.floodFill(mask, mask=None, seedPoint=(0, 0), flags=4, newVal=(127), loDiff=flood_diff, upDiff=flood_diff)
        bgarea2, _, _, rect = cv2.floodFill(mask, mask=None, seedPoint=(detected_edges.shape[1]-1, detected_edges.shape[0]-1), flags=4, newVal=(127), loDiff=flood_diff, upDiff=flood_diff)
        txt_area = min(img_area - bgarea1, img_area - bgarea2)
        ratio_ob = txt_area / outer_area
        outer_msk = cv2.erode(outer_msk, kernel, iterations=1)
        if ratio_ob < 0.85:
            break

    mask = 127 - mask
    retval, mask = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
    if scaleR != 1:
        # Back to the original resolution for colour statistics and painting.
        img = orimg
        outer_msk = cv2.resize(outer_msk, (oriw, orih))
        mask = cv2.resize(mask, (oriw, orih))

    bg_mask = cv2.bitwise_or(mask, 255-outer_msk)
    mask = cv2.bitwise_and(mask, outer_msk)

    bground_aver, bground_region, sd = bground_calculator(img, bg_mask)
    inner_rect = None
    threshed = np.zeros((img.shape[0], img.shape[1]), np.uint8)

    if bground_aver[0] != -1:
        letter_aver, threshed = letter_calculator(img, mask, bground_aver, show_process=show_process)
        if letter_aver[0] != -1:
            mask = cv2.dilate(threshed, kernel, iterations=1)
            inner_rect = cv2.boundingRect(cv2.findNonZero(mask))
    else:
        letter_aver = [0, 0, 0]

    if sd != -1 and sd < inpaint_sdthresh:
        # Near-uniform background: paint the whole bubble with its mean colour.
        img[np.where(outer_msk == 255)] = bground_aver
        paint_res = img
        use_inpaint = False
    else:
        paint_res = inpaint(img, mask)
        use_inpaint = True

    if show_process:
        print(f"\nuse inpaint: {use_inpaint}, sd: {sd}, {type(inner_rect)}")
        show_img_by_dict({"res": paint_res, "outermask": outer_msk, "detect": detected_edges, "mask": mask})

    # NOTE(review): the fallback has 4 entries while a found rect gets a fifth
    # entry (-1) appended — confirm that consumers expect both shapes.
    if inner_rect is None:
        inner_rect = [-1, -1, -1, -1]
    else:
        inner_rect = list(inner_rect)
        inner_rect.append(-1)

    bground_aver = bground_aver.astype(int).tolist()
    bub_dict = {"bgr": letter_aver,
                "bground_bgr": bground_aver,
                "inner_rect": inner_rect}
    return threshed, paint_res, bub_dict


# Input: text block ROI. Segments the text mask, computes the text/background
# colour statistics from it, and chooses between solid fill and inpainting.
def connected_canny_flood(img, show_process=False, inpaint_sdthresh=10, inpaint=opencv_inpaint, apply_strokewidth_check=0):
    """Segment text in a bubble ROI with connected components, then erase it.

    Args:
        img: BGR ROI. NOTE(review): modified in place when the solid-fill
            branch is taken.
        show_process: when truthy, print and show debug output; it is also
            forwarded as ``debug_type=show_process-1`` to ``strokewidth_check``.
        inpaint_sdthresh: background spread below which a solid fill is used.
        inpaint: callable ``(img, mask) -> image`` used otherwise.
        apply_strokewidth_check: values above 0 enable ``strokewidth_check``.

    Returns:
        ``(text_mask, paint_res, bub_dict)``; ``bub_dict`` has the keys
        ``"bgr"``, ``"bground_bgr"`` and ``"inner_rect"``.
    """

    # Find the outer-contour mask most likely to be the speech bubble.
    def find_outermask(img):
        connectivity = 4
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(img, connectivity, cv2.CV_16U)
        drawtext = np.zeros((img.shape[0], img.shape[1]), np.uint8)
        max_ind = np.argmax(stats[:, 4])
        drawtext[np.where(labels == max_ind)] = 255

        cv2.rectangle(drawtext, (0, 0), (img.shape[1]-1, img.shape[0]-1), (0, 0, 0), 1, cv2.LINE_8)
        cons, hiers = cv2.findContours(drawtext, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)

        img_area = img.shape[0] * img.shape[1]
        rects = np.array([cv2.boundingRect(cnt) for cnt in cons])
        rect_area = np.array([rect[2] * rect[3] for rect in rects])
        quali_ind = np.where(rect_area > img_area * 0.3)[0]
        outer_mask = np.zeros((img.shape[0], img.shape[1]), np.uint8)
        for ind in quali_ind:
            outer_mask = cv2.drawContours(outer_mask, cons, ind, (255), 2)

        seedpnt = (int(outer_mask.shape[1]/2), int(outer_mask.shape[0]/2))
        difres = 10
        retval, _, _, rect = cv2.floodFill(outer_mask, mask=None, seedPoint=seedpnt, flags=4, newVal=(127), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
        outer_mask = 255 - cv2.threshold(outer_mask - 127, 1, 255, cv2.THRESH_BINARY)[1]
        return num_labels, labels, stats, centroids, outer_mask

    # Converting BGR straight to grayscale can make text hard to separate from
    # the background (e.g. red text on black in the test samples), but there
    # is always one channel where they separate well. Return that channel.
    def ccctest(img, crop_r=0.1):
        maxh = 100
        if img.shape[0] > maxh:
            scaleR = maxh / img.shape[0]
            im = cv2.resize(img, (int(img.shape[1]*scaleR), int(img.shape[0]*scaleR)), interpolation=cv2.INTER_AREA)
        else:
            im = img
        textlabel_counter = 0
        reverse = False
        c_ind = 0

        gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        num_labels, labels, stats, centroids, pseduo_outermask = find_outermask(cv2.threshold(gray, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)[1])
        # Channels 0-2 are B, G, R; channel 3 is the grayscale image.
        im = np.concatenate((im, gray[:, :, np.newaxis]), axis=2)
        outer_cords = np.where(pseduo_outermask == 255)
        maxr, minr = 0.5, 0.001
        minarea = im.shape[0] * im.shape[1] * minr
        for bgr_ind in range(4):
            channel = im[:, :, bgr_ind]
            ret, thresh = cv2.threshold(channel, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
            tmp_reverse = False
            if np.mean(thresh[outer_cords]) > 160:
                thresh = 255 - thresh
                tmp_reverse = True
            num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, 4, cv2.CV_16U)
            max_ind = np.argmax(stats[:, 4])
            limit_w, limit_h = stats[max_ind][2] * maxr, stats[max_ind][3] * maxr
            # Count components that are text-sized: clearly smaller than the
            # largest component but not negligible.
            widths, heights = stats[:, 2], stats[:, 3]
            tmp_counter = int(np.count_nonzero((widths < limit_w) & (heights < limit_h) & (widths * heights > minarea)))
            if tmp_counter > textlabel_counter:
                textlabel_counter = tmp_counter
                c_ind = bgr_ind
                reverse = tmp_reverse
        return c_ind, reverse

    channel_index, reverse = ccctest(img)
    chanel = img[:, :, channel_index] if channel_index < 3 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(chanel, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
    # Reverse to get white text on a black background.
    if reverse:
        thresh = 255 - thresh
    num_labels, labels, stats, centroids, outer_mask = find_outermask(thresh)

    img_area = img.shape[0] * img.shape[1]
    text_mask = np.zeros((img.shape[0], img.shape[1]), np.uint8)
    max_ind = np.argmax(stats[:, 4])
    # Every label except the largest one, below 40% of the image area, is
    # text; a per-label lookup table marks them all in one pass.
    keep_label = stats[:, 4] < img_area * 0.4
    keep_label[max_ind] = False
    text_mask[keep_label[labels]] = 255
    text_mask = cv2.bitwise_and(text_mask, outer_mask)

    if apply_strokewidth_check > 0:
        text_mask = strokewidth_check(text_mask, labels, num_labels, stats, debug_type=show_process-1)

    text_color = textbgr_calculator(img, text_mask, show_process=show_process)
    inner_rect = cv2.boundingRect(cv2.findNonZero(cv2.dilate(text_mask, (3, 3), iterations=1)))
    inner_rect = list(inner_rect)
    inner_rect.append(-1)

    bg_mask = cv2.bitwise_or(text_mask, 255-outer_mask)
    bground_aver, bground_region, sd = bground_calculator(img, bg_mask)

    paint_res = img
    roi_mask = cv2.GaussianBlur(text_mask, (3, 3), cv2.BORDER_DEFAULT)
    _, roi_mask = cv2.threshold(roi_mask, 1, 255, cv2.THRESH_BINARY)
    if sd != -1 and sd < inpaint_sdthresh:
        # Near-uniform background: paint the whole bubble with its mean colour.
        paint_res[np.where(outer_mask == 255)] = bground_aver
        use_inpaint = False
    else:
        paint_res = inpaint(img, roi_mask)
        use_inpaint = True

    if show_process:
        print(f"\nuse inpaint: {use_inpaint}, sd: {sd}, {type(inner_rect)}")
        draw_connected_labels(num_labels, labels, stats, centroids)
        show_img_by_dict({"thresh": thresh, "ori": img, "outer": outer_mask, "text": text_mask, "bgmask": bg_mask, "paintres": paint_res})

    bground_aver = bground_aver.astype(int).tolist()
    bub_dict = {"bgr": text_color,
                "bground_bgr": bground_aver,
                "inner_rect": inner_rect}
    return text_mask, paint_res, bub_dict
ui/mainwindow.py +0 −2 Original line number Diff line number Diff line Loading @@ -453,8 +453,6 @@ class MainWindow(FrameLessWindow): size.height() - msg_size.height())) self.dl_manager.progress_msgbox.move(p) def on_closebtn_clicked(self): if self.imsave_thread.isRunning(): self.imsave_thread.finished.connect(self.close) Loading
# utils/stroke_width_calculator.py 0 → 100644 +132 −0
# NOTE(review): block nesting was reconstructed from a whitespace-collapsed
# listing; confirm the placement of the inner `break`s against the repository.
import cv2
import os
import time

import numpy as np


class Ray():
    """Segment from a start point to an end point, with its Euclidean length."""

    def __init__(self, start_x=None, start_y=None):
        self.start_x = start_x
        self.start_y = start_y
        # End point and length stay None until set_endpnt() is called.
        self.end_x = None
        self.end_y = None
        self.length = None

    def set_endpnt(self, end_x, end_y):
        """Store the end point and set ``length`` to the start-end distance."""
        self.end_x = end_x
        self.end_y = end_y
        self.length = np.sqrt((self.start_x-self.end_x)**2 + (self.start_y-self.end_y)**2)


def calculate_derivatives(gx, gy):
    """Normalise the gradient ``(gx, gy)`` to unit length.

    Returns:
        ``(True, dx, dy)`` with the unit vector, or ``(False, -1, -1)`` when
        the gradient magnitude is exactly zero.
    """
    mag = np.sqrt(gx*gx + gy*gy)
    if mag == 0:
        return False, -1, -1
    return True, gx / mag, gy / mag


def sw_calculator(mask, canny_img, gradient_x, gradient_y, show_process=False):
    """Cast rays along the gradient from sampled pixels and measure their length.

    Start points are the pixels where both ``canny_img`` and ``mask`` are
    non-zero; at most about 150 of them are sampled. Each ray walks along the
    normalised gradient until it reaches a pixel where ``canny_img`` is zero.
    The ray is kept only if the gradient there points against the ray.

    Args:
        mask: 2-D array; non-zero pixels are candidate start points.
        canny_img: 2-D array; zero pixels terminate a ray.
        gradient_x, gradient_y: per-pixel gradient components, indexed [y][x].
        show_process: when true, draw the rays and block on ``cv2.waitKey``.

    Returns:
        List of ``[start_x, start_y, end_x, end_y, length]``. It is sorted by
        length only when ``show_process`` is true.
    """
    height, width = canny_img.shape[0], canny_img.shape[1]
    if show_process:
        drawborder = np.zeros((height, width, 3), dtype=np.uint8)

    pnts = np.where(np.logical_and(canny_img != 0, mask != 0))
    total_pnt_num = pnts[0].shape[0]
    sample_pnt_num = 150
    sample_step = total_pnt_num / sample_pnt_num if total_pnt_num > sample_pnt_num else 1
    cur_pnt_ind = 0
    ray_list = []
    t0 = time.time()
    while cur_pnt_ind < total_pnt_num:
        start_x, start_y = pnts[1][cur_pnt_ind], pnts[0][cur_pnt_ind]
        ray_arr = [start_x, start_y, -1, -1, -1]
        valid, dx, dy = calculate_derivatives(gradient_x[start_y][start_x], gradient_y[start_y][start_x])
        if valid:
            inc = 0.2
            cur_x, cur_y = start_x + inc * dx, start_y + inc * dy
            while True:
                tmp_curx, tmp_cury = int(cur_x), int(cur_y)
                # Row 0 is a valid pixel row; the previous `tmp_cury <= 0`
                # test was asymmetric with the x test and dropped it.
                if tmp_curx < 0 or tmp_curx >= width or tmp_cury < 0 or tmp_cury >= height:
                    break
                if canny_img[tmp_cury][tmp_curx] == 0:
                    valid, dx_t, dy_t = calculate_derivatives(gradient_x[tmp_cury][tmp_curx], gradient_y[tmp_cury][tmp_curx])
                    if not valid:
                        break
                    # Angle between the reversed ray direction and the end
                    # gradient is below 90 degrees iff their dot product is
                    # positive. A sign test avoids arccos() returning NaN when
                    # rounding pushes the dot product slightly outside [-1, 1].
                    if (dx * dx_t + dy * dy_t) < 0:
                        ray_arr[2] = tmp_curx
                        ray_arr[3] = tmp_cury
                        ray_arr[4] = np.sqrt((start_x - tmp_curx)**2 + (start_y - tmp_cury)**2)
                    break
                cur_x += dx
                cur_y += dy
            if ray_arr[2] != -1:
                ray_list.append(ray_arr)
                if show_process:
                    drawborder = cv2.arrowedLine(drawborder, (int(ray_arr[0]), int(ray_arr[1])), (ray_arr[2], ray_arr[3]), (0, 255, 0), 1)
        cur_pnt_ind += sample_step
        cur_pnt_ind = int(round(cur_pnt_ind))

    if show_process and len(ray_list) != 0:
        ray_list.sort(key=lambda x: x[4])
        print(f"cost time: {time.time() - t0}, {total_pnt_num}, {ray_list[len(ray_list) // 2][4]}")
        cv2.imshow("border", drawborder)
        cv2.imshow("cannyimg", canny_img)
        cv2.waitKey(0)
    return ray_list


def strokewidth_check(text_mask, labels, num_labels, stats, debug_type=0):
    """Erase connected components whose stroke width and area are both outliers.

    For every label except 0 whose area exceeds 0.2% of the image, the median
    ray length from ``sw_calculator`` is taken as its stroke width. A label is
    removed from ``text_mask`` when its width exceeds twice the mean width and
    its area exceeds twice the mean area of the measured labels.

    Args:
        text_mask: 2-D uint8 mask; modified in place and also returned.
        labels, num_labels, stats: presumably the outputs of
            ``cv2.connectedComponentsWithStats`` (``stats[i][4]`` is read as
            the area, ``stats[i][0:4]`` as x, y, w, h) — confirm with caller.
        debug_type: values above 0 enable the visual debug output.

    Returns:
        The same ``text_mask`` array with the outlier labels zeroed.
    """
    rays_width = []
    height, width = text_mask.shape[0], text_mask.shape[1]
    # NOTE(review): the tuple (3,3) is passed as the kernel and BORDER_DEFAULT
    # lands in the third positional slot of dilate(); kept as-is — confirm intent.
    blur_img = cv2.dilate(text_mask, (3, 3), cv2.BORDER_DEFAULT)
    _, canny_img = cv2.threshold(text_mask, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
    blur2 = blur_img.astype(float) / 255
    # NOTE(review): the third positional argument of GaussianBlur is sigmaX, so
    # BORDER_DEFAULT acts as the sigma here; kept as-is — confirm intent.
    gradient_x = cv2.Scharr(blur2, ddepth=-1, dx=1, dy=0)
    gradient_x = cv2.GaussianBlur(gradient_x, (3, 3), cv2.BORDER_DEFAULT)
    gradient_y = cv2.Scharr(blur2, ddepth=-1, dx=0, dy=1)
    gradient_y = cv2.GaussianBlur(gradient_y, (3, 3), cv2.BORDER_DEFAULT)
    img_area = height * width
    show_process = debug_type > 0

    for lab in range(1, num_labels):
        stat = stats[lab]
        if not stat[4] > img_area * 0.002:
            continue
        # Bounding box padded by 2 px and clamped to the image.
        x1, y1, x2, y2 = stat[0] - 2, stat[1] - 2, stat[0] + stat[2] + 2, stat[1] + stat[3] + 2
        x1, x2 = max(x1, 0), min(x2, width)
        y1, y2 = max(y1, 0), min(y2, height)
        labcord = np.where(labels == lab)
        labcord2 = (labcord[0] - y1, labcord[1] - x1)
        text_roi = np.zeros((y2-y1, x2-x1), dtype=np.uint8)
        text_roi[labcord2] = 255
        text_roi = cv2.GaussianBlur(text_roi, (3, 3), cv2.BORDER_DEFAULT)
        ray_list = sw_calculator(text_roi, canny_img[y1: y2, x1: x2], gradient_x[y1: y2, x1: x2], gradient_y[y1: y2, x1: x2], show_process=show_process)
        if len(ray_list) != 0:
            ray_list.sort(key=lambda x: x[4])
            rays_width.append([int(lab), ray_list[len(ray_list) // 2][4]])

    if len(rays_width) != 0:
        rays_width = np.array(rays_width)
        mean_width = np.mean(rays_width[:, 1])
        # np.int0 / np.int no longer exist in current NumPy; use explicit types.
        ma = rays_width[:, 0].astype(np.intp)
        mean_area = np.mean(stats[ma][:, 4])
        false_labels = np.where(rays_width[:, 1] > 2*mean_width)[0]
        false_labels = rays_width[false_labels, 0].astype(int)
        for fl in false_labels:
            if stats[fl][4] > 2 * mean_area:
                text_mask[np.where(labels == fl)] = 0
    return text_mask
# utils/textblock_mask.py 0 → 100644 +349 −0
# NOTE(review): block nesting was reconstructed from a whitespace-collapsed
# listing; confirm if/else pairing and loop extents against the repository.
import cv2
import numpy as np
from .imgproc_utils import draw_connected_labels
from .stroke_width_calculator import strokewidth_check


def opencv_inpaint(img, mask):
    """Default inpainter: ``cv2.inpaint`` with radius 3 and ``INPAINT_NS``."""
    return cv2.inpaint(img, mask, 3, cv2.INPAINT_NS)


def show_img_by_dict(imgdicts):
    """Show every image in ``{window_name: image}`` and block until a key press."""
    for keyname, image in imgdicts.items():
        cv2.imshow(keyname, image)
    cv2.waitKey(0)


# Compute the mean BGR of the text.
def letter_calculator(img, mask, bground_bgr, show_process=False):
    """Estimate the text colour inside ``mask`` by Otsu thresholding.

    Args:
        img: BGR image (it is passed to ``cv2.cvtColor(..., COLOR_BGR2GRAY)``).
        mask: uint8 mask and-ed with the thresholded image.
        bground_bgr: background colour as (b, g, r); decides the polarity.
        show_process: when true, show debug windows (does not call waitKey).

    Returns:
        ``(letter_bgr, threshed)``: ``letter_bgr`` is the mean colour as a list
        of ints, or ``[-1, -1, -1]`` when no pixel is selected. ``threshed`` is
        the binary text mask.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Luma of the background colour (BGR channel order).
    aver_bground_bgr = 0.114 * bground_bgr[0] + 0.587 * bground_bgr[1] + 0.299 * bground_bgr[2]
    thresh_low = 127
    retval, threshed = cv2.threshold(gray, 127, 255, cv2.THRESH_OTSU)
    # Text must end up white: invert unless the background is dark.
    if not aver_bground_bgr < thresh_low:
        threshed = 255 - threshed
    threshed = cv2.bitwise_and(threshed, mask)

    le_region = np.where(threshed == 255)
    mat_region = img[le_region]
    if mat_region.shape[0] == 0:
        return [-1, -1, -1], threshed

    letter_bgr = np.mean(mat_region, axis=0).astype(int).tolist()
    if show_process:
        cv2.imshow("thresh", threshed)
        imgcp = np.full_like(img, 127)
        imgcp[le_region] = letter_bgr
        cv2.imshow("letter_img", imgcp)
    return letter_bgr, threshed


# Preprocessing that makes the text colour extraction a bit more accurate.
def usm(src):
    """Return an unsharp-masked copy of ``src`` (1.5*src - 0.5*blur, sigma 5)."""
    blur_img = cv2.GaussianBlur(src, (0, 0), 5)
    # The former side-by-side debug buffer was never used and forced a
    # 3-channel input; it has been dropped.
    return cv2.addWeighted(src, 1.5, blur_img, -0.5, 0)


# Second method for the mean text BGR; a median might work better than a mean.
def textbgr_calculator(img, text_mask, show_process=False):
    """Return the mean colour of the sharpened image over ``text_mask == 255``.

    Args:
        img: image to sample (sharpened with ``usm`` first).
        text_mask: uint8 mask; eroded once before sampling.
        show_process: when true, show debug windows (does not call waitKey).

    Returns:
        Mean colour as a list of ints.
    """
    # NOTE(review): the tuple (3, 3) is passed as the erode kernel; kept as-is.
    text_mask = cv2.erode(text_mask, (3, 3), iterations=1)
    usm_img = usm(img)
    text_region = np.where(text_mask == 255)
    # NOTE(review): an empty region yields NaN here — confirm callers never
    # pass an empty mask.
    overall_meanbgr = np.mean(usm_img[text_region], axis=0)
    if show_process:
        colored_text_board = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8) + 127
        colored_text_board[text_region] = overall_meanbgr
        cv2.imshow("usm", usm_img)
        cv2.imshow("textcolor", colored_text_board)
    # np.int was removed from NumPy; the builtin int is the documented replacement.
    return overall_meanbgr.astype(int).tolist()


# Compute the mean BGR and the spread of the background.
def bground_calculator(buble_img, back_ground_mask, dilate=True):
    """Measure the background colour where ``back_ground_mask`` is zero.

    Args:
        buble_img: BGR image.
        back_ground_mask: uint8 mask; zero pixels count as background.
        dilate: dilate the mask once with a 3x3 kernel before sampling.

    Returns:
        ``(bground_aver, bground_region, sd)``: mean colour as an int array
        (``[-1, -1, -1]`` when there is no background pixel), the
        ``np.where`` index tuple of the background, and the mean squared
        deviation of the grey values. No square root is taken, so ``sd`` is a
        variance; it is ``-1`` when there is no background pixel.
    """
    kernel = np.ones((3, 3), np.uint8)
    if dilate:
        back_ground_mask = cv2.dilate(back_ground_mask, kernel, iterations=1)
    bground_region = np.where(back_ground_mask == 0)
    if len(bground_region[0]) == 0:
        return np.array([-1, -1, -1]), bground_region, -1

    bground_aver = np.mean(buble_img[bground_region], axis=0).astype(int)
    gray = cv2.cvtColor(buble_img, cv2.COLOR_BGR2GRAY)
    gray_pixarray = gray[bground_region]
    deviation = gray_pixarray - np.mean(gray_pixarray)
    sd = np.mean(np.power(deviation, 2))
    return bground_aver, bground_region, sd


# Input: text block ROI. Segments the text mask, computes the text/background
# colour statistics from it, and chooses between solid fill and inpainting.
def canny_flood(img, show_process=False, inpaint_sdthresh=10, inpaint=opencv_inpaint):
    """Segment text in a bubble ROI with Canny edges and flood fills, then erase it.

    Args:
        img: BGR ROI. NOTE(review): when no rescale happens and the solid-fill
            branch is taken, this array is modified in place.
        show_process: when true, print and show debug output (blocks on a key).
        inpaint_sdthresh: background spread below which a solid fill is used.
        inpaint: callable ``(img, mask) -> image`` used otherwise.

    Returns:
        ``(threshed, paint_res, bub_dict)``; ``bub_dict`` has the keys
        ``"bgr"``, ``"bground_bgr"`` and ``"inner_rect"``.
    """
    WHITE = (255, 255, 255)
    BLACK = (0, 0, 0)
    kernel = np.ones((3, 3), np.uint8)

    orih, oriw = img.shape[0], img.shape[1]
    # Shrink large ROIs and enlarge small ones before edge detection.
    scaleR = 1
    if orih > 300 and oriw > 300:
        scaleR = 0.6
    elif orih < 120 or oriw < 120:
        scaleR = 1.4
    if scaleR != 1:
        orimg = np.copy(img)
        img = cv2.resize(img, (int(oriw*scaleR), int(orih*scaleR)), interpolation=cv2.INTER_AREA)
    h, w = img.shape[0], img.shape[1]
    img_area = h * w

    cpimg = cv2.GaussianBlur(img, (3, 3), cv2.BORDER_DEFAULT)
    detected_edges = cv2.Canny(cpimg, 70, 140, L2gradient=True, apertureSize=3)
    # Temporary white frame so contours touching the border are closed.
    cv2.rectangle(detected_edges, (0, 0), (w-1, h-1), WHITE, 1, cv2.LINE_8)
    cons, hiers = cv2.findContours(detected_edges, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
    cv2.rectangle(detected_edges, (0, 0), (w-1, h-1), BLACK, 1, cv2.LINE_8)

    outer_msk = np.zeros((h, w), np.uint8)
    min_retval = np.inf
    mask = np.zeros((h, w), np.uint8)
    difres = 10
    flood_diff = (difres, difres, difres)
    seedpnt = (int(w/2), int(h/2))
    # Among large contours, keep the one whose flood fill from the centre is
    # the smallest that still covers more than 30% of the image.
    for ii, contour in enumerate(cons):
        rect = cv2.boundingRect(contour)
        if rect[2]*rect[3] < img_area*0.4:
            continue

        mask = cv2.drawContours(mask, cons, ii, (255), 2)
        cpmask = np.copy(mask)
        cv2.rectangle(mask, (0, 0), (w-1, h-1), WHITE, 1, cv2.LINE_8)
        retval, _, _, rect = cv2.floodFill(cpmask, mask=None, seedPoint=seedpnt, flags=4, newVal=(127), loDiff=flood_diff, upDiff=flood_diff)

        if retval <= img_area * 0.3:
            mask = cv2.drawContours(mask, cons, ii, (0), 2)
        if retval < min_retval and retval > img_area * 0.3:
            min_retval = retval
            outer_msk = cpmask

    outer_msk = 127 - outer_msk
    outer_msk = cv2.dilate(outer_msk, kernel, iterations=1)
    outer_area, _, _, rect = cv2.floodFill(outer_msk, mask=None, seedPoint=seedpnt, flags=4, newVal=(30), loDiff=flood_diff, upDiff=flood_diff)
    outer_msk = 30 - outer_msk
    retval, outer_msk = cv2.threshold(outer_msk, 1, 255, cv2.THRESH_BINARY)
    outer_msk = cv2.bitwise_not(outer_msk, outer_msk)

    detected_edges = cv2.dilate(detected_edges, kernel, iterations=1)
    # At most two passes; the outer mask is eroded after each pass.
    for ii in range(2):
        detected_edges = cv2.bitwise_and(detected_edges, outer_msk)
        mask = np.copy(detected_edges)
        bgarea1, _, _, rect = cv2.floodFill(mask, mask=None, seedPoint=(0, 0), flags=4, newVal=(127), loDiff=flood_diff, upDiff=flood_diff)
        bgarea2, _, _, rect = cv2.floodFill(mask, mask=None, seedPoint=(detected_edges.shape[1]-1, detected_edges.shape[0]-1), flags=4, newVal=(127), loDiff=flood_diff, upDiff=flood_diff)
        txt_area = min(img_area - bgarea1, img_area - bgarea2)
        ratio_ob = txt_area / outer_area
        outer_msk = cv2.erode(outer_msk, kernel, iterations=1)
        if ratio_ob < 0.85:
            break

    mask = 127 - mask
    retval, mask = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
    if scaleR != 1:
        # Back to the original resolution for colour statistics and painting.
        img = orimg
        outer_msk = cv2.resize(outer_msk, (oriw, orih))
        mask = cv2.resize(mask, (oriw, orih))

    bg_mask = cv2.bitwise_or(mask, 255-outer_msk)
    mask = cv2.bitwise_and(mask, outer_msk)

    bground_aver, bground_region, sd = bground_calculator(img, bg_mask)
    inner_rect = None
    threshed = np.zeros((img.shape[0], img.shape[1]), np.uint8)

    if bground_aver[0] != -1:
        letter_aver, threshed = letter_calculator(img, mask, bground_aver, show_process=show_process)
        if letter_aver[0] != -1:
            mask = cv2.dilate(threshed, kernel, iterations=1)
            inner_rect = cv2.boundingRect(cv2.findNonZero(mask))
    else:
        letter_aver = [0, 0, 0]

    if sd != -1 and sd < inpaint_sdthresh:
        # Near-uniform background: paint the whole bubble with its mean colour.
        img[np.where(outer_msk == 255)] = bground_aver
        paint_res = img
        use_inpaint = False
    else:
        paint_res = inpaint(img, mask)
        use_inpaint = True

    if show_process:
        print(f"\nuse inpaint: {use_inpaint}, sd: {sd}, {type(inner_rect)}")
        show_img_by_dict({"res": paint_res, "outermask": outer_msk, "detect": detected_edges, "mask": mask})

    # NOTE(review): the fallback has 4 entries while a found rect gets a fifth
    # entry (-1) appended — confirm that consumers expect both shapes.
    if inner_rect is None:
        inner_rect = [-1, -1, -1, -1]
    else:
        inner_rect = list(inner_rect)
        inner_rect.append(-1)

    bground_aver = bground_aver.astype(int).tolist()
    bub_dict = {"bgr": letter_aver,
                "bground_bgr": bground_aver,
                "inner_rect": inner_rect}
    return threshed, paint_res, bub_dict


# Input: text block ROI. Segments the text mask, computes the text/background
# colour statistics from it, and chooses between solid fill and inpainting.
def connected_canny_flood(img, show_process=False, inpaint_sdthresh=10, inpaint=opencv_inpaint, apply_strokewidth_check=0):
    """Segment text in a bubble ROI with connected components, then erase it.

    Args:
        img: BGR ROI. NOTE(review): modified in place when the solid-fill
            branch is taken.
        show_process: when truthy, print and show debug output; it is also
            forwarded as ``debug_type=show_process-1`` to ``strokewidth_check``.
        inpaint_sdthresh: background spread below which a solid fill is used.
        inpaint: callable ``(img, mask) -> image`` used otherwise.
        apply_strokewidth_check: values above 0 enable ``strokewidth_check``.

    Returns:
        ``(text_mask, paint_res, bub_dict)``; ``bub_dict`` has the keys
        ``"bgr"``, ``"bground_bgr"`` and ``"inner_rect"``.
    """

    # Find the outer-contour mask most likely to be the speech bubble.
    def find_outermask(img):
        connectivity = 4
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(img, connectivity, cv2.CV_16U)
        drawtext = np.zeros((img.shape[0], img.shape[1]), np.uint8)
        max_ind = np.argmax(stats[:, 4])
        drawtext[np.where(labels == max_ind)] = 255

        cv2.rectangle(drawtext, (0, 0), (img.shape[1]-1, img.shape[0]-1), (0, 0, 0), 1, cv2.LINE_8)
        cons, hiers = cv2.findContours(drawtext, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)

        img_area = img.shape[0] * img.shape[1]
        rects = np.array([cv2.boundingRect(cnt) for cnt in cons])
        rect_area = np.array([rect[2] * rect[3] for rect in rects])
        quali_ind = np.where(rect_area > img_area * 0.3)[0]
        outer_mask = np.zeros((img.shape[0], img.shape[1]), np.uint8)
        for ind in quali_ind:
            outer_mask = cv2.drawContours(outer_mask, cons, ind, (255), 2)

        seedpnt = (int(outer_mask.shape[1]/2), int(outer_mask.shape[0]/2))
        difres = 10
        retval, _, _, rect = cv2.floodFill(outer_mask, mask=None, seedPoint=seedpnt, flags=4, newVal=(127), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
        outer_mask = 255 - cv2.threshold(outer_mask - 127, 1, 255, cv2.THRESH_BINARY)[1]
        return num_labels, labels, stats, centroids, outer_mask

    # Converting BGR straight to grayscale can make text hard to separate from
    # the background (e.g. red text on black in the test samples), but there
    # is always one channel where they separate well. Return that channel.
    def ccctest(img, crop_r=0.1):
        maxh = 100
        if img.shape[0] > maxh:
            scaleR = maxh / img.shape[0]
            im = cv2.resize(img, (int(img.shape[1]*scaleR), int(img.shape[0]*scaleR)), interpolation=cv2.INTER_AREA)
        else:
            im = img
        textlabel_counter = 0
        reverse = False
        c_ind = 0

        gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        num_labels, labels, stats, centroids, pseduo_outermask = find_outermask(cv2.threshold(gray, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)[1])
        # Channels 0-2 are B, G, R; channel 3 is the grayscale image.
        im = np.concatenate((im, gray[:, :, np.newaxis]), axis=2)
        outer_cords = np.where(pseduo_outermask == 255)
        maxr, minr = 0.5, 0.001
        minarea = im.shape[0] * im.shape[1] * minr
        for bgr_ind in range(4):
            channel = im[:, :, bgr_ind]
            ret, thresh = cv2.threshold(channel, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
            tmp_reverse = False
            if np.mean(thresh[outer_cords]) > 160:
                thresh = 255 - thresh
                tmp_reverse = True
            num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, 4, cv2.CV_16U)
            max_ind = np.argmax(stats[:, 4])
            limit_w, limit_h = stats[max_ind][2] * maxr, stats[max_ind][3] * maxr
            # Count components that are text-sized: clearly smaller than the
            # largest component but not negligible.
            widths, heights = stats[:, 2], stats[:, 3]
            tmp_counter = int(np.count_nonzero((widths < limit_w) & (heights < limit_h) & (widths * heights > minarea)))
            if tmp_counter > textlabel_counter:
                textlabel_counter = tmp_counter
                c_ind = bgr_ind
                reverse = tmp_reverse
        return c_ind, reverse

    channel_index, reverse = ccctest(img)
    chanel = img[:, :, channel_index] if channel_index < 3 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(chanel, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
    # Reverse to get white text on a black background.
    if reverse:
        thresh = 255 - thresh
    num_labels, labels, stats, centroids, outer_mask = find_outermask(thresh)

    img_area = img.shape[0] * img.shape[1]
    text_mask = np.zeros((img.shape[0], img.shape[1]), np.uint8)
    max_ind = np.argmax(stats[:, 4])
    # Every label except the largest one, below 40% of the image area, is
    # text; a per-label lookup table marks them all in one pass.
    keep_label = stats[:, 4] < img_area * 0.4
    keep_label[max_ind] = False
    text_mask[keep_label[labels]] = 255
    text_mask = cv2.bitwise_and(text_mask, outer_mask)

    if apply_strokewidth_check > 0:
        text_mask = strokewidth_check(text_mask, labels, num_labels, stats, debug_type=show_process-1)

    text_color = textbgr_calculator(img, text_mask, show_process=show_process)
    inner_rect = cv2.boundingRect(cv2.findNonZero(cv2.dilate(text_mask, (3, 3), iterations=1)))
    inner_rect = list(inner_rect)
    inner_rect.append(-1)

    bg_mask = cv2.bitwise_or(text_mask, 255-outer_mask)
    bground_aver, bground_region, sd = bground_calculator(img, bg_mask)

    paint_res = img
    roi_mask = cv2.GaussianBlur(text_mask, (3, 3), cv2.BORDER_DEFAULT)
    _, roi_mask = cv2.threshold(roi_mask, 1, 255, cv2.THRESH_BINARY)
    if sd != -1 and sd < inpaint_sdthresh:
        # Near-uniform background: paint the whole bubble with its mean colour.
        paint_res[np.where(outer_mask == 255)] = bground_aver
        use_inpaint = False
    else:
        paint_res = inpaint(img, roi_mask)
        use_inpaint = True

    if show_process:
        print(f"\nuse inpaint: {use_inpaint}, sd: {sd}, {type(inner_rect)}")
        draw_connected_labels(num_labels, labels, stats, centroids)
        show_img_by_dict({"thresh": thresh, "ori": img, "outer": outer_mask, "text": text_mask, "bgmask": bg_mask, "paintres": paint_res})

    bground_aver = bground_aver.astype(int).tolist()
    bub_dict = {"bgr": text_color,
                "bground_bgr": bground_aver,
                "inner_rect": inner_rect}
    return text_mask, paint_res, bub_dict