Commit 0464e6ff authored by dmMaze's avatar dmMaze
Browse files

add non dl methods to extract text mask

parent 3e89df81
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -453,8 +453,6 @@ class MainWindow(FrameLessWindow):
                                    size.height() - msg_size.height()))
        self.dl_manager.progress_msgbox.move(p)

    

    def on_closebtn_clicked(self):
        if self.imsave_thread.isRunning():
            self.imsave_thread.finished.connect(self.close)
+132 −0
Original line number Diff line number Diff line
import cv2, os, time
import numpy as np

class Ray():
    """A ray described by a start point and, once known, an end point.

    ``end_x``, ``end_y`` and ``length`` stay ``None`` until ``set_endpnt``
    is called.
    """

    def __init__(self, start_x=None, start_y=None):
        self.start_x, self.start_y = start_x, start_y
        # The end point is unknown at construction time.
        self.end_x = self.end_y = None
        self.length = None

    def set_endpnt(self, end_x, end_y):
        """Store the end point and cache the Euclidean start-to-end distance."""
        self.end_x, self.end_y = end_x, end_y
        delta_x = self.start_x - self.end_x
        delta_y = self.start_y - self.end_y
        self.length = np.sqrt(delta_x**2 + delta_y**2)


def calculate_derivatives(gx, gy):
    """Normalize the gradient vector ``(gx, gy)`` to unit length.

    Returns:
        ``(True, gx / mag, gy / mag)`` when the magnitude is non-zero,
        ``(False, -1, -1)`` when the magnitude is exactly zero.
    """
    magnitude = np.sqrt(gx * gx + gy * gy)
    if magnitude != 0:
        return True, gx / magnitude, gy / magnitude
    # A zero-length gradient has no direction to report.
    return False, -1, -1

def sw_calculator(mask, canny_img, gradient_x, gradient_y, show_process=False):
    """Cast stroke-width rays across one region and return the rays that hit.

    Start points are the pixels where both ``canny_img`` and ``mask`` are
    non-zero, sub-sampled to roughly 150 points.  From every start point a ray
    walks along the normalized local gradient while ``canny_img`` is non-zero.
    At the first zero pixel the ray is kept only if the gradient there points
    into the opposite half-plane of the start gradient.

    Args:
        mask: 2-D array; start points are taken where it is non-zero.
        canny_img: 2-D array indexed ``[y][x]``; the ray keeps travelling while
            the sampled pixel is non-zero.
        gradient_x: per-pixel x gradient, indexed like ``canny_img``.
        gradient_y: per-pixel y gradient, indexed like ``canny_img``.
        show_process: when true, draw the rays, print timing information and
            show debug windows (blocks on ``cv2.waitKey(0)``).  In that mode
            the returned list is sorted by ray length.

    Returns:
        A list of ``[start_x, start_y, end_x, end_y, length]`` records, one per
        accepted ray.
    """
    height, width = canny_img.shape[0], canny_img.shape[1]
    if show_process:
        drawborder = np.zeros((height, width, 3), dtype=np.uint8)

    pnts = np.where(np.logical_and(canny_img != 0, mask != 0))
    total_pnt_num = pnts[0].shape[0]
    sample_pnt_num = 150
    # Fractional step so that about `sample_pnt_num` points are visited.
    sample_step = total_pnt_num / sample_pnt_num if total_pnt_num > sample_pnt_num else 1

    cur_pnt_ind = 0
    ray_list = []
    t0 = time.time()
    while cur_pnt_ind < total_pnt_num:
        # Plain ints keep numpy scalars out of the ray records and the cv2 drawing call.
        start_x, start_y = int(pnts[1][cur_pnt_ind]), int(pnts[0][cur_pnt_ind])
        ray_arr = [start_x, start_y, -1, -1, -1]
        valid, dx, dy = calculate_derivatives(gradient_x[start_y][start_x], gradient_y[start_y][start_x])

        if valid:
            inc = 0.2
            cur_x, cur_y = start_x + inc * dx, start_y + inc * dy
            while True:
                # floor (not truncation) so positions in (-1, 0) count as outside the image.
                tmp_curx, tmp_cury = int(np.floor(cur_x)), int(np.floor(cur_y))
                # Row 0 and column 0 are valid pixels; only negative indices are out of range.
                if tmp_curx < 0 or tmp_curx >= width or tmp_cury < 0 or tmp_cury >= height:
                    break
                if canny_img[tmp_cury][tmp_curx] == 0:
                    end_valid, dx_t, dy_t = calculate_derivatives(gradient_x[tmp_cury][tmp_curx], gradient_y[tmp_cury][tmp_curx])
                    # A positive dot product with the reversed start direction means the
                    # angle is below 90 degrees; comparing the dot product directly avoids
                    # arccos returning NaN when rounding pushes the value past 1.
                    if end_valid and -dx * dx_t + -dy * dy_t > 0:
                        ray_arr[2] = tmp_curx
                        ray_arr[3] = tmp_cury
                        ray_arr[4] = np.sqrt((start_x - tmp_curx)**2 + (start_y - tmp_cury)**2)
                    break
                cur_x += dx
                cur_y += dy
            if ray_arr[2] != -1:
                ray_list.append(ray_arr)
                if show_process:
                    drawborder = cv2.arrowedLine(drawborder, (ray_arr[0], ray_arr[1]), (ray_arr[2], ray_arr[3]),
                                                    (0, 255, 0), 1)

        cur_pnt_ind += sample_step
        cur_pnt_ind = int(round(cur_pnt_ind))
    if show_process and len(ray_list) != 0:
        ray_list.sort(key=lambda x: x[4])

        print(f"cost time: {time.time() - t0}, {total_pnt_num}, {ray_list[int(len(ray_list)/2)][4]}")

        cv2.imshow("border", drawborder)
        cv2.imshow("cannyimg", canny_img)
        cv2.waitKey(0)
    return ray_list

def strokewidth_check(text_mask, labels, num_labels, stats, debug_type=0):
    """Remove connected components whose stroke width and area are both outliers.

    For every non-background label whose area exceeds 0.2% of the image, the
    median length of the rays returned by ``sw_calculator`` is taken as that
    component's stroke width.  A component is erased from ``text_mask`` when
    its stroke width is more than twice the mean stroke width and its area is
    more than twice the mean area of the measured components.

    Args:
        text_mask: 2-D uint8 mask; modified in place and also returned.
        labels: label image, compared against label ids with ``labels == lab``.
        num_labels: number of labels; label 0 is skipped.
        stats: per-label stats where ``stat[0..3]`` are used as x, y, width,
            height and ``stat[4]`` as area.
        debug_type: values greater than 0 enable the debug display of
            ``sw_calculator``.

    Returns:
        ``text_mask`` with the rejected components set to 0.
    """
    rays_width = []
    height, width = text_mask.shape[0], text_mask.shape[1]

    # NOTE(review): the second positional argument of cv2.dilate is the kernel and the
    # third is dst, and the third positional argument of cv2.GaussianBlur is sigmaX, so
    # `(3, 3)` / `cv2.BORDER_DEFAULT` below may not mean what they appear to mean.
    # The calls are kept exactly as they were so results do not change - confirm intent.
    blur_img = cv2.dilate(text_mask, (3, 3), cv2.BORDER_DEFAULT)

    _, canny_img = cv2.threshold(text_mask, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
    blur2 = blur_img.astype(float) / 255
    gradient_x = cv2.Scharr(blur2, ddepth=-1, dx=1, dy=0)
    gradient_x = cv2.GaussianBlur(gradient_x, (3, 3), cv2.BORDER_DEFAULT)
    gradient_y = cv2.Scharr(blur2, ddepth=-1, dx=0, dy=1)
    gradient_y = cv2.GaussianBlur(gradient_y, (3, 3), cv2.BORDER_DEFAULT)

    img_area = height * width
    show_process = debug_type > 0
    # Label 0 is never measured.
    for lab in range(1, num_labels):
        stat = stats[lab]
        if stat[4] <= img_area * 0.002:
            continue
        # Bounding box padded by 2 px and clipped to the image.
        x1, y1, x2, y2 = stat[0] - 2, stat[1] - 2, stat[0] + stat[2] + 2, stat[1] + stat[3] + 2
        x1, x2 = max(x1, 0), min(x2, width)
        y1, y2 = max(y1, 0), min(y2, height)
        labcord = np.where(labels == lab)
        labcord2 = (labcord[0] - y1, labcord[1] - x1)
        text_roi = np.zeros((y2-y1, x2-x1), dtype=np.uint8)
        text_roi[labcord2] = 255
        text_roi = cv2.GaussianBlur(text_roi, (3, 3), cv2.BORDER_DEFAULT)
        ray_list = sw_calculator(text_roi,
                                 canny_img[y1: y2, x1: x2],
                                 gradient_x[y1: y2, x1: x2],
                                 gradient_y[y1: y2, x1: x2],
                                 show_process=show_process)
        if len(ray_list) != 0:
            # The median ray length is this component's stroke width.
            ray_list.sort(key=lambda x: x[4])
            rays_width.append([int(lab), ray_list[int(len(ray_list)/2)][4]])

    if len(rays_width) != 0:
        rays_width = np.array(rays_width)
        mean_width = np.mean(rays_width[:, 1])
        # np.int0 / np.int were removed from NumPy; intp / int are the equivalents.
        ma = rays_width[:, 0].astype(np.intp)
        mean_area = np.mean(stats[ma][:, 4])

        false_labels = np.where(rays_width[:, 1] > 2*mean_width)[0]
        false_labels = rays_width[false_labels, 0].astype(int)
        for fl in false_labels:
            if stats[fl][4] > 2 * mean_area:
                text_mask[np.where(labels == fl)] = 0
    return text_mask
+349 −0
Original line number Diff line number Diff line
import cv2
import numpy as np
from .imgproc_utils import draw_connected_labels
from .stroke_width_calculator import strokewidth_check

def opencv_inpaint(img, mask):
    """Inpaint ``img`` where ``mask`` is set, using cv2.INPAINT_NS with radius 3."""
    return cv2.inpaint(img, mask, 3, cv2.INPAINT_NS)

def show_img_by_dict(imgdicts):
    """Show each image of ``imgdicts`` in a window named after its key, then wait for a key press."""
    for window_name, image in imgdicts.items():
        cv2.imshow(window_name, image)
    cv2.waitKey(0)

# Compute the mean BGR value of the text
def letter_calculator(img, mask, bground_bgr, show_process=False):
    """Estimate the mean BGR color of the letters inside ``mask``.

    The image is Otsu-thresholded on its grayscale version.  The polarity of
    the threshold result is chosen from the gray level of ``bground_bgr``
    (compared against 127), the result is restricted to ``mask``, and the
    image pixels selected that way are averaged.

    Args:
        img: BGR image.
        mask: single-channel mask AND-ed with the thresholded image.
        bground_bgr: background color as an indexable (b, g, r) triple.
        show_process: when true, show the threshold and a flat-colored preview.

    Returns:
        ``(letter_bgr, threshed)`` where ``letter_bgr`` is a list of three ints,
        or ``[-1, -1, -1]`` when no pixel was selected.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Gray level of the background color (BGR channel order).
    aver_bground_gray = 0.114 * bground_bgr[0] + 0.587 * bground_bgr[1] + 0.299 * bground_bgr[2]
    thresh_low = 127
    _, threshed = cv2.threshold(gray, 127, 255, cv2.THRESH_OTSU)

    # A dark background keeps the threshold result as is; otherwise it is inverted.
    background_is_dark = aver_bground_gray < thresh_low
    if not background_is_dark:
        threshed = 255 - threshed

    threshed = cv2.bitwise_and(threshed, mask)
    le_region = np.where(threshed == 255)
    letter_pixels = img[le_region]

    if letter_pixels.shape[0] == 0:
        # Nothing survived thresholding + masking.
        return [-1, -1, -1], threshed

    letter_bgr = np.mean(letter_pixels, axis=0).astype(int).tolist()

    if show_process:
        cv2.imshow("thresh", threshed)
        # Flat gray canvas with the letter pixels painted in the mean color.
        preview = np.full_like(img, 127)
        preview[le_region] = letter_bgr
        cv2.imshow("letter_img", preview)

    return letter_bgr, threshed

# Pre-processing that makes the text color extraction a bit more accurate
def usm(src):
    """Sharpen ``src`` with an unsharp mask.

    The result is ``1.5 * src - 0.5 * blur`` where ``blur`` is ``src`` blurred
    by a Gaussian with sigma 5 (kernel size derived from sigma).

    The previous implementation also assembled an unused side-by-side
    comparison image with a hard-coded 3-channel layout; that dead code is
    gone, so the function no longer depends on ``src`` having 3 channels.
    """
    blur_img = cv2.GaussianBlur(src, (0, 0), 5)
    return cv2.addWeighted(src, 1.5, blur_img, -0.5, 0)

# Second method for computing the mean text BGR; a median might work better than the mean
def textbgr_calculator(img, text_mask, show_process=False):
    """Return the mean BGR color of the text pixels as a list of three ints.

    The mask is eroded once, the image is sharpened with ``usm`` and the
    sharpened pixels where the eroded mask equals 255 are averaged.

    Args:
        img: BGR image.
        text_mask: single-channel mask; pixels equal to 255 mark text.
        show_process: when true, show the sharpened image and a preview board
            painted with the mean color.

    Returns:
        ``[b, g, r]`` as Python ints.
    """
    # NOTE(review): the second positional argument of cv2.erode is the kernel itself,
    # not a kernel size - `(3, 3)` is kept as is to preserve results; confirm intent.
    text_mask = cv2.erode(text_mask, (3, 3), iterations=1)
    usm_img = usm(img)
    text_region = np.where(text_mask == 255)
    overall_meanbgr = np.mean(usm_img[text_region], axis=0)
    if show_process:
        colored_text_board = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8) + 127
        colored_text_board[text_region] = overall_meanbgr
        cv2.imshow("usm", usm_img)
        cv2.imshow("textcolor", colored_text_board)
    # `np.int` was removed in NumPy 1.24; the builtin int is the documented replacement.
    return overall_meanbgr.astype(int).tolist()

# Compute the mean BGR and the gray-level spread of the background
def bground_calculator(buble_img, back_ground_mask, dilate=True):
    """Measure the background color of a bubble image.

    Background pixels are those where ``back_ground_mask`` (optionally dilated
    once with a 3x3 kernel) equals 0.

    Args:
        buble_img: BGR image.
        back_ground_mask: single-channel mask; 0 marks background.
        dilate: dilate the mask before selecting the background.

    Returns:
        ``(bground_aver, bground_region, sd)``:
        * ``bground_aver`` - integer mean BGR of the background pixels, or
          ``array([-1, -1, -1])`` when there are none;
        * ``bground_region`` - the ``np.where`` index tuple of the background;
        * ``sd`` - mean squared deviation of the background gray levels (the
          square root is intentionally not taken, so this is the variance),
          or ``-1`` when there are no background pixels.
    """
    kernel = np.ones((3, 3), np.uint8)
    if dilate:
        back_ground_mask = cv2.dilate(back_ground_mask, kernel, iterations=1)
    bground_region = np.where(back_ground_mask == 0)

    if len(bground_region[0]) == 0:
        # No background pixel at all: report the sentinel values.
        return np.array([-1, -1, -1]), bground_region, -1

    pix_array = buble_img[bground_region]
    bground_aver = np.mean(pix_array, axis=0).astype(int)

    gray = cv2.cvtColor(buble_img, cv2.COLOR_BGR2GRAY)
    gray_pixarray = gray[bground_region]
    deviation = gray_pixarray - np.mean(gray_pixarray)
    sd = np.mean(np.power(deviation, 2))

    return bground_aver, bground_region, sd

# Input: text block ROI. Segment the text mask, compute the BGR mean and the
# gray-level spread from the mask, and decide between solid-color fill and inpainting.
def canny_flood(img, show_process=False, inpaint_sdthresh=10, inpaint=opencv_inpaint):
    """Segment text in a text-block ROI with Canny edges + flood fill and erase it.

    Args:
        img: BGR image of the text block.  When no rescaling is applied and the
            solid-fill branch is taken, this array is written to in place.
        show_process: when true, print diagnostics and show debug windows.
        inpaint_sdthresh: if the spread value returned by ``bground_calculator``
            is below this threshold the region is filled with the mean
            background color, otherwise ``inpaint`` is called.
        inpaint: callable ``inpaint(img, mask)`` returning the repaired image.

    Returns:
        ``(threshed, paint_res, bub_dict)`` where ``bub_dict`` has the keys
        ``"bgr"`` (letter color), ``"bground_bgr"`` (background color) and
        ``"inner_rect"`` (bounding rect values followed by ``-1``, or
        ``[-1, -1, -1, -1]`` when no rect was found).
    """
    # cv2.setNumThreads(4)
    WHITE = (255, 255, 255)
    BLACK = (0, 0, 0)
    kernel = np.ones((3,3),np.uint8)
    orih, oriw = img.shape[0], img.shape[1]
    # Shrink large inputs and enlarge small ones before edge detection.
    scaleR = 1
    if orih > 300 and oriw > 300:
        scaleR = 0.6
    elif orih < 120 or oriw < 120:
        scaleR = 1.4

    if scaleR != 1:
        h, w = img.shape[0], img.shape[1]
        # Keep a copy at the original size; it replaces `img` again after segmentation.
        orimg = np.copy(img)
        img = cv2.resize(img, (int(w*scaleR), int(h*scaleR)), interpolation=cv2.INTER_AREA)
    h, w = img.shape[0], img.shape[1]
    img_area = h * w

    cpimg = cv2.GaussianBlur(img,(3,3),cv2.BORDER_DEFAULT)
    detected_edges = cv2.Canny(cpimg, 70, 140, L2gradient=True, apertureSize=3)
    # A white frame is drawn around the edge map for contour extraction ...
    cv2.rectangle(detected_edges, (0, 0), (w-1, h-1), WHITE, 1, cv2.LINE_8)

    cons, hiers = cv2.findContours(detected_edges, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)

    # ... and removed again right after.
    cv2.rectangle(detected_edges, (0, 0), (w-1, h-1), BLACK, 1, cv2.LINE_8)

    outer_msk, outer_index = np.zeros((h, w), np.uint8), -1

    min_retval = np.inf
    mask = np.zeros((h, w), np.uint8)
    difres = 10
    seedpnt = (int(w/2), int(h/2))
    # Look for the contour that encloses the image center most tightly.
    for ii in range(len(cons)):
        rect = cv2.boundingRect(cons[ii])
        # Skip contours whose bounding box covers less than 40% of the image.
        if rect[2]*rect[3] < img_area*0.4:
            continue
        
        mask = cv2.drawContours(mask, cons, ii, (255), 2)
        cpmask = np.copy(mask)
        cv2.rectangle(mask, (0, 0), (w-1, h-1), WHITE, 1, cv2.LINE_8)
        # retval is the number of pixels filled from the center of the copy.
        retval, _, _, rect = cv2.floodFill(cpmask, mask=None, seedPoint=seedpnt,  flags=4, newVal=(127), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))

        # A fill of at most 30% of the image: erase this contour from the accumulated mask.
        if retval <= img_area * 0.3:
            mask = cv2.drawContours(mask, cons, ii, (0), 2)
        # Remember the smallest fill that is still larger than 30% of the image.
        if retval < min_retval and retval > img_area * 0.3:
            min_retval = retval
            outer_msk = cpmask

    # uint8 arithmetic: pixels filled with 127 become 0 here, all others become non-zero.
    outer_msk = 127 - outer_msk
    outer_msk = cv2.dilate(outer_msk, kernel,iterations = 1)
    outer_area, _, _, rect = cv2.floodFill(outer_msk, mask=None, seedPoint=seedpnt,  flags=4, newVal=(30), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
    outer_msk = 30 - outer_msk    
    retval, outer_msk = cv2.threshold(outer_msk, 1, 255, cv2.THRESH_BINARY)
    # After the inversion the region filled from the center is 255, everything else 0.
    outer_msk = cv2.bitwise_not(outer_msk, outer_msk)

    detected_edges = cv2.dilate(detected_edges, kernel, iterations = 1)
    # At most two passes; the outer mask is eroded once per pass.
    for ii in range(2):
        detected_edges = cv2.bitwise_and(detected_edges, outer_msk)
        mask = np.copy(detected_edges)
        # Flood the area reachable from the top-left and bottom-right corners.
        bgarea1, _, _, rect = cv2.floodFill(mask, mask=None, seedPoint=(0, 0),  flags=4, newVal=(127), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
        bgarea2, _, _, rect = cv2.floodFill(mask, mask=None, seedPoint=(detected_edges.shape[1]-1, detected_edges.shape[0]-1),  flags=4, newVal=(127), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
        txt_area = min(img_area - bgarea1, img_area - bgarea2)
        ratio_ob = txt_area / outer_area
        outer_msk = cv2.erode(outer_msk, kernel,iterations = 1)
        # Stop once the non-flooded area is below 85% of the outer area.
        if ratio_ob < 0.85:
            break

    # Pixels flood-filled with 127 become 0; the remaining pixels end up 255 after thresholding.
    mask = 127 - mask
    retval, mask = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
    if scaleR != 1:
        # Back to the original resolution (and to the copy of the original image).
        img = orimg
        outer_msk = cv2.resize(outer_msk, (oriw, orih))
        mask = cv2.resize(mask, (oriw, orih))

    # bg_mask is non-zero on the text mask and outside the outer mask;
    # bground_calculator treats its zero pixels as background.
    bg_mask = cv2.bitwise_or(mask, 255-outer_msk)
    mask = cv2.bitwise_and(mask, outer_msk)

    bground_aver, bground_region, sd = bground_calculator(img, bg_mask)
    inner_rect = None
    threshed = np.zeros((img.shape[0], img.shape[1]), np.uint8)

    # bground_aver[0] == -1 is the "no background pixels" sentinel of bground_calculator.
    if bground_aver[0] != -1:
        letter_aver, threshed = letter_calculator(img, mask, bground_aver, show_process=show_process)
        if letter_aver[0] != -1:
            mask = cv2.dilate(threshed, kernel, iterations=1)
            inner_rect = cv2.boundingRect(cv2.findNonZero(mask))
    else: letter_aver = [0, 0, 0]

    if sd != -1 and sd < inpaint_sdthresh:
        # Background spread below the threshold: paint the outer-mask region with the mean color.
        img[np.where(outer_msk==255)] = bground_aver
        paint_res = img
        use_inpaint = False
    else:
        # paint_res = inpaint(img, mask, outer_msk, bground_region, inpaint_type)
        paint_res = inpaint(img, mask)
        use_inpaint = True
    if show_process:
        print(f"\nuse inpaint: {use_inpaint}, sd: {sd}, {type(inner_rect)}")
        show_img_by_dict({"res": paint_res, "outermask": outer_msk, "detect": detected_edges, "mask": mask})


    # Normalize inner_rect to a list; a found rect gets a trailing -1 appended.
    if isinstance(inner_rect, tuple):
        inner_rect = [ii for ii in inner_rect]
    if inner_rect is None:
        inner_rect = [-1, -1, -1, -1]
    else:
        inner_rect.append(-1)
    
    bground_aver = bground_aver.astype(int).tolist()
    bub_dict = {"bgr": letter_aver,
                "bground_bgr": bground_aver,
                "inner_rect": inner_rect}
    return threshed, paint_res, bub_dict

# Input: text block ROI. Segment the text mask, compute the BGR mean and the
# gray-level spread from the mask, and decide between solid-color fill and inpainting.
def connected_canny_flood(img, show_process=False, inpaint_sdthresh=10, inpaint=opencv_inpaint, apply_strokewidth_check=0):
    """Segment text in a text-block ROI with connected components and erase it.

    Args:
        img: BGR image of the text block.  ``paint_res`` aliases this array, so
            the solid-fill branch writes into it in place.
        show_process: when true, print diagnostics and show debug windows.
        inpaint_sdthresh: if the spread value returned by ``bground_calculator``
            is below this threshold the outer-mask region is filled with the
            mean background color, otherwise ``inpaint`` is called.
        inpaint: callable ``inpaint(img, mask)`` returning the repaired image.
        apply_strokewidth_check: values greater than 0 run ``strokewidth_check``
            on the text mask.

    Returns:
        ``(text_mask, paint_res, bub_dict)`` where ``bub_dict`` has the keys
        ``"bgr"`` (text color), ``"bground_bgr"`` (background color) and
        ``"inner_rect"`` (bounding rect values followed by ``-1``).
    """

    # Find the outer-contour mask that is most likely the speech bubble
    def find_outermask(img):
        connectivity = 4
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(img, connectivity, cv2.CV_16U)
        drawtext = np.zeros((img.shape[0], img.shape[1]), np.uint8)
        
        # Label with the largest area (stats column 4).
        max_ind = np.argmax(stats[:, 4])
        # Label with the largest bounding box among the others; sec_ind is not used afterwards.
        maxbbox_area, sec_ind = -1, -1
        for ind, stat in enumerate(stats):
            if ind != max_ind:
                bbarea = stat[2] * stat[3]
                if bbarea > maxbbox_area:
                    maxbbox_area = bbarea
                    sec_ind = ind
        drawtext[np.where(labels==max_ind)] = 255
        
        # Clear the one-pixel image border before extracting contours.
        cv2.rectangle(drawtext, (0, 0), (img.shape[1]-1, img.shape[0]-1), (0, 0, 0), 1, cv2.LINE_8)
        cons, hiers = cv2.findContours(drawtext, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
        img_area = img.shape[0] * img.shape[1]

        # Keep only contours whose bounding box covers more than 30% of the image.
        rects = np.array([cv2.boundingRect(cnt) for cnt in cons])
        rect_area = np.array([rect[2] * rect[3] for rect in rects])
        quali_ind = np.where(rect_area > img_area * 0.3)[0]
        outer_mask = np.zeros((img.shape[0], img.shape[1]), np.uint8)
        for ind in quali_ind:
            outer_mask = cv2.drawContours(outer_mask, cons, ind, (255), 2)
        
        # Flood fill from the image center with value 127.
        seedpnt = (int(outer_mask.shape[1]/2), int(outer_mask.shape[0]/2))
        difres = 10
        retval, _, _, rect = cv2.floodFill(outer_mask, mask=None, seedPoint=seedpnt,  flags=4, newVal=(127), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
        # uint8 arithmetic: only the pixels filled with 127 become 0 after the subtraction,
        # so the final mask is 255 exactly on the flood-filled region.
        outer_mask = 255 - cv2.threshold(outer_mask - 127, 1, 255, cv2.THRESH_BINARY)[1]
        return num_labels, labels, stats, centroids, outer_mask

    # Converting BGR directly to grayscale can make text and background hard to
    # tell apart, e.g. red text on a black background in the test samples.
    # But there is always one channel in which text and background are easy to separate.
    # Return the channel that separates them best.
    def ccctest(img, crop_r=0.1):
        # img = usm(img)
        # Work on a copy at most 100 px high.
        maxh = 100
        if img.shape[0] > maxh:
            scaleR = maxh / img.shape[0]
            im = cv2.resize(img, (int(img.shape[1]*scaleR), int(img.shape[0]*scaleR)), interpolation=cv2.INTER_AREA)
        else:
            im = img

        textlabel_counter = 0
        reverse = False
        c_ind = 0

        num_labels, labels, stats, centroids, pseduo_outermask = find_outermask(cv2.threshold(cv2.cvtColor(im, cv2.COLOR_BGR2GRAY), 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)[1])
        # Append the grayscale image as a 4th channel so that index 3 means "gray".
        grayim = np.expand_dims(np.array(cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)), axis=2)
        im = np.append(im, grayim, axis=2)
        outer_cords = np.where(pseduo_outermask==255)
        for bgr_ind in range(4):
            channel = im[:, :, bgr_ind]
            ret, thresh = cv2.threshold(channel, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)

            tmp_reverse = False
            
            # Mostly white inside the outer mask: invert the threshold result.
            if np.mean(thresh[outer_cords]) > 160:
                thresh = 255 - thresh
                tmp_reverse = True

            num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, 4, cv2.CV_16U)
            # draw_connected_labels(num_labels, labels, stats, centroids)
            # cv2.waitKey(0)
            max_ind = np.argmax(stats[:, 4])
            maxr, minr = 0.5, 0.001
            maxw, maxh = stats[max_ind][2] * maxr, stats[max_ind][3] * maxr
            minarea = im.shape[0] * im.shape[1] * minr

            # Count components narrower and shorter than half of the largest component
            # and with a bounding box larger than 0.1% of the image.
            tmp_counter = 0
            for stat in stats:
                bboxarea = stat[2] * stat[3]
                if stat[2] < maxw and stat[3] < maxh and bboxarea > minarea:
                    tmp_counter += 1
            # The channel with the most such components wins.
            if tmp_counter > textlabel_counter:
                textlabel_counter = tmp_counter
                c_ind = bgr_ind
                reverse = tmp_reverse
        return c_ind, reverse
    
    channel_index, reverse = ccctest(img)
    # Index 3 selects the grayscale image, 0-2 select a BGR channel.
    chanel = img[:, :, channel_index] if channel_index < 3 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(chanel, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
    
    '''reverse to get white text on black bg'''
    if reverse:
        thresh = 255 - thresh
    num_labels, labels, stats, centroids, outer_mask = find_outermask(thresh)
    img_area = img.shape[0] * img.shape[1]
    text_mask = np.zeros((img.shape[0], img.shape[1]), np.uint8)
    max_ind = np.argmax(stats[:, 4])
    # Every component except the largest one, and smaller than 40% of the image, counts as text.
    for lab in (range(num_labels)):
        stat = stats[lab]
        if lab != max_ind and stat[4] < img_area * 0.4:
            labcord = np.where(labels==lab)
            text_mask[labcord] = 255

    # Restrict the text to the inside of the outer mask.
    text_mask = cv2.bitwise_and(text_mask, outer_mask)
    if apply_strokewidth_check > 0:
        text_mask = strokewidth_check(text_mask, labels, num_labels, stats, debug_type=show_process-1)
        
    text_color = textbgr_calculator(img, text_mask, show_process=show_process)
    inner_rect = cv2.boundingRect(cv2.findNonZero(cv2.dilate(text_mask, (3, 3), iterations=1)))
    inner_rect = [ii for ii in inner_rect]
    inner_rect.append(-1)

    # bg_mask is non-zero on the text and outside the outer mask;
    # bground_calculator treats its zero pixels as background.
    bg_mask = cv2.bitwise_or(text_mask, 255-outer_mask)

    bground_aver, bground_region, sd = bground_calculator(img, bg_mask)

    # paint_res is the same array object as img (no copy is made).
    paint_res = img
    roi_mask = cv2.GaussianBlur(text_mask,(3,3),cv2.BORDER_DEFAULT)
    _, roi_mask = cv2.threshold(roi_mask, 1, 255, cv2.THRESH_BINARY)
    if sd != -1 and sd < inpaint_sdthresh:
        # Background spread below the threshold: paint the outer-mask region with the mean color.
        paint_res[np.where(outer_mask==255)] = bground_aver
        use_inpaint = False
    else:
        # paint_res = inpaint(img, roi_mask, outer_mask, bground_region, inpaint_type)
        paint_res = inpaint(img, roi_mask)
        use_inpaint = True

    if show_process:
        print(f"\nuse inpaint: {use_inpaint}, sd: {sd}, {type(inner_rect)}")
        
        # Defined here but not called anywhere in this function.
        def box(width, height):
            return np.ones((height, width), dtype=np.uint8)
        draw_connected_labels(num_labels, labels, stats, centroids)
        show_img_by_dict({"thresh": thresh, "ori": img, "outer": outer_mask, "text": text_mask, "bgmask": bg_mask, "paintres": paint_res})

    bground_aver = bground_aver.astype(int).tolist()
    bub_dict = {"bgr": text_color,
                "bground_bgr": bground_aver,
                "inner_rect": inner_rect}
    return text_mask, paint_res, bub_dict
 No newline at end of file