Commit a4808920 authored by dmMaze's avatar dmMaze
Browse files

improve eng->chi text layout

parent 3c9f8966
Loading
Loading
Loading
Loading
+15 −12
Original line number Diff line number Diff line
@@ -493,6 +493,7 @@ class SceneTextManager(QObject):

        max_central_width = np.inf
        if tgt_is_cjk:
            if ballon_area / text_area > 2:
                if blkitem.blk.text:
                    _, _, brw, brh = blkitem.blk.bounding_rect()
                    br_area = brw * brh
@@ -505,6 +506,9 @@ class SceneTextManager(QObject):
                        max_central_width = max(normalized_width_list)
                else:
                    resize_ratio = 1.1
            else:
                if ballon_area / text_area < 1.8:   # default eng->cjk font_size = 1.1 * detected_size, because detected eng bboxes are a bit small
                    resize_ratio = 0.9

        if resize_ratio != 1:
            new_font_size = blk_font.pointSizeF() * resize_ratio
@@ -515,7 +519,7 @@ class SceneTextManager(QObject):
            delimiter_len = int(delimiter_len * resize_ratio)

        if max_central_width != np.inf:
            max_central_width = int(max_central_width * text_w)
            max_central_width = max(int(max_central_width * text_w), 0.8 * region_rect[2])

        padding = pt2px(blk_font.pointSize()) + 20   # dummpy padding variable
        if fmt.alignment == 1:
@@ -535,7 +539,6 @@ class SceneTextManager(QObject):
                centroid[0] = int(abs_centroid[0] - mask_xyxy[0])
                centroid[1] = int(abs_centroid[1] - mask_xyxy[1])


        new_text, xywh = layout_text(mask, mask_xyxy, centroid, words, wl_list, delimiter, delimiter_len, blkitem.blk.angle, line_height, fmt.alignment, fmt.vertical, 0, padding, max_central_width)
        
        # font size post adjustment
+2 −2
Original line number Diff line number Diff line
@@ -148,7 +148,7 @@ def layout_lines_aligncenter(
            new_len = line.length + wl + delimiter_len
            new_x = centroid_x - new_len // 2
            right_x = new_x + new_len
            if new_x <= 0 or right_x >= bw:
            if new_x <= 0 or right_x >= bw or new_len > max_central_width:
                line_valid = False
            elif mask[pos_y: line_bottom, new_x].sum() > 0 or\
                mask[pos_y: line_bottom, right_x].sum() > 0:
@@ -181,7 +181,7 @@ def layout_lines_aligncenter(
            new_len = line.length + wl + delimiter_len
            new_x = centroid_x - new_len // 2
            right_x = new_x + new_len
            if new_x <= 0 or right_x >= bw:
            if new_x <= 0 or right_x >= bw or new_len > max_central_width:
                line_valid = False
            elif mask[pos_y: line_bottom, new_x].sum() > 0 or\
                mask[pos_y: line_bottom, right_x].sum() > 0:
+17 −5
Original line number Diff line number Diff line
@@ -2,8 +2,9 @@ from typing import List, Tuple
from tqdm import tqdm
import json

WIDE_MAP = {i: i + 0xFEE0 for i in range(0x21, 0x7F)}
WIDE_MAP[0x20] = 0x3000
HALF2FULL = {i: i + 0xFEE0 for i in range(0x21, 0x7F)}
HALF2FULL[0x20] = 0x3000
HALF2FULL[0x2E] = 0x3002
FULL2HALF = dict((i + 0xFEE0, i) for i in range(0x21, 0x7F))
FULL2HALF[0x3000] = 0x20
FULL2HALF[0x3002] = 0x2E
@@ -23,7 +24,7 @@ def full_len(s: str):
    Convert all ASCII characters to their full-width counterpart.
    https://stackoverflow.com/questions/2422177/python-how-can-i-replace-full-width-characters-with-half-width-characters 
    """
    return s.translate(WIDE_MAP)
    return s.translate(HALF2FULL)

def half_len(s):
    '''
@@ -125,14 +126,14 @@ def _seg_ch_pkg(text: str) -> List[str]:
                word_next, tag_next = segments[ii + 1]
                len_next = len(word_next)
                next_valid = True
                if tag_next != 'w' and word_next != '.':    # somehow pkgseg take '.' as 'n'
                if tag_next != 'w':
                    score_next = PKUSEGSCORES[tag][tag_next]
            
            if ii > 0:
                word_prev, tag_prev = words[-1], segments[ii - 1][1]
                len_prev = len(word_prev)
                prev_valid = True
                if tag_prev != 'w' and word_prev[-1] != '.':
                if tag_prev != 'w':
                    score_prev = PKUSEGSCORES[tag_prev][tag]

            append_prev, append_next = False, False
@@ -188,6 +189,13 @@ def seg_ch_pkg(text: str):
        import pkuseg
        CHSEG = pkuseg.pkuseg(postag=True)

    # pkuseg won't work with half-width punctuations
    fullen_text = full_len(text)
    cvt_back = False
    if fullen_text != text:
        cvt_back = True
        text = fullen_text

    global PKUSEGSCORES
    if PKUSEGSCORES is None:
        with open(PKUSEGPATH, 'r', encoding='utf8') as f:
@@ -204,6 +212,10 @@ def seg_ch_pkg(text: str):
            result_list.extend(words)
    if len(result_list) > 0:
        result_list = result_list[1:]

    if cvt_back:
        # pkuseg was given the full-width version of the text; convert the segmented words back to half-width
        result_list = [half_len(word) for word in result_list]
    return result_list

def seg_text(text: str, lang: str) -> Tuple[List, str]: