Commit cc049536 authored by dmMaze
Browse files

fix #134

parent e1dcb5f4
Loading
Loading
Loading
Loading
+9 −1
Original line number Diff line number Diff line
@@ -4,12 +4,18 @@ from shapely.geometry import Polygon
import math
import copy
import cv2
import re

from utils.imgproc_utils import union_area, xywh2xyxypoly, rotate_polygons, color_difference

# Language tags and their integer class indices. Each tag's position in
# LANG_LIST equals its value in LANGCLS2IDX.
LANG_LIST = ['eng', 'ja', 'unknown']
LANGCLS2IDX = {'eng': 0, 'ja': 1, 'unknown': 2}

# Matches a single character that falls in one of three Unicode ranges:
#   U+AC00-U+D7A3  Hangul Syllables
#   U+3040-U+30FF  Hiragana and Katakana
#   U+4E00-U+9FFF  CJK Unified Ideographs
# str.isalpha() is also True for these characters, so this pattern is what
# distinguishes them from space-separated alphabetic scripts.
# References:
# https://ayaka.shn.hk/hanregex/
# https://medium.com/the-artificial-impostor/detecting-chinese-characters-in-unicode-strings-4ac839ba313a
CJKPATTERN = re.compile(r'[\uac00-\ud7a3\u3040-\u30ff\u4e00-\u9FFF]')


class TextBlock(object):
    def __init__(self, xyxy: List, 
                 lines: List = None, 
@@ -262,7 +268,9 @@ class TextBlock(object):
        text = ''
        for t in self.text:
            if text and t:
                if text[-1].isalpha() and t[0].isalpha():
                if text[-1].isalpha() and t[0].isalpha() \
                    and CJKPATTERN.search(text[-1]) is None \
                    and CJKPATTERN.search(t[0]) is None:
                    text += ' '
            text += t

+4 −3
Original line number Diff line number Diff line
@@ -112,7 +112,7 @@ class KeywordSubWidget(Widget):
    def sub_text(self, text: str) -> str:
        for ii, subpair in enumerate(self.sublist):
            k = subpair['keyword']
            if not k:
            if k == '':
                continue
            
            regexr = k
@@ -129,5 +129,6 @@ class KeywordSubWidget(Widget):
                LOGGER.error(f'Invalid regex expression at line {ii}:')
                LOGGER.error(traceback.format_exc())
                continue
            
        #     print(f'k: {k}, {text}')
        # print('text: ', text)
        return text
 No newline at end of file