Unverified Commit 9e7ac86a authored by Sergey Pinus's avatar Sergey Pinus Committed by GitHub
Browse files

Merge branch 'dmMaze:dev' into dev

parents fd01e3a2 1bb8546c
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -760,6 +760,14 @@ QLabel#angleLabel {
    background-color: rgba(30, 147, 229, 20%);
}

/* Base text size for widgets matched by the ConfigClickableLabel class selector. */
.ConfigClickableLabel {
    font-size: 12px;
}

/* Hover highlight: blue (30, 147, 229) at 20% opacity, the same value used by
   .ExpandLabel::hover. */
.ConfigClickableLabel::hover {
    background-color: rgba(30, 147, 229, 20%);
}

/* Hover highlight for ExpandLabel: blue (30, 147, 229) at 20% opacity. */
.ExpandLabel::hover {
    background-color: rgba(30, 147, 229, 20%);
}
@@ -1049,6 +1057,14 @@ SmallComboBox {
    background-color: @transtexteditBackgroundColor;
}

/* 20x20 px widget with no border and a fully transparent background
   (alpha 0), so only its content is drawn.
   NOTE(review): "Putton" looks like a typo of "Button"; a type selector must
   match the widget class name, so confirm the class name before renaming. */
SmallConfigPutton {
    height: 20px;
    width: 20px;
    border: none;
    border-width: 0px;
    background-color: rgba(0, 0, 0, 0);
}

SmallSizeComboBox {
    height: 20px;
    font-size: 12px;
+1 −1
Original line number Diff line number Diff line
> [!IMPORTANT]  
> **번역 결과 공개적으로 공유하고 경험이 풍부한 인간 번역가가 번역 또는 교정에 참여하지 않은 경우, 기계 번역으로 명확하게 표시하십시오.**
> **번역 결과물을 공개적으로 공유할 때 숙련된 번역가가 번역이나 교정에 참여하지 않았다면, 기계 번역임을 잘 보이는 곳에 표시해 주세요.**

# BallonTranslator
[简体中文](/README.md) | [English](/README_EN.md) | [pt-BR](../doc/README_PT-BR.md) | [Русский](../doc/README_RU.md) | [日本語](../doc/README_JA.md) | [Indonesia](../doc/README_ID.md) | [Tiếng Việt](../doc/README_VI.md) | 한국어
+2 −1
Original line number Diff line number Diff line
@@ -92,7 +92,8 @@ class BaseModule:
                try:
                    param_value = type(p)(param_value)
                except ValueError:
                    self.logger.warning(f'Invalid param value {param_value} for defined dtype: {type(p)}')
                    self.logger.warning(f'Invalid param value {param_value} for defined dtype: {type(p)}, revert to original value {p}')
                    param_value = p
            self.params[param_key] = param_value

    def updateParam(self, param_key: str, param_content):
+7 −0
Original line number Diff line number Diff line
import numpy as np
import cv2
from typing import Tuple, List

from .base import register_textdetectors, TextDetectorBase, TextBlock, DEFAULT_DEVICE, DEVICE_SELECTOR, ProjImgTrans
@@ -31,6 +32,7 @@ class ComicTextDetector(TextDetectorBase):
        'font size multiplier': 1.,
        'font size max': -1,
        'font size min': -1,
        'mask dilate size': 2
    }
    _load_model_keys = {'model'}
    download_file_list = [{
@@ -75,6 +77,11 @@ class ComicTextDetector(TextDetectorBase):
            blk.font_size = sz
            blk._detected_font_size = sz

        ksize = self.get_param_value('mask dilate size')
        if ksize > 0:
            element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * ksize + 1, 2 * ksize + 1),(ksize, ksize))
            mask = cv2.dilate(mask, element)

        return mask, blk_list

    def updateParam(self, param_key: str, param_content):
+68 −6
Original line number Diff line number Diff line
@@ -7,7 +7,9 @@ import numpy as np
import cv2

from .base import register_textdetectors, TextDetectorBase, TextBlock, DEVICE_SELECTOR
from utils.textblock import mit_merge_textlines
from utils.textblock import mit_merge_textlines, sort_regions
from utils.textblock_mask import canny_flood
from utils.split_text_region import manga_split, split_textblock
from utils.imgproc_utils import xywh2xyxypoly
from utils.proj_imgtrans import ProjImgTrans

@@ -63,10 +65,13 @@ class YSGYoloDetector(TextDetectorBase):
                'vertical_textline': True, 
                'horizontal_textline': True, 
                'angled_vertical_textline': True, 
                'angled_horizontal_textline': True
                'angled_horizontal_textline': True,
                'textblock': True
            }, 
            'type': 'check_group'
        }
        },
        'source text is vertical': True,
        'mask dilate size': 2
    }

    _load_model_keys = {'yolo'}
@@ -81,7 +86,7 @@ class YSGYoloDetector(TextDetectorBase):
            self.yolo = YOLO(self.get_param_value('model path')).to(device=self.get_param_value('device'))

    def get_valid_labels(self):
        """Return the names of the enabled labels, excluding ``'textblock'``.

        Reads the ``label`` check group from ``self.params`` and keeps, in
        dict order, every key whose value is truthy. The ``'textblock'`` key
        is skipped here even when it is enabled.

        Returns:
            list[str]: enabled label names other than ``'textblock'``.
        """
        # Single pass: the unfiltered list that used to be built first was
        # immediately overwritten, so it is not computed any more.
        return [
            name
            for name, enabled in self.params['label']['value'].items()
            if enabled and name != 'textblock'
        ]

    @property
@@ -97,17 +102,23 @@ class YSGYoloDetector(TextDetectorBase):
        )[0]
        valid_ids = []
        valid_labels = set(self.get_valid_labels())
        textblock_idx = -1
        for idx, name in result.names.items():
            if CLS_MAP[name] in valid_labels:
                valid_ids.append(idx)
            if name == 'qipao':
                textblock_idx = idx
        need_textblock = self.params['label']['value']['textblock'] == True

        mask = np.zeros_like(img[..., 0])
        if len(valid_ids) == 0:
        if len(valid_ids) == 0 and not need_textblock:
            return [], mask

        im_h, im_w = img.shape[:2]
        pts_list = []

        blk_list = []

        dets = result.boxes
        if dets is not None and len(dets.cls) > 0:
            device = dets.cls.device
@@ -126,6 +137,51 @@ class YSGYoloDetector(TextDetectorBase):
                xyxy_list[:, [2, 3]] -= xyxy_list[:, [0, 1]]
                pts_list += xywh2xyxypoly(xyxy_list).reshape(-1, 4, 2).tolist()
            
            if need_textblock:
                valid_mask = dets.cls == textblock_idx
                is_vertical = self.get_param_value('source text is vertical')
                if torch.any(valid_mask):
                    xyxy_list = dets.xyxy[valid_mask]
                    xyxy_list = xyxy_list.to(device='cpu', dtype=torch.float32).round().to(torch.int32)
                    xyxy_list[:, [0, 2]] = torch.clip(xyxy_list[:, [0, 2]], 0, im_w - 1)
                    xyxy_list[:, [1, 3]] = torch.clip(xyxy_list[:, [1, 3]], 0, im_h - 1)
                    xyxy_list = xyxy_list.numpy()
                    for xyxy in xyxy_list:
                        x1, y1, x2, y2 = xyxy
                        crop = img[y1: y2, x1: x2]
                        bmask  = canny_flood(crop)[0]
                        if is_vertical:
                            span_list = manga_split(bmask)
                            lines = [[line.left + x1, line.top + y1, line.width, line.height] for line in span_list]
                            lines = np.array(lines)[::-1]
                            font_sz = np.mean(lines[:, 2])
                        else:
                            span_list = split_textblock(bmask)[0]
                            lines = [[line.left + x1, line.top + y1, line.width, line.height] for line in span_list]
                            lines = np.array(lines)
                            font_sz = np.mean(lines[:, 3])
                        for line in lines:
                            x1, y1, x2, y2 = line
                            x2 += x1
                            y2 += y1
                            cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
                        lines = xywh2xyxypoly(lines).reshape(-1, 4, 2).tolist()
                        blk = TextBlock(xyxy=xyxy, lines=np.array(lines), src_is_vertical=is_vertical, vertical=is_vertical)
                        blk.font_size = font_sz
                        blk._detected_font_size = font_sz
                        if is_vertical:
                            blk.alignment = 1
                        else:
                            blk.recalulate_alignment()

                        blk_list.append(blk)
                        
                        # cv2.imwrite('mask.jpg', mask)
                        # for ii in range(len(blk.lines)):
                        #     rst = blk.get_transformed_region(img, ii, 48)
                        #     cv2.imwrite('local_tst.jpg', rst)
                        #     pass

        # oriented objects
        dets = result.obb
        if dets is not None and len(dets.cls) > 0:
@@ -143,7 +199,8 @@ class YSGYoloDetector(TextDetectorBase):
                    cv2.fillPoly(mask, [pts], 255)
                pts_list += xyxy_list.tolist()

        blk_list: List[TextBlock] = mit_merge_textlines(pts_list, width=im_w, height=im_h)
        blk_list += mit_merge_textlines(pts_list, width=im_w, height=im_h)
        blk_list = sort_regions(blk_list)

        fnt_rsz = self.get_param_value('font size multiplier')
        fnt_max = self.get_param_value('font size max')
@@ -157,6 +214,11 @@ class YSGYoloDetector(TextDetectorBase):
            blk.font_size = sz
            blk._detected_font_size = sz

        ksize = self.get_param_value('mask dilate size')
        if ksize > 0:
            element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * ksize + 1, 2 * ksize + 1),(ksize, ksize))
            mask = cv2.dilate(mask, element)
            
        return mask, blk_list

    def updateParam(self, param_key: str, param_content):
Loading