Unverified Commit ecebde29 authored by Sergey Pinus's avatar Sergey Pinus Committed by GitHub
Browse files

Merge branch 'dmMaze:dev' into dev

parents 5f4b0daa 5f154f0a
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -151,7 +151,7 @@ This project is heavily dependent upon [manga-image-translator](https://github.c
   * When using the Tuanzi Detector for text detection, it is recommended to set OCR to none_ocr to directly read the text, saving time and reducing the number of requests.
   * For detailed instructions, see **Tuanzi OCR Instructions**: ([Chinese](doc/团子OCR说明.md) & [Brazilian Portuguese](doc/Manual_TuanziOCR_pt-BR.md) only)
* Added PaddleOCR as an "optional" module. In Debug mode you will see a message stating that it is not installed. You can install it by following the instructions given in that message. If you don’t want to install the package yourself, just uncomment (remove the `#`) the lines with paddlepaddle(gpu) and paddleocr. Do this at your own risk. For me (bropines) and two testers, everything installed fine, but you may run into an error; if you do, open an issue about it and tag me.
* Added [OneOCR](https://github.com/b1tg/win11-oneocr). Local WINDOWS model taken from SnippingTOOL or Win.PHOTOS applications. To use it, you need to place the model and DLL files in the 'data/models/one-ocr' folder. Before running, it is better to throw the files at once. Read how to find and get DLL and model files here: https://gist.github.com/bropines/063b822e6eb274151c512ef7a311f259. Thanks AuroraWright for the project [OneOCR](https://github.com/AuroraWright/oneocr)
* Added [OneOCR](https://github.com/b1tg/win11-oneocr), a local Windows model taken from the Snipping Tool or Windows Photos applications. To use it, you need to place the model and DLL files in the 'data/models/one-ocr' folder. It is best to copy all of the files there before launching the program. Read how to find and get the DLL and model files here: https://github.com/dmMaze/BallonsTranslator/discussions/859#discussioncomment-12876757 . Thanks to AuroraWright for the project [OneOCR](https://github.com/AuroraWright/oneocr).

## Inpainting
  * AOT is from [manga-image-translator](https://github.com/zyddnys/manga-image-translator).
+10 −2
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ import numpy as np
import cv2

from .base import register_textdetectors, TextDetectorBase, TextBlock, DEVICE_SELECTOR
from utils.textblock import mit_merge_textlines, sort_regions
from utils.textblock import mit_merge_textlines, sort_regions, examine_textblk
from utils.textblock_mask import canny_flood
from utils.split_text_region import manga_split, split_textblock
from utils.imgproc_utils import xywh2xyxypoly
@@ -50,6 +50,7 @@ class YSGYoloDetector(TextDetectorBase):
            'path_filter': '*.pt *.ckpt *.pth *.safetensors',
            'size': 'median'
        },
        'merge text lines': True,
        'confidence threshold': 0.3,
        'IoU threshold': 0.5,
        'font size multiplier': 1.,
@@ -199,7 +200,14 @@ class YSGYoloDetector(TextDetectorBase):
                    cv2.fillPoly(mask, [pts], 255)
                pts_list += xyxy_list.tolist()

        if self.get_param_value('merge text lines'):
            blk_list += mit_merge_textlines(pts_list, width=im_w, height=im_h)
        else:
            for pts in pts_list:
                blk = TextBlock(lines=[pts])
                blk.adjust_bbox()
                examine_textblk(blk, im_w, im_h)
                blk_list.append(blk)
        blk_list = sort_regions(blk_list)

        fnt_rsz = self.get_param_value('font size multiplier')
+14 −9
Original line number Diff line number Diff line
@@ -227,7 +227,7 @@ def split_textblock(src_img, crop_ratio=0.2, blur=False, show_process=False, dis
    vars = (-1, -1)
    
    if len(bound0) < 2:
        return [TextSpan(0, height-1)], vars
        return [TextSpan(0, height-1, 0, width - 1)], vars

    base_span = TextSpan(bound0[0], bound0[-1])
    meanby_yaxis = sumby_yaxis.mean()
@@ -304,19 +304,24 @@ def manga_split(img, bbox=None, show_process=False, clip_width=False) -> list[Te

    span_list, _ = split_textblock(im, show_process=show_process, shrink=False, recheck=True, discard=False, crop_ratio=0)
    if span_list is None:
        return bboxes
    span_list, _ = shrink_span_list(im, span_list, shrink_vert_space=False)
        return [TextSpan(0, 0, im.shape[1], im.shape[0])]
    # span_list, _ = shrink_span_list(im, span_list, shrink_vert_space=False)
        
    for span in span_list:
    for ii, span in enumerate(span_list):
        left = span.left
        right = span.right
        if ii == 0:
            span.left = 0
        else:
            span.left = span.top
        if ii == len(span_list) - 1:
            span.right = im.shape[0]
        else:
            span.right = span.bottom
        span.top =  imw - right
        span.bottom = imw - left
        w, h = span.width, span.height
        span.height = w
        span.width = h
        span.height = span.bottom - span.top
        span.width = span.right - span.left

    return span_list