Merge branch 'dmMaze:dev' into dev (ecebde29) · Commits · git-mirror / BallonsTranslator

README_EN.md

+1 −1

Original line number	Diff line number	Diff line
		@@ -151,7 +151,7 @@ This project is heavily dependent upon [manga-image-translator](https://github.c
		* When using the Tuanzi Detector for text detection, it is recommended to set OCR to none_ocr to directly read the text, saving time and reducing the number of requests.
		* For detailed instructions, see Tuanzi OCR Instructions: ([Chinese](doc/团子OCR说明.md) & [Brazilian Portuguese](doc/Manual_TuanziOCR_pt-BR.md) only)
		* Added as an "optional" PaddleOCR module. In Debug mode you will see a message stating that it is not installed. You can simply install it by following the instructions described there. If you don’t want to install the package yourself, just uncomment (remove the `#`) the lines with paddlepaddle(gpu) and paddleocr. Do everything at your own peril and risk. For me (bropines) and two testers, everything installed fine, but you may get an error. If so, write about it in an issue and tag me.
		* Added [OneOCR](https://github.com/b1tg/win11-oneocr). Local WINDOWS model taken from SnippingTOOL or Win.PHOTOS applications. To use it, you need to place the model and DLL files in the 'data/models/one-ocr' folder. Before running, it is better to throw the files at once. Read how to find and get DLL and model files here: https://gist.github.com/bropines/063b822e6eb274151c512ef7a311f259. Thanks AuroraWright for the project [OneOCR](https://github.com/AuroraWright/oneocr)
		* Added [OneOCR](https://github.com/b1tg/win11-oneocr). Local WINDOWS model taken from SnippingTOOL or Win.PHOTOS applications. To use it, you need to place the model and DLL files in the 'data/models/one-ocr' folder. Before running, it is better to throw the files at once. Read how to find and get DLL and model files here: https://github.com/dmMaze/BallonsTranslator/discussions/859#discussioncomment-12876757 . Thanks AuroraWright for the project [OneOCR](https://github.com/AuroraWright/oneocr)

		## Inpainting
		* AOT is from [manga-image-translator](https://github.com/zyddnys/manga-image-translator).

modules/textdetector/detector_ysg.py

+10 −2

Original line number	Diff line number	Diff line
		@@ -7,7 +7,7 @@ import numpy as np
		import cv2

		from .base import register_textdetectors, TextDetectorBase, TextBlock, DEVICE_SELECTOR
		from utils.textblock import mit_merge_textlines, sort_regions
		from utils.textblock import mit_merge_textlines, sort_regions, examine_textblk
		from utils.textblock_mask import canny_flood
		from utils.split_text_region import manga_split, split_textblock
		from utils.imgproc_utils import xywh2xyxypoly
		@@ -50,6 +50,7 @@ class YSGYoloDetector(TextDetectorBase):
		'path_filter': '.pt .ckpt .pth .safetensors',
		'size': 'median'
		},
		'merge text lines': True,
		'confidence threshold': 0.3,
		'IoU threshold': 0.5,
		'font size multiplier': 1.,
		@@ -199,7 +200,14 @@ class YSGYoloDetector(TextDetectorBase):
		cv2.fillPoly(mask, [pts], 255)
		pts_list += xyxy_list.tolist()

		if self.get_param_value('merge text lines'):
		blk_list += mit_merge_textlines(pts_list, width=im_w, height=im_h)
		else:
		for pts in pts_list:
		blk = TextBlock(lines=[pts])
		blk.adjust_bbox()
		examine_textblk(blk, im_w, im_h)
		blk_list.append(blk)
		blk_list = sort_regions(blk_list)

		fnt_rsz = self.get_param_value('font size multiplier')

utils/split_text_region.py

+14 −9

Original line number	Diff line number	Diff line
		@@ -227,7 +227,7 @@ def split_textblock(src_img, crop_ratio=0.2, blur=False, show_process=False, dis
		vars = (-1, -1)

		if len(bound0) < 2:
		return [TextSpan(0, height-1)], vars
		return [TextSpan(0, height-1, 0, width - 1)], vars

		base_span = TextSpan(bound0[0], bound0[-1])
		meanby_yaxis = sumby_yaxis.mean()
		@@ -304,19 +304,24 @@ def manga_split(img, bbox=None, show_process=False, clip_width=False) -> list[Te

		span_list, _ = split_textblock(im, show_process=show_process, shrink=False, recheck=True, discard=False, crop_ratio=0)
		if span_list is None:
		return bboxes
		span_list, _ = shrink_span_list(im, span_list, shrink_vert_space=False)
		return [TextSpan(0, 0, im.shape[1], im.shape[0])]
		# span_list, _ = shrink_span_list(im, span_list, shrink_vert_space=False)

		for span in span_list:
		for ii, span in enumerate(span_list):
		left = span.left
		right = span.right
		if ii == 0:
		span.left = 0
		else:
		span.left = span.top
		if ii == len(span_list) - 1:
		span.right = im.shape[0]
		else:
		span.right = span.bottom
		span.top = imw - right
		span.bottom = imw - left
		w, h = span.width, span.height
		span.height = w
		span.width = h
		span.height = span.bottom - span.top
		span.width = span.right - span.left

		return span_list