Loading modules/inpaint/base.py +6 −6 Original line number Diff line number Diff line Loading @@ -69,9 +69,9 @@ class InpainterBase(BaseModule): non_text_region = np.where(non_text_msk > 0) non_text_px = img[non_text_region] average_bg_color = np.median(non_text_px, axis=0) std_bgr = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_bgr) inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10 std_rgb = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_rgb) inpaint_thresh = 7 if np.std(std_rgb) > 1 else 10 if std_max < inpaint_thresh: img = img.copy() img[np.where(ballon_msk > 0)] = average_bg_color Loading @@ -92,9 +92,9 @@ class InpainterBase(BaseModule): non_text_region = np.where(non_text_msk > 0) non_text_px = im[non_text_region] average_bg_color = np.median(non_text_px, axis=0) std_bgr = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_bgr) inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10 std_rgb = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_rgb) inpaint_thresh = 7 if np.std(std_rgb) > 1 else 10 if std_max < inpaint_thresh: need_inpaint = False im[np.where(ballon_msk > 0)] = average_bg_color Loading modules/ocr/mit32px.py +2 −3 Original line number Diff line number Diff line Loading @@ -602,10 +602,9 @@ class OCR32pxModel: seq.append(ch) textblk.text.append(''.join(seq)) # manga-image-translator ocr extract bgr instead of rgb textblk.update_font_colors( [fb, fg, fr], [bb, bg, br] [fr, fg, fb], [br, bg, bb] ) chunck_idx += N Loading modules/ocr/mit48px_ctc.py +2 −2 Original line number Diff line number Diff line Loading @@ -464,8 +464,8 @@ class OCR48pxCTC: continue textblk.text.append(''.join(cur_texts)) textblk.update_font_colors( [int(total_fb()), int(total_fg()), int(total_fr())], [int(total_bb()), int(total_bg()), int(total_br())] [int(total_fr()), int(total_fg()), int(total_fb())], [int(total_br()), int(total_bg()), int(total_bb())] ) chunck_idx += N Loading modules/ocr/ocr_lens_proto.py +1 −1 Original line number Diff line number Diff line Loading @@ -471,7 +471,7 @@ class OCRLensAPI_exp(OCRBase): full_text = "" try: pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) pil_img = Image.fromarray(img) processed_bytes, width, height = _preprocess_image_for_lens(pil_img) if not processed_bytes: Loading modules/ocr/ocr_oneocr.py +3 −14 Original line number Diff line number Diff line Loading @@ -150,18 +150,7 @@ class OcrEngine: def recognize_pil(self, image: PilImage.Image): if image.mode != 'RGBA': image = image.convert('RGBA') try: r, g, b, a = image.split() # Try BGRA reorder needed by some libs bgra_image = PilImage.merge('RGBA', (b, g, r, a)) except ValueError: if self.logger: self.logger.warning( "Could not split RGBA, trying via RGB->BGRA.") rgb = image.convert('RGB') r, g, b = rgb.split() bgra_image = PilImage.merge('RGB', (b, g, r)).convert('RGBA') return self._process_image(cols=bgra_image.width, rows=bgra_image.height, step=bgra_image.width*4, data=bgra_image.tobytes()) return self._process_image(cols=image.width, rows=image.height, step=image.width*4, data=image.tobytes()) def _process_image(self, cols, rows, step, data): dp = ctypes.cast(data, c_ubyte_p) if not isinstance( Loading Loading @@ -364,9 +353,9 @@ class OCROneAPI(OCRBase): if len(img_to_process.shape) == 2: img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_GRAY2RGB) elif img_to_process.shape[2] == 3: img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_BGR2RGB) img_rgb = img_to_process elif img_to_process.shape[2] == 4: img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_BGRA2RGB) img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_RGBA2RGB) else: raise ValueError( f"Unsupported channels: {img_to_process.shape[2]}") Loading Loading
modules/inpaint/base.py +6 −6 Original line number Diff line number Diff line Loading @@ -69,9 +69,9 @@ class InpainterBase(BaseModule): non_text_region = np.where(non_text_msk > 0) non_text_px = img[non_text_region] average_bg_color = np.median(non_text_px, axis=0) std_bgr = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_bgr) inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10 std_rgb = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_rgb) inpaint_thresh = 7 if np.std(std_rgb) > 1 else 10 if std_max < inpaint_thresh: img = img.copy() img[np.where(ballon_msk > 0)] = average_bg_color Loading @@ -92,9 +92,9 @@ class InpainterBase(BaseModule): non_text_region = np.where(non_text_msk > 0) non_text_px = im[non_text_region] average_bg_color = np.median(non_text_px, axis=0) std_bgr = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_bgr) inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10 std_rgb = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_rgb) inpaint_thresh = 7 if np.std(std_rgb) > 1 else 10 if std_max < inpaint_thresh: need_inpaint = False im[np.where(ballon_msk > 0)] = average_bg_color Loading
modules/ocr/mit32px.py +2 −3 Original line number Diff line number Diff line Loading @@ -602,10 +602,9 @@ class OCR32pxModel: seq.append(ch) textblk.text.append(''.join(seq)) # manga-image-translator ocr extract bgr instead of rgb textblk.update_font_colors( [fb, fg, fr], [bb, bg, br] [fr, fg, fb], [br, bg, bb] ) chunck_idx += N Loading
modules/ocr/mit48px_ctc.py +2 −2 Original line number Diff line number Diff line Loading @@ -464,8 +464,8 @@ class OCR48pxCTC: continue textblk.text.append(''.join(cur_texts)) textblk.update_font_colors( [int(total_fb()), int(total_fg()), int(total_fr())], [int(total_bb()), int(total_bg()), int(total_br())] [int(total_fr()), int(total_fg()), int(total_fb())], [int(total_br()), int(total_bg()), int(total_bb())] ) chunck_idx += N Loading
modules/ocr/ocr_lens_proto.py +1 −1 Original line number Diff line number Diff line Loading @@ -471,7 +471,7 @@ class OCRLensAPI_exp(OCRBase): full_text = "" try: pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) pil_img = Image.fromarray(img) processed_bytes, width, height = _preprocess_image_for_lens(pil_img) if not processed_bytes: Loading
modules/ocr/ocr_oneocr.py +3 −14 Original line number Diff line number Diff line Loading @@ -150,18 +150,7 @@ class OcrEngine: def recognize_pil(self, image: PilImage.Image): if image.mode != 'RGBA': image = image.convert('RGBA') try: r, g, b, a = image.split() # Try BGRA reorder needed by some libs bgra_image = PilImage.merge('RGBA', (b, g, r, a)) except ValueError: if self.logger: self.logger.warning( "Could not split RGBA, trying via RGB->BGRA.") rgb = image.convert('RGB') r, g, b = rgb.split() bgra_image = PilImage.merge('RGB', (b, g, r)).convert('RGBA') return self._process_image(cols=bgra_image.width, rows=bgra_image.height, step=bgra_image.width*4, data=bgra_image.tobytes()) return self._process_image(cols=image.width, rows=image.height, step=image.width*4, data=image.tobytes()) def _process_image(self, cols, rows, step, data): dp = ctypes.cast(data, c_ubyte_p) if not isinstance( Loading Loading @@ -364,9 +353,9 @@ class OCROneAPI(OCRBase): if len(img_to_process.shape) == 2: img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_GRAY2RGB) elif img_to_process.shape[2] == 3: img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_BGR2RGB) img_rgb = img_to_process elif img_to_process.shape[2] == 4: img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_BGRA2RGB) img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_RGBA2RGB) else: raise ValueError( f"Unsupported channels: {img_to_process.shape[2]}") Loading