set internal ndarray image channel order to RGB (30015313) · Commits · git-mirror / BallonsTranslator

modules/inpaint/base.py

+6 −6

Original line number	Diff line number	Diff line
		@@ -69,9 +69,9 @@ class InpainterBase(BaseModule):
		non_text_region = np.where(non_text_msk > 0)
		non_text_px = img[non_text_region]
		average_bg_color = np.median(non_text_px, axis=0)
		std_bgr = np.std(non_text_px - average_bg_color, axis=0)
		std_max = np.max(std_bgr)
		inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10
		std_rgb = np.std(non_text_px - average_bg_color, axis=0)
		std_max = np.max(std_rgb)
		inpaint_thresh = 7 if np.std(std_rgb) > 1 else 10
		if std_max < inpaint_thresh:
		img = img.copy()
		img[np.where(ballon_msk > 0)] = average_bg_color
		@@ -92,9 +92,9 @@ class InpainterBase(BaseModule):
		non_text_region = np.where(non_text_msk > 0)
		non_text_px = im[non_text_region]
		average_bg_color = np.median(non_text_px, axis=0)
		std_bgr = np.std(non_text_px - average_bg_color, axis=0)
		std_max = np.max(std_bgr)
		inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10
		std_rgb = np.std(non_text_px - average_bg_color, axis=0)
		std_max = np.max(std_rgb)
		inpaint_thresh = 7 if np.std(std_rgb) > 1 else 10
		if std_max < inpaint_thresh:
		need_inpaint = False
		im[np.where(ballon_msk > 0)] = average_bg_color

+2 −3

Original line number	Diff line number	Diff line
		@@ -602,10 +602,9 @@ class OCR32pxModel:
		seq.append(ch)

		textblk.text.append(''.join(seq))
		# manga-image-translator ocr extract bgr instead of rgb
		textblk.update_font_colors(
		[fb, fg, fr],
		[bb, bg, br]
		[fr, fg, fb],
		[br, bg, bb]
		)
		chunck_idx += N

+2 −2

Original line number	Diff line number	Diff line
		@@ -464,8 +464,8 @@ class OCR48pxCTC:
		continue
		textblk.text.append(''.join(cur_texts))
		textblk.update_font_colors(
		[int(total_fb()), int(total_fg()), int(total_fr())],
		[int(total_bb()), int(total_bg()), int(total_br())]
		[int(total_fr()), int(total_fg()), int(total_fb())],
		[int(total_br()), int(total_bg()), int(total_bb())]
		)
		chunck_idx += N

+1 −1

Original line number	Diff line number	Diff line
		@@ -471,7 +471,7 @@ class OCRLensAPI_exp(OCRBase):

		full_text = ""
		try:
		pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
		pil_img = Image.fromarray(img)
		processed_bytes, width, height = _preprocess_image_for_lens(pil_img)

		if not processed_bytes:

+3 −14

Original line number	Diff line number	Diff line
		@@ -150,18 +150,7 @@ class OcrEngine:
		def recognize_pil(self, image: PilImage.Image):
		if image.mode != 'RGBA':
		image = image.convert('RGBA')
		try:
		r, g, b, a = image.split()
		# Try BGRA reorder needed by some libs
		bgra_image = PilImage.merge('RGBA', (b, g, r, a))
		except ValueError:
		if self.logger:
		self.logger.warning(
		"Could not split RGBA, trying via RGB->BGRA.")
		rgb = image.convert('RGB')
		r, g, b = rgb.split()
		bgra_image = PilImage.merge('RGB', (b, g, r)).convert('RGBA')
		return self._process_image(cols=bgra_image.width, rows=bgra_image.height, step=bgra_image.width*4, data=bgra_image.tobytes())
		return self._process_image(cols=image.width, rows=image.height, step=image.width*4, data=image.tobytes())

		def _process_image(self, cols, rows, step, data):
		dp = ctypes.cast(data, c_ubyte_p) if not isinstance(
		@@ -364,9 +353,9 @@ class OCROneAPI(OCRBase):
		if len(img_to_process.shape) == 2:
		img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_GRAY2RGB)
		elif img_to_process.shape[2] == 3:
		img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_BGR2RGB)
		img_rgb = img_to_process
		elif img_to_process.shape[2] == 4:
		img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_BGRA2RGB)
		img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_RGBA2RGB)
		else:
		raise ValueError(
		f"Unsupported channels: {img_to_process.shape[2]}")