Commit 30015313 authored by dmMaze's avatar dmMaze
Browse files

set internal ndarray image channel order to RGB

parent 72caedb6
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -69,9 +69,9 @@ class InpainterBase(BaseModule):
                    non_text_region = np.where(non_text_msk > 0)
                    non_text_px = img[non_text_region]
                    average_bg_color = np.median(non_text_px, axis=0)
                    std_bgr = np.std(non_text_px - average_bg_color, axis=0)
                    std_max = np.max(std_bgr)
                    inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10
                    std_rgb = np.std(non_text_px - average_bg_color, axis=0)
                    std_max = np.max(std_rgb)
                    inpaint_thresh = 7 if np.std(std_rgb) > 1 else 10
                    if std_max < inpaint_thresh:
                        img = img.copy()
                        img[np.where(ballon_msk > 0)] = average_bg_color
@@ -92,9 +92,9 @@ class InpainterBase(BaseModule):
                        non_text_region = np.where(non_text_msk > 0)
                        non_text_px = im[non_text_region]
                        average_bg_color = np.median(non_text_px, axis=0)
                        std_bgr = np.std(non_text_px - average_bg_color, axis=0)
                        std_max = np.max(std_bgr)
                        inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10
                        std_rgb = np.std(non_text_px - average_bg_color, axis=0)
                        std_max = np.max(std_rgb)
                        inpaint_thresh = 7 if np.std(std_rgb) > 1 else 10
                        if std_max < inpaint_thresh:
                            need_inpaint = False
                            im[np.where(ballon_msk > 0)] = average_bg_color
+2 −3
Original line number Diff line number Diff line
@@ -602,10 +602,9 @@ class OCR32pxModel:
                    seq.append(ch)
                    
                textblk.text.append(''.join(seq))
                # manga-image-translator ocr extract bgr instead of rgb
                textblk.update_font_colors(
                    [fb, fg, fr],
                    [bb, bg, br]
                    [fr, fg, fb],
                    [br, bg, bb]
                )
            chunck_idx += N

+2 −2
Original line number Diff line number Diff line
@@ -464,8 +464,8 @@ class OCR48pxCTC:
                    continue
                textblk.text.append(''.join(cur_texts))
                textblk.update_font_colors(
                    [int(total_fb()), int(total_fg()), int(total_fr())],
                    [int(total_bb()), int(total_bg()), int(total_br())]
                    [int(total_fr()), int(total_fg()), int(total_fb())],
                    [int(total_br()), int(total_bg()), int(total_bb())]
                )
            chunck_idx += N

+1 −1
Original line number Diff line number Diff line
@@ -471,7 +471,7 @@ class OCRLensAPI_exp(OCRBase):

        full_text = ""
        try:
            pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            pil_img = Image.fromarray(img)
            processed_bytes, width, height = _preprocess_image_for_lens(pil_img)

            if not processed_bytes:
+3 −14
Original line number Diff line number Diff line
@@ -150,18 +150,7 @@ class OcrEngine:
    def recognize_pil(self, image: PilImage.Image):
        """Run recognition on a PIL image by handing its raw bytes to ``_process_image``.

        The image is first converted to mode ``'RGBA'`` when it is not already
        in that mode. The result of ``self._process_image`` is returned as-is.

        NOTE(review): this listing looks like a rendered diff in which removed
        and added lines are interleaved. As written, the first ``return``
        (channel-swapped bytes) always wins and the second ``return``
        (unswapped bytes) is unreachable -- confirm which one is current.
        """
        # Normalise to four channels so the split below yields r, g, b, a.
        if image.mode != 'RGBA':
            image = image.convert('RGBA')
        try:
            r, g, b, a = image.split()
            # Try BGRA reorder needed by some libs
            # The merged image is still labelled 'RGBA', but its bands are
            # stored in b, g, r, a order.
            bgra_image = PilImage.merge('RGBA', (b, g, r, a))
        except ValueError:
            # Fallback: drop to RGB, swap the red and blue bands, then convert
            # back to a four-channel image.
            if self.logger:
                self.logger.warning(
                    "Could not split RGBA, trying via RGB->BGRA.")
            rgb = image.convert('RGB')
            r, g, b = rgb.split()
            bgra_image = PilImage.merge('RGB', (b, g, r)).convert('RGBA')
        # step is passed as width * 4 -- presumably the row stride in bytes
        # for a four-channel, 8-bit-per-channel image; verify against
        # _process_image.
        return self._process_image(cols=bgra_image.width, rows=bgra_image.height, step=bgra_image.width*4, data=bgra_image.tobytes())
        # Unreachable as written (see the NOTE in the docstring): passes the
        # RGBA image's bytes without any channel swap.
        return self._process_image(cols=image.width, rows=image.height, step=image.width*4, data=image.tobytes())

    def _process_image(self, cols, rows, step, data):
        dp = ctypes.cast(data, c_ubyte_p) if not isinstance(
@@ -364,9 +353,9 @@ class OCROneAPI(OCRBase):
            if len(img_to_process.shape) == 2:
                img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_GRAY2RGB)
            elif img_to_process.shape[2] == 3:
                img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_BGR2RGB)
                img_rgb = img_to_process
            elif img_to_process.shape[2] == 4:
                img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_BGRA2RGB)
                img_rgb = cv2.cvtColor(img_to_process, cv2.COLOR_RGBA2RGB)
            else:
                raise ValueError(
                    f"Unsupported channels: {img_to_process.shape[2]}")
Loading