Loading modules/base.py +42 −1 Original line number Diff line number Diff line import gc import os from typing import Dict import time from typing import Dict, List, Callable, Union from copy import deepcopy from collections import OrderedDict from utils.logger import logger as LOGGER GPUINTENSIVE_SET = {'cuda', 'mps'} def register_hooks(hooks_registered: OrderedDict, callbacks: Union[List, Callable, Dict]): if callbacks is None: return if isinstance(callbacks, (Dict, OrderedDict)): for k, v in callbacks.items(): hooks_registered[k] = v else: nhooks = len(hooks_registered) if isinstance(callbacks, Callable): callbacks = [callbacks] for callback in callbacks: hk = 'hook_' + str(nhooks).zfill(2) while True: if hk not in hooks_registered: break hk = hk + '_' + str(time.time_ns()) hooks_registered[hk] = callback nhooks += 1 class BaseModule: params: Dict = None logger = LOGGER _preprocess_hooks: OrderedDict = None _postprocess_hooks: OrderedDict = None def __init__(self, **params) -> None: if params: if self.params is None: Loading @@ -19,6 +44,22 @@ class BaseModule: else: self.params.update(params) @classmethod def register_postprocess_hooks(cls, callbacks: Union[List, Callable]): """ these hooks would be shared among all objects inherited from the same super class """ assert cls._postprocess_hooks is not None register_hooks(cls._postprocess_hooks, callbacks) @classmethod def register_preprocess_hooks(cls, callbacks: Union[List, Callable, Dict]): """ these hooks would be shared among all objects inherited from the same super class """ assert cls._preprocess_hooks is not None register_hooks(cls._preprocess_hooks, callbacks) def updateParam(self, param_key: str, param_content): self_param_content = self.params[param_key] if isinstance(self_param_content, (str, float, int)): Loading modules/inpaint/__init__.py +1 −430 Original line number Diff line number Diff line import numpy as np import cv2 from typing import Dict, List from utils.registry import Registry from utils.textblock_mask import extract_ballon_mask from utils.imgproc_utils import enlarge_window from ..base import BaseModule, DEFAULT_DEVICE, gc_collect, DEVICE_SELECTOR, GPUINTENSIVE_SET from ..textdetector import TextBlock INPAINTERS = Registry('inpainters') register_inpainter = INPAINTERS.register_module class InpainterBase(BaseModule): inpaint_by_block = True check_need_inpaint = True def __init__(self, **params) -> None: super().__init__(**params) self.name = '' for key in INPAINTERS.module_dict: if INPAINTERS.module_dict[key] == self.__class__: self.name = key break self.setup_inpainter() def setup_inpainter(self): raise NotImplementedError def inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None, check_need_inpaint: bool = False) -> np.ndarray: if not self.inpaint_by_block or textblock_list is None: if check_need_inpaint: ballon_msk, non_text_msk = extract_ballon_mask(img, mask) if ballon_msk is not None: non_text_region = np.where(non_text_msk > 0) non_text_px = img[non_text_region] average_bg_color = np.mean(non_text_px, axis=0) std_bgr = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_bgr) inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10 if std_max < inpaint_thresh: img = img.copy() img[np.where(ballon_msk > 0)] = average_bg_color return img try: return self._inpaint(img, mask) except Exception as e: if isinstance(e, torch.cuda.OutOfMemoryError): gc_collect() return self._inpaint(img, mask) else: raise e else: im_h, im_w = img.shape[:2] inpainted = np.copy(img) for blk in textblock_list: xyxy = blk.xyxy xyxy_e = enlarge_window(xyxy, im_w, im_h, ratio=1.7) im = inpainted[xyxy_e[1]:xyxy_e[3], xyxy_e[0]:xyxy_e[2]] msk = mask[xyxy_e[1]:xyxy_e[3], xyxy_e[0]:xyxy_e[2]] need_inpaint = True if self.check_need_inpaint or check_need_inpaint: ballon_msk, non_text_msk = extract_ballon_mask(im, msk) if ballon_msk is not None: non_text_region = np.where(non_text_msk > 0) non_text_px = im[non_text_region] average_bg_color = np.mean(non_text_px, axis=0) std_bgr = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_bgr) inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10 if std_max < inpaint_thresh: need_inpaint = False im[np.where(ballon_msk > 0)] = average_bg_color # cv2.imshow('im', im) # cv2.imshow('ballon', ballon_msk) # cv2.imshow('non_text', non_text_msk) # cv2.waitKey(0) if need_inpaint: inpainted[xyxy_e[1]:xyxy_e[3], xyxy_e[0]:xyxy_e[2]] = self._inpaint(im, msk) mask[xyxy[1]:xyxy[3], xyxy[0]:xyxy[2]] = 0 return inpainted def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: raise NotImplementedError @register_inpainter('opencv-tela') class OpenCVInpainter(InpainterBase): def setup_inpainter(self): self.inpaint_method = lambda img, mask, *args, **kwargs: cv2.inpaint(img, mask, 3, cv2.INPAINT_NS) def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: return self.inpaint_method(img, mask) def is_computational_intensive(self) -> bool: return True def is_cpu_intensive(self) -> bool: return True @register_inpainter('patchmatch') class PatchmatchInpainter(InpainterBase): def setup_inpainter(self): from . import patch_match self.inpaint_method = lambda img, mask, *args, **kwargs: patch_match.inpaint(img, mask, patch_size=3) def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: return self.inpaint_method(img, mask) def is_computational_intensive(self) -> bool: return True def is_cpu_intensive(self) -> bool: return True import torch from utils.imgproc_utils import resize_keepasp from .aot import AOTGenerator, load_aot_model AOTMODEL: AOTGenerator = None AOTMODEL_PATH = 'data/models/aot_inpainter.ckpt' @register_inpainter('aot') class AOTInpainter(InpainterBase): params = { 'inpaint_size': { 'type': 'selector', 'options': [ 1024, 2048 ], 'select': 2048 }, 'device': DEVICE_SELECTOR(), 'description': 'manga-image-translator inpainter' } device = DEFAULT_DEVICE inpaint_size = 2048 model: AOTGenerator = None def setup_inpainter(self): global AOTMODEL self.device = self.params['device']['select'] if AOTMODEL is None: self.model = AOTMODEL = load_aot_model(AOTMODEL_PATH, self.device) else: self.model = AOTMODEL self.model.to(self.device) self.inpaint_by_block = self.device not in GPUINTENSIVE_SET self.inpaint_size = int(self.params['inpaint_size']['select']) def inpaint_preprocess(self, img: np.ndarray, mask: np.ndarray) -> np.ndarray: img_original = np.copy(img) mask_original = np.copy(mask) mask_original[mask_original < 127] = 0 mask_original[mask_original >= 127] = 1 mask_original = mask_original[:, :, None] new_shape = self.inpaint_size if max(img.shape[0: 2]) > self.inpaint_size else None img = resize_keepasp(img, new_shape, stride=None) mask = resize_keepasp(mask, new_shape, stride=None) im_h, im_w = img.shape[:2] pad_bottom = 128 - im_h if im_h < 128 else 0 pad_right = 128 - im_w if im_w < 128 else 0 mask = cv2.copyMakeBorder(mask, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) img_torch = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0).float() / 127.5 - 1.0 mask_torch = torch.from_numpy(mask).unsqueeze_(0).unsqueeze_(0).float() / 255.0 mask_torch[mask_torch < 0.5] = 0 mask_torch[mask_torch >= 0.5] = 1 if self.device != 'cpu': img_torch = img_torch.to(self.device) mask_torch = mask_torch.to(self.device) img_torch *= (1 - mask_torch) return img_torch, mask_torch, img_original, mask_original, pad_bottom, pad_right @torch.no_grad() def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: im_h, im_w = img.shape[:2] img_torch, mask_torch, img_original, mask_original, pad_bottom, pad_right = self.inpaint_preprocess(img, mask) img_inpainted_torch = self.model(img_torch, mask_torch) img_inpainted = ((img_inpainted_torch.cpu().squeeze_(0).permute(1, 2, 0).numpy() + 1.0) * 127.5).astype(np.uint8) if pad_bottom > 0: img_inpainted = img_inpainted[:-pad_bottom] if pad_right > 0: img_inpainted = img_inpainted[:, :-pad_right] new_shape = img_inpainted.shape[:2] if new_shape[0] != im_h or new_shape[1] != im_w : img_inpainted = cv2.resize(img_inpainted, (im_w, im_h), interpolation = cv2.INTER_LINEAR) img_inpainted = img_inpainted * mask_original + img_original * (1 - mask_original) return img_inpainted def updateParam(self, param_key: str, param_content): super().updateParam(param_key, param_content) if param_key == 'device': param_device = self.params['device']['select'] self.model.to(param_device) self.device = param_device self.inpaint_by_block = param_device not in GPUINTENSIVE_SET elif param_key == 'inpaint_size': self.inpaint_size = int(self.params['inpaint_size']['select']) from .lama import LamaFourier, load_lama_mpe LAMA_MPE: LamaFourier = None @register_inpainter('lama_mpe') class LamaInpainterMPE(InpainterBase): params = { 'inpaint_size': { 'type': 'selector', 'options': [ 1024, 2048 ], 'select': 2048 }, 'device': DEVICE_SELECTOR() } device = DEFAULT_DEVICE inpaint_size = 2048 def setup_inpainter(self): global LAMA_MPE self.device = self.params['device']['select'] if LAMA_MPE is None: self.model = LAMA_MPE = load_lama_mpe(r'data/models/lama_mpe.ckpt', self.device) else: self.model = LAMA_MPE self.model.to(self.device) self.inpaint_by_block = self.device not in GPUINTENSIVE_SET self.inpaint_size = int(self.params['inpaint_size']['select']) def inpaint_preprocess(self, img: np.ndarray, mask: np.ndarray) -> np.ndarray: img_original = np.copy(img) mask_original = np.copy(mask) mask_original[mask_original < 127] = 0 mask_original[mask_original >= 127] = 1 mask_original = mask_original[:, :, None] new_shape = self.inpaint_size if max(img.shape[0: 2]) > self.inpaint_size else None # high resolution input could produce cloudy artifacts img = resize_keepasp(img, new_shape, stride=64) mask = resize_keepasp(mask, new_shape, stride=64) im_h, im_w = img.shape[:2] longer = max(im_h, im_w) pad_bottom = longer - im_h if im_h < longer else 0 pad_right = longer - im_w if im_w < longer else 0 mask = cv2.copyMakeBorder(mask, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) img_torch = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0).float() / 255.0 mask_torch = torch.from_numpy(mask).unsqueeze_(0).unsqueeze_(0).float() / 255.0 mask_torch[mask_torch < 0.5] = 0 mask_torch[mask_torch >= 0.5] = 1 rel_pos, _, direct = self.model.load_masked_position_encoding(mask_torch[0][0].numpy()) rel_pos = torch.LongTensor(rel_pos).unsqueeze_(0) direct = torch.LongTensor(direct).unsqueeze_(0) if self.device != 'cpu': img_torch = img_torch.to(self.device) mask_torch = mask_torch.to(self.device) rel_pos = rel_pos.to(self.device) direct = direct.to(self.device) img_torch *= (1 - mask_torch) return img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right @torch.no_grad() def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: im_h, im_w = img.shape[:2] img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right = self.inpaint_preprocess(img, mask) img_inpainted_torch = self.model(img_torch, mask_torch, rel_pos, direct) img_inpainted = (img_inpainted_torch.cpu().squeeze_(0).permute(1, 2, 0).numpy() * 255).astype(np.uint8) if pad_bottom > 0: img_inpainted = img_inpainted[:-pad_bottom] if pad_right > 0: img_inpainted = img_inpainted[:, :-pad_right] new_shape = img_inpainted.shape[:2] if new_shape[0] != im_h or new_shape[1] != im_w : img_inpainted = cv2.resize(img_inpainted, (im_w, im_h), interpolation = cv2.INTER_LINEAR) img_inpainted = img_inpainted * mask_original + img_original * (1 - mask_original) return img_inpainted def updateParam(self, param_key: str, param_content): super().updateParam(param_key, param_content) if param_key == 'device': param_device = self.params['device']['select'] self.model.to(param_device) self.device = param_device self.inpaint_by_block = param_device not in GPUINTENSIVE_SET elif param_key == 'inpaint_size': self.inpaint_size = int(self.params['inpaint_size']['select']) # LAMA_ORI: LamaFourier = None # @register_inpainter('lama_ori') # class LamaInpainterORI(InpainterBase): # params = { # 'inpaint_size': { # 'type': 'selector', # 'options': [ # 1024, # 2048 # ], # 'select': 2048 # }, # 'device': { # 'type': 'selector', # 'options': [ # 'cpu', # 'cuda' # ], # 'select': DEFAULT_DEVICE # } # } # device = DEFAULT_DEVICE # inpaint_size = 2048 # def setup_inpainter(self): # global LAMA_ORI # self.device = self.params['device']['select'] # if LAMA_ORI is None: # self.model = LAMA_ORI = load_lama_mpe(r'data/models/lama_org.ckpt', self.device, False) # else: # self.model = LAMA_ORI # self.model.to(self.device) # self.inpaint_by_block = True if self.device == 'cuda' else False # self.inpaint_size = int(self.params['inpaint_size']['select']) # def inpaint_preprocess(self, img: np.ndarray, mask: np.ndarray) -> np.ndarray: # img_original = np.copy(img) # mask_original = np.copy(mask) # mask_original[mask_original < 127] = 0 # mask_original[mask_original >= 127] = 1 # mask_original = mask_original[:, :, None] # new_shape = self.inpaint_size if max(img.shape[0: 2]) > self.inpaint_size else None # # high resolution input could produce cloudy artifacts # img = resize_keepasp(img, new_shape, stride=64) # mask = resize_keepasp(mask, new_shape, stride=64) # im_h, im_w = img.shape[:2] # longer = max(im_h, im_w) # pad_bottom = longer - im_h if im_h < longer else 0 # pad_right = longer - im_w if im_w < longer else 0 # mask = cv2.copyMakeBorder(mask, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) # img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) # img_torch = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0).float() / 255.0 # mask_torch = torch.from_numpy(mask).unsqueeze_(0).unsqueeze_(0).float() / 255.0 # mask_torch[mask_torch < 0.5] = 0 # mask_torch[mask_torch >= 0.5] = 1 # rel_pos, _, direct = self.model.load_masked_position_encoding(mask_torch[0][0].numpy()) # rel_pos = torch.LongTensor(rel_pos).unsqueeze_(0) # direct = torch.LongTensor(direct).unsqueeze_(0) # if self.device == 'cuda': # img_torch = img_torch.cuda() # mask_torch = mask_torch.cuda() # rel_pos = rel_pos.cuda() # direct = direct.cuda() # img_torch *= (1 - mask_torch) # return img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right # @torch.no_grad() # def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: # im_h, im_w = img.shape[:2] # img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right = self.inpaint_preprocess(img, mask) # img_inpainted_torch = self.model(img_torch, mask_torch, rel_pos, direct) # img_inpainted = (img_inpainted_torch.cpu().squeeze_(0).permute(1, 2, 0).numpy() * 255).astype(np.uint8) # if pad_bottom > 0: # img_inpainted = img_inpainted[:-pad_bottom] # if pad_right > 0: # img_inpainted = img_inpainted[:, :-pad_right] # new_shape = img_inpainted.shape[:2] # if new_shape[0] != im_h or new_shape[1] != im_w : # img_inpainted = cv2.resize(img_inpainted, (im_w, im_h), interpolation = cv2.INTER_LINEAR) # img_inpainted = img_inpainted * mask_original + img_original * (1 - mask_original) # return img_inpainted # def updateParam(self, param_key: str, param_content): # super().updateParam(param_key, param_content) # if param_key == 'device': # param_device = self.params['device']['select'] # self.model.to(param_device) # self.device = param_device # if param_device == 'cuda': # self.inpaint_by_block = False # else: # self.inpaint_by_block = True # elif param_key == 'inpaint_size': # self.inpaint_size = int(self.params['inpaint_size']['select']) No newline at end of file from .base import * No newline at end of file modules/inpaint/base.py 0 → 100644 +435 −0 File added.Preview size limit exceeded, changes collapsed. Show changes modules/ocr/__init__.py +14 −19 Original line number Diff line number Diff line Loading @@ -2,6 +2,7 @@ from typing import Tuple, List, Dict, Union, Callable from ordered_set import OrderedSet import numpy as np import logging from collections import OrderedDict from ..textdetector.textblock import TextBlock Loading @@ -13,6 +14,9 @@ from ..base import BaseModule, DEFAULT_DEVICE, DEVICE_SELECTOR class OCRBase(BaseModule): _postprocess_hooks = OrderedDict() _preprocess_hooks = OrderedDict() def __init__(self, **params) -> None: super().__init__(**params) self.name = '' Loading @@ -20,7 +24,6 @@ class OCRBase(BaseModule): if OCR.module_dict[key] == self.__class__: self.name = key break self.postprocess_hooks: OrderedSet[Callable] = OrderedSet() self.setup_ocr() def setup_ocr(self): Loading @@ -29,8 +32,6 @@ class OCRBase(BaseModule): def run_ocr(self, img: np.ndarray, blk_list: List[TextBlock] = None) -> Union[List[TextBlock], str]: if blk_list is None: text = self.ocr_img(img) for callback in self.postprocess_hooks: text = callback(text) return text elif isinstance(blk_list, TextBlock): blk_list = [blk_list] Loading @@ -38,14 +39,16 @@ class OCRBase(BaseModule): for blk in blk_list: blk.text = [] self.ocr_blk_list(img, blk_list) for blk in blk_list: if isinstance(blk.text, List): for ii, t in enumerate(blk.text): for callback in self.postprocess_hooks: blk.text[ii] = callback(t, blk=blk) else: for callback in self.postprocess_hooks: blk.text = callback(blk.text, blk=blk) for callback_name, callback in self._postprocess_hooks.items(): callback(textblocks=blk_list, img=img, ocr_module=self) # for blk in blk_list: # if isinstance(blk.text, List): # for ii, t in enumerate(blk.text): # for callback in self.postprocess_hooks: # blk.text[ii] = callback(t, blk=blk) # else: # for callback in self.postprocess_hooks: # blk.text = callback(blk.text, blk=blk) return blk_list def ocr_blk_list(self, img: np.ndarray, blk_list: List[TextBlock]) -> None: Loading @@ -54,14 +57,6 @@ class OCRBase(BaseModule): def ocr_img(self, img: np.ndarray) -> str: raise NotImplementedError def register_postprocess_hooks(self, callbacks: Union[List, Callable]): if callbacks is None: return if isinstance(callbacks, Callable): callbacks = [callbacks] for callback in callbacks: self.postprocess_hooks.add(callback) from .model_32px import OCR32pxModel OCR32PXMODEL: OCR32pxModel = None Loading modules/textdetector/__init__.py +5 −0 Original line number Diff line number Diff line import numpy as np import cv2 from typing import Dict, List, Tuple from collections import OrderedDict from .textblock import TextBlock from utils.registry import Registry Loading @@ -11,6 +13,9 @@ from ..base import BaseModule, DEFAULT_DEVICE, DEVICE_SELECTOR class TextDetectorBase(BaseModule): _postprocess_hooks = OrderedDict() _preprocess_hooks = OrderedDict() def __init__(self, **params) -> None: super().__init__(**params) self.name = '' Loading Loading
modules/base.py +42 −1 Original line number Diff line number Diff line import gc import os from typing import Dict import time from typing import Dict, List, Callable, Union from copy import deepcopy from collections import OrderedDict from utils.logger import logger as LOGGER GPUINTENSIVE_SET = {'cuda', 'mps'} def register_hooks(hooks_registered: OrderedDict, callbacks: Union[List, Callable, Dict]): if callbacks is None: return if isinstance(callbacks, (Dict, OrderedDict)): for k, v in callbacks.items(): hooks_registered[k] = v else: nhooks = len(hooks_registered) if isinstance(callbacks, Callable): callbacks = [callbacks] for callback in callbacks: hk = 'hook_' + str(nhooks).zfill(2) while True: if hk not in hooks_registered: break hk = hk + '_' + str(time.time_ns()) hooks_registered[hk] = callback nhooks += 1 class BaseModule: params: Dict = None logger = LOGGER _preprocess_hooks: OrderedDict = None _postprocess_hooks: OrderedDict = None def __init__(self, **params) -> None: if params: if self.params is None: Loading @@ -19,6 +44,22 @@ class BaseModule: else: self.params.update(params) @classmethod def register_postprocess_hooks(cls, callbacks: Union[List, Callable]): """ these hooks would be shared among all objects inherited from the same super class """ assert cls._postprocess_hooks is not None register_hooks(cls._postprocess_hooks, callbacks) @classmethod def register_preprocess_hooks(cls, callbacks: Union[List, Callable, Dict]): """ these hooks would be shared among all objects inherited from the same super class """ assert cls._preprocess_hooks is not None register_hooks(cls._preprocess_hooks, callbacks) def updateParam(self, param_key: str, param_content): self_param_content = self.params[param_key] if isinstance(self_param_content, (str, float, int)): Loading
modules/inpaint/__init__.py +1 −430 Original line number Diff line number Diff line import numpy as np import cv2 from typing import Dict, List from utils.registry import Registry from utils.textblock_mask import extract_ballon_mask from utils.imgproc_utils import enlarge_window from ..base import BaseModule, DEFAULT_DEVICE, gc_collect, DEVICE_SELECTOR, GPUINTENSIVE_SET from ..textdetector import TextBlock INPAINTERS = Registry('inpainters') register_inpainter = INPAINTERS.register_module class InpainterBase(BaseModule): inpaint_by_block = True check_need_inpaint = True def __init__(self, **params) -> None: super().__init__(**params) self.name = '' for key in INPAINTERS.module_dict: if INPAINTERS.module_dict[key] == self.__class__: self.name = key break self.setup_inpainter() def setup_inpainter(self): raise NotImplementedError def inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None, check_need_inpaint: bool = False) -> np.ndarray: if not self.inpaint_by_block or textblock_list is None: if check_need_inpaint: ballon_msk, non_text_msk = extract_ballon_mask(img, mask) if ballon_msk is not None: non_text_region = np.where(non_text_msk > 0) non_text_px = img[non_text_region] average_bg_color = np.mean(non_text_px, axis=0) std_bgr = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_bgr) inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10 if std_max < inpaint_thresh: img = img.copy() img[np.where(ballon_msk > 0)] = average_bg_color return img try: return self._inpaint(img, mask) except Exception as e: if isinstance(e, torch.cuda.OutOfMemoryError): gc_collect() return self._inpaint(img, mask) else: raise e else: im_h, im_w = img.shape[:2] inpainted = np.copy(img) for blk in textblock_list: xyxy = blk.xyxy xyxy_e = enlarge_window(xyxy, im_w, im_h, ratio=1.7) im = inpainted[xyxy_e[1]:xyxy_e[3], xyxy_e[0]:xyxy_e[2]] msk = mask[xyxy_e[1]:xyxy_e[3], xyxy_e[0]:xyxy_e[2]] need_inpaint = True if self.check_need_inpaint or check_need_inpaint: ballon_msk, non_text_msk = extract_ballon_mask(im, msk) if ballon_msk is not None: non_text_region = np.where(non_text_msk > 0) non_text_px = im[non_text_region] average_bg_color = np.mean(non_text_px, axis=0) std_bgr = np.std(non_text_px - average_bg_color, axis=0) std_max = np.max(std_bgr) inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10 if std_max < inpaint_thresh: need_inpaint = False im[np.where(ballon_msk > 0)] = average_bg_color # cv2.imshow('im', im) # cv2.imshow('ballon', ballon_msk) # cv2.imshow('non_text', non_text_msk) # cv2.waitKey(0) if need_inpaint: inpainted[xyxy_e[1]:xyxy_e[3], xyxy_e[0]:xyxy_e[2]] = self._inpaint(im, msk) mask[xyxy[1]:xyxy[3], xyxy[0]:xyxy[2]] = 0 return inpainted def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: raise NotImplementedError @register_inpainter('opencv-tela') class OpenCVInpainter(InpainterBase): def setup_inpainter(self): self.inpaint_method = lambda img, mask, *args, **kwargs: cv2.inpaint(img, mask, 3, cv2.INPAINT_NS) def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: return self.inpaint_method(img, mask) def is_computational_intensive(self) -> bool: return True def is_cpu_intensive(self) -> bool: return True @register_inpainter('patchmatch') class PatchmatchInpainter(InpainterBase): def setup_inpainter(self): from . import patch_match self.inpaint_method = lambda img, mask, *args, **kwargs: patch_match.inpaint(img, mask, patch_size=3) def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: return self.inpaint_method(img, mask) def is_computational_intensive(self) -> bool: return True def is_cpu_intensive(self) -> bool: return True import torch from utils.imgproc_utils import resize_keepasp from .aot import AOTGenerator, load_aot_model AOTMODEL: AOTGenerator = None AOTMODEL_PATH = 'data/models/aot_inpainter.ckpt' @register_inpainter('aot') class AOTInpainter(InpainterBase): params = { 'inpaint_size': { 'type': 'selector', 'options': [ 1024, 2048 ], 'select': 2048 }, 'device': DEVICE_SELECTOR(), 'description': 'manga-image-translator inpainter' } device = DEFAULT_DEVICE inpaint_size = 2048 model: AOTGenerator = None def setup_inpainter(self): global AOTMODEL self.device = self.params['device']['select'] if AOTMODEL is None: self.model = AOTMODEL = load_aot_model(AOTMODEL_PATH, self.device) else: self.model = AOTMODEL self.model.to(self.device) self.inpaint_by_block = self.device not in GPUINTENSIVE_SET self.inpaint_size = int(self.params['inpaint_size']['select']) def inpaint_preprocess(self, img: np.ndarray, mask: np.ndarray) -> np.ndarray: img_original = np.copy(img) mask_original = np.copy(mask) mask_original[mask_original < 127] = 0 mask_original[mask_original >= 127] = 1 mask_original = mask_original[:, :, None] new_shape = self.inpaint_size if max(img.shape[0: 2]) > self.inpaint_size else None img = resize_keepasp(img, new_shape, stride=None) mask = resize_keepasp(mask, new_shape, stride=None) im_h, im_w = img.shape[:2] pad_bottom = 128 - im_h if im_h < 128 else 0 pad_right = 128 - im_w if im_w < 128 else 0 mask = cv2.copyMakeBorder(mask, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) img_torch = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0).float() / 127.5 - 1.0 mask_torch = torch.from_numpy(mask).unsqueeze_(0).unsqueeze_(0).float() / 255.0 mask_torch[mask_torch < 0.5] = 0 mask_torch[mask_torch >= 0.5] = 1 if self.device != 'cpu': img_torch = img_torch.to(self.device) mask_torch = mask_torch.to(self.device) img_torch *= (1 - mask_torch) return img_torch, mask_torch, img_original, mask_original, pad_bottom, pad_right @torch.no_grad() def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: im_h, im_w = img.shape[:2] img_torch, mask_torch, img_original, mask_original, pad_bottom, pad_right = self.inpaint_preprocess(img, mask) img_inpainted_torch = self.model(img_torch, mask_torch) img_inpainted = ((img_inpainted_torch.cpu().squeeze_(0).permute(1, 2, 0).numpy() + 1.0) * 127.5).astype(np.uint8) if pad_bottom > 0: img_inpainted = img_inpainted[:-pad_bottom] if pad_right > 0: img_inpainted = img_inpainted[:, :-pad_right] new_shape = img_inpainted.shape[:2] if new_shape[0] != im_h or new_shape[1] != im_w : img_inpainted = cv2.resize(img_inpainted, (im_w, im_h), interpolation = cv2.INTER_LINEAR) img_inpainted = img_inpainted * mask_original + img_original * (1 - mask_original) return img_inpainted def updateParam(self, param_key: str, param_content): super().updateParam(param_key, param_content) if param_key == 'device': param_device = self.params['device']['select'] self.model.to(param_device) self.device = param_device self.inpaint_by_block = param_device not in GPUINTENSIVE_SET elif param_key == 'inpaint_size': self.inpaint_size = int(self.params['inpaint_size']['select']) from .lama import LamaFourier, load_lama_mpe LAMA_MPE: LamaFourier = None @register_inpainter('lama_mpe') class LamaInpainterMPE(InpainterBase): params = { 'inpaint_size': { 'type': 'selector', 'options': [ 1024, 2048 ], 'select': 2048 }, 'device': DEVICE_SELECTOR() } device = DEFAULT_DEVICE inpaint_size = 2048 def setup_inpainter(self): global LAMA_MPE self.device = self.params['device']['select'] if LAMA_MPE is None: self.model = LAMA_MPE = load_lama_mpe(r'data/models/lama_mpe.ckpt', self.device) else: self.model = LAMA_MPE self.model.to(self.device) self.inpaint_by_block = self.device not in GPUINTENSIVE_SET self.inpaint_size = int(self.params['inpaint_size']['select']) def inpaint_preprocess(self, img: np.ndarray, mask: np.ndarray) -> np.ndarray: img_original = np.copy(img) mask_original = np.copy(mask) mask_original[mask_original < 127] = 0 mask_original[mask_original >= 127] = 1 mask_original = mask_original[:, :, None] new_shape = self.inpaint_size if max(img.shape[0: 2]) > self.inpaint_size else None # high resolution input could produce cloudy artifacts img = resize_keepasp(img, new_shape, stride=64) mask = resize_keepasp(mask, new_shape, stride=64) im_h, im_w = img.shape[:2] longer = max(im_h, im_w) pad_bottom = longer - im_h if im_h < longer else 0 pad_right = longer - im_w if im_w < longer else 0 mask = cv2.copyMakeBorder(mask, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) img_torch = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0).float() / 255.0 mask_torch = torch.from_numpy(mask).unsqueeze_(0).unsqueeze_(0).float() / 255.0 mask_torch[mask_torch < 0.5] = 0 mask_torch[mask_torch >= 0.5] = 1 rel_pos, _, direct = self.model.load_masked_position_encoding(mask_torch[0][0].numpy()) rel_pos = torch.LongTensor(rel_pos).unsqueeze_(0) direct = torch.LongTensor(direct).unsqueeze_(0) if self.device != 'cpu': img_torch = img_torch.to(self.device) mask_torch = mask_torch.to(self.device) rel_pos = rel_pos.to(self.device) direct = direct.to(self.device) img_torch *= (1 - mask_torch) return img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right @torch.no_grad() def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: im_h, im_w = img.shape[:2] img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right = self.inpaint_preprocess(img, mask) img_inpainted_torch = self.model(img_torch, mask_torch, rel_pos, direct) img_inpainted = (img_inpainted_torch.cpu().squeeze_(0).permute(1, 2, 0).numpy() * 255).astype(np.uint8) if pad_bottom > 0: img_inpainted = img_inpainted[:-pad_bottom] if pad_right > 0: img_inpainted = img_inpainted[:, :-pad_right] new_shape = img_inpainted.shape[:2] if new_shape[0] != im_h or new_shape[1] != im_w : img_inpainted = cv2.resize(img_inpainted, (im_w, im_h), interpolation = cv2.INTER_LINEAR) img_inpainted = img_inpainted * mask_original + img_original * (1 - mask_original) return img_inpainted def updateParam(self, param_key: str, param_content): super().updateParam(param_key, param_content) if param_key == 'device': param_device = self.params['device']['select'] self.model.to(param_device) self.device = param_device self.inpaint_by_block = param_device not in GPUINTENSIVE_SET elif param_key == 'inpaint_size': self.inpaint_size = int(self.params['inpaint_size']['select']) # LAMA_ORI: LamaFourier = None # @register_inpainter('lama_ori') # class LamaInpainterORI(InpainterBase): # params = { # 'inpaint_size': { # 'type': 'selector', # 'options': [ # 1024, # 2048 # ], # 'select': 2048 # }, # 'device': { # 'type': 'selector', # 'options': [ # 'cpu', # 'cuda' # ], # 'select': DEFAULT_DEVICE # } # } # device = DEFAULT_DEVICE # inpaint_size = 2048 # def setup_inpainter(self): # global LAMA_ORI # self.device = self.params['device']['select'] # if LAMA_ORI is None: # self.model = LAMA_ORI = load_lama_mpe(r'data/models/lama_org.ckpt', self.device, False) # else: # self.model = LAMA_ORI # self.model.to(self.device) # self.inpaint_by_block = True if self.device == 'cuda' else False # self.inpaint_size = int(self.params['inpaint_size']['select']) # def inpaint_preprocess(self, img: np.ndarray, mask: np.ndarray) -> np.ndarray: # img_original = np.copy(img) # mask_original = np.copy(mask) # mask_original[mask_original < 127] = 0 # mask_original[mask_original >= 127] = 1 # mask_original = mask_original[:, :, None] # new_shape = self.inpaint_size if max(img.shape[0: 2]) > self.inpaint_size else None # # high resolution input could produce cloudy artifacts # img = resize_keepasp(img, new_shape, stride=64) # mask = resize_keepasp(mask, new_shape, stride=64) # im_h, im_w = img.shape[:2] # longer = max(im_h, im_w) # pad_bottom = longer - im_h if im_h < longer else 0 # pad_right = longer - im_w if im_w < longer else 0 # mask = cv2.copyMakeBorder(mask, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) # img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT) # img_torch = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0).float() / 255.0 # mask_torch = torch.from_numpy(mask).unsqueeze_(0).unsqueeze_(0).float() / 255.0 # mask_torch[mask_torch < 0.5] = 0 # mask_torch[mask_torch >= 0.5] = 1 # rel_pos, _, direct = self.model.load_masked_position_encoding(mask_torch[0][0].numpy()) # rel_pos = torch.LongTensor(rel_pos).unsqueeze_(0) # direct = torch.LongTensor(direct).unsqueeze_(0) # if self.device == 'cuda': # img_torch = img_torch.cuda() # mask_torch = mask_torch.cuda() # rel_pos = rel_pos.cuda() # direct = direct.cuda() # img_torch *= (1 - mask_torch) # return img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right # @torch.no_grad() # def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray: # im_h, im_w = img.shape[:2] # img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right = self.inpaint_preprocess(img, mask) # img_inpainted_torch = self.model(img_torch, mask_torch, rel_pos, direct) # img_inpainted = (img_inpainted_torch.cpu().squeeze_(0).permute(1, 2, 0).numpy() * 255).astype(np.uint8) # if pad_bottom > 0: # img_inpainted = img_inpainted[:-pad_bottom] # if pad_right > 0: # img_inpainted = img_inpainted[:, :-pad_right] # new_shape = img_inpainted.shape[:2] # if new_shape[0] != im_h or new_shape[1] != im_w : # img_inpainted = cv2.resize(img_inpainted, (im_w, im_h), interpolation = cv2.INTER_LINEAR) # img_inpainted = img_inpainted * mask_original + img_original * (1 - mask_original) # return img_inpainted # def updateParam(self, param_key: str, param_content): # super().updateParam(param_key, param_content) # if param_key == 'device': # param_device = self.params['device']['select'] # self.model.to(param_device) # self.device = param_device # if param_device == 'cuda': # self.inpaint_by_block = False # else: # self.inpaint_by_block = True # elif param_key == 'inpaint_size': # self.inpaint_size = int(self.params['inpaint_size']['select']) No newline at end of file from .base import * No newline at end of file
modules/inpaint/base.py 0 → 100644 +435 −0 File added.Preview size limit exceeded, changes collapsed. Show changes
modules/ocr/__init__.py +14 −19 Original line number Diff line number Diff line Loading @@ -2,6 +2,7 @@ from typing import Tuple, List, Dict, Union, Callable from ordered_set import OrderedSet import numpy as np import logging from collections import OrderedDict from ..textdetector.textblock import TextBlock Loading @@ -13,6 +14,9 @@ from ..base import BaseModule, DEFAULT_DEVICE, DEVICE_SELECTOR class OCRBase(BaseModule): _postprocess_hooks = OrderedDict() _preprocess_hooks = OrderedDict() def __init__(self, **params) -> None: super().__init__(**params) self.name = '' Loading @@ -20,7 +24,6 @@ class OCRBase(BaseModule): if OCR.module_dict[key] == self.__class__: self.name = key break self.postprocess_hooks: OrderedSet[Callable] = OrderedSet() self.setup_ocr() def setup_ocr(self): Loading @@ -29,8 +32,6 @@ class OCRBase(BaseModule): def run_ocr(self, img: np.ndarray, blk_list: List[TextBlock] = None) -> Union[List[TextBlock], str]: if blk_list is None: text = self.ocr_img(img) for callback in self.postprocess_hooks: text = callback(text) return text elif isinstance(blk_list, TextBlock): blk_list = [blk_list] Loading @@ -38,14 +39,16 @@ class OCRBase(BaseModule): for blk in blk_list: blk.text = [] self.ocr_blk_list(img, blk_list) for blk in blk_list: if isinstance(blk.text, List): for ii, t in enumerate(blk.text): for callback in self.postprocess_hooks: blk.text[ii] = callback(t, blk=blk) else: for callback in self.postprocess_hooks: blk.text = callback(blk.text, blk=blk) for callback_name, callback in self._postprocess_hooks.items(): callback(textblocks=blk_list, img=img, ocr_module=self) # for blk in blk_list: # if isinstance(blk.text, List): # for ii, t in enumerate(blk.text): # for callback in self.postprocess_hooks: # blk.text[ii] = callback(t, blk=blk) # else: # for callback in self.postprocess_hooks: # blk.text = callback(blk.text, blk=blk) return blk_list def ocr_blk_list(self, img: np.ndarray, blk_list: List[TextBlock]) -> None: Loading @@ -54,14 +57,6 @@ class OCRBase(BaseModule): def ocr_img(self, img: np.ndarray) -> str: raise NotImplementedError def register_postprocess_hooks(self, callbacks: Union[List, Callable]): if callbacks is None: return if isinstance(callbacks, Callable): callbacks = [callbacks] for callback in callbacks: self.postprocess_hooks.add(callback) from .model_32px import OCR32pxModel OCR32PXMODEL: OCR32pxModel = None Loading
modules/textdetector/__init__.py +5 −0 Original line number Diff line number Diff line import numpy as np import cv2 from typing import Dict, List, Tuple from collections import OrderedDict from .textblock import TextBlock from utils.registry import Registry Loading @@ -11,6 +13,9 @@ from ..base import BaseModule, DEFAULT_DEVICE, DEVICE_SELECTOR class TextDetectorBase(BaseModule): _postprocess_hooks = OrderedDict() _preprocess_hooks = OrderedDict() def __init__(self, **params) -> None: super().__init__(**params) self.name = '' Loading