update hook registration (1e348efb) · Commits · git-mirror / BallonsTranslator

modules/base.py

+42 −1

Original line number	Diff line number	Diff line
		import gc
		import os
		from typing import Dict
		import time
		from typing import Dict, List, Callable, Union
		from copy import deepcopy
		from collections import OrderedDict

		from utils.logger import logger as LOGGER

		GPUINTENSIVE_SET = {'cuda', 'mps'}

		def register_hooks(hooks_registered: OrderedDict, callbacks: Union[List, Callable, Dict]):
		if callbacks is None:
		return
		if isinstance(callbacks, (Dict, OrderedDict)):
		for k, v in callbacks.items():
		hooks_registered[k] = v
		else:
		nhooks = len(hooks_registered)

		if isinstance(callbacks, Callable):
		callbacks = [callbacks]
		for callback in callbacks:
		hk = 'hook_' + str(nhooks).zfill(2)
		while True:
		if hk not in hooks_registered:
		break
		hk = hk + '_' + str(time.time_ns())
		hooks_registered[hk] = callback
		nhooks += 1

		class BaseModule:

		params: Dict = None
		logger = LOGGER

		_preprocess_hooks: OrderedDict = None
		_postprocess_hooks: OrderedDict = None

		def __init__(self, **params) -> None:
		if params:
		if self.params is None:
		@@ -19,6 +44,22 @@ class BaseModule:
		else:
		self.params.update(params)

		@classmethod
		def register_postprocess_hooks(cls, callbacks: Union[List, Callable]):
		"""
		these hooks would be shared among all objects inherited from the same super class
		"""
		assert cls._postprocess_hooks is not None
		register_hooks(cls._postprocess_hooks, callbacks)

		@classmethod
		def register_preprocess_hooks(cls, callbacks: Union[List, Callable, Dict]):
		"""
		these hooks would be shared among all objects inherited from the same super class
		"""
		assert cls._preprocess_hooks is not None
		register_hooks(cls._preprocess_hooks, callbacks)

		def updateParam(self, param_key: str, param_content):
		self_param_content = self.params[param_key]
		if isinstance(self_param_content, (str, float, int)):

modules/inpaint/init.py

+1 −430

Original line number	Diff line number	Diff line
		import numpy as np
		import cv2
		from typing import Dict, List

		from utils.registry import Registry
		from utils.textblock_mask import extract_ballon_mask
		from utils.imgproc_utils import enlarge_window

		from ..base import BaseModule, DEFAULT_DEVICE, gc_collect, DEVICE_SELECTOR, GPUINTENSIVE_SET
		from ..textdetector import TextBlock

		INPAINTERS = Registry('inpainters')
		register_inpainter = INPAINTERS.register_module


		class InpainterBase(BaseModule):

		inpaint_by_block = True
		check_need_inpaint = True
		def __init__(self, **params) -> None:
		super().__init__(**params)
		self.name = ''
		for key in INPAINTERS.module_dict:
		if INPAINTERS.module_dict[key] == self.__class__:
		self.name = key
		break
		self.setup_inpainter()

		def setup_inpainter(self):
		raise NotImplementedError

		def inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None, check_need_inpaint: bool = False) -> np.ndarray:
		if not self.inpaint_by_block or textblock_list is None:
		if check_need_inpaint:
		ballon_msk, non_text_msk = extract_ballon_mask(img, mask)
		if ballon_msk is not None:
		non_text_region = np.where(non_text_msk > 0)
		non_text_px = img[non_text_region]
		average_bg_color = np.mean(non_text_px, axis=0)
		std_bgr = np.std(non_text_px - average_bg_color, axis=0)
		std_max = np.max(std_bgr)
		inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10
		if std_max < inpaint_thresh:
		img = img.copy()
		img[np.where(ballon_msk > 0)] = average_bg_color
		return img
		try:
		return self._inpaint(img, mask)
		except Exception as e:
		if isinstance(e, torch.cuda.OutOfMemoryError):
		gc_collect()
		return self._inpaint(img, mask)
		else:
		raise e
		else:
		im_h, im_w = img.shape[:2]
		inpainted = np.copy(img)
		for blk in textblock_list:
		xyxy = blk.xyxy
		xyxy_e = enlarge_window(xyxy, im_w, im_h, ratio=1.7)
		im = inpainted[xyxy_e[1]:xyxy_e[3], xyxy_e[0]:xyxy_e[2]]
		msk = mask[xyxy_e[1]:xyxy_e[3], xyxy_e[0]:xyxy_e[2]]
		need_inpaint = True
		if self.check_need_inpaint or check_need_inpaint:
		ballon_msk, non_text_msk = extract_ballon_mask(im, msk)
		if ballon_msk is not None:
		non_text_region = np.where(non_text_msk > 0)
		non_text_px = im[non_text_region]
		average_bg_color = np.mean(non_text_px, axis=0)
		std_bgr = np.std(non_text_px - average_bg_color, axis=0)
		std_max = np.max(std_bgr)
		inpaint_thresh = 7 if np.std(std_bgr) > 1 else 10
		if std_max < inpaint_thresh:
		need_inpaint = False
		im[np.where(ballon_msk > 0)] = average_bg_color
		# cv2.imshow('im', im)
		# cv2.imshow('ballon', ballon_msk)
		# cv2.imshow('non_text', non_text_msk)
		# cv2.waitKey(0)

		if need_inpaint:
		inpainted[xyxy_e[1]:xyxy_e[3], xyxy_e[0]:xyxy_e[2]] = self._inpaint(im, msk)

		mask[xyxy[1]:xyxy[3], xyxy[0]:xyxy[2]] = 0
		return inpainted

		def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray:
		raise NotImplementedError


		@register_inpainter('opencv-tela')
		class OpenCVInpainter(InpainterBase):

		def setup_inpainter(self):
		self.inpaint_method = lambda img, mask, args, *kwargs: cv2.inpaint(img, mask, 3, cv2.INPAINT_NS)

		def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray:
		return self.inpaint_method(img, mask)

		def is_computational_intensive(self) -> bool:
		return True

		def is_cpu_intensive(self) -> bool:
		return True


		@register_inpainter('patchmatch')
		class PatchmatchInpainter(InpainterBase):

		def setup_inpainter(self):
		from . import patch_match
		self.inpaint_method = lambda img, mask, args, *kwargs: patch_match.inpaint(img, mask, patch_size=3)

		def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray:
		return self.inpaint_method(img, mask)

		def is_computational_intensive(self) -> bool:
		return True

		def is_cpu_intensive(self) -> bool:
		return True


		import torch
		from utils.imgproc_utils import resize_keepasp
		from .aot import AOTGenerator, load_aot_model
		AOTMODEL: AOTGenerator = None
		AOTMODEL_PATH = 'data/models/aot_inpainter.ckpt'


		@register_inpainter('aot')
		class AOTInpainter(InpainterBase):

		params = {
		'inpaint_size': {
		'type': 'selector',
		'options': [
		1024,
		2048
		],
		'select': 2048
		},
		'device': DEVICE_SELECTOR(),
		'description': 'manga-image-translator inpainter'
		}

		device = DEFAULT_DEVICE
		inpaint_size = 2048
		model: AOTGenerator = None

		def setup_inpainter(self):
		global AOTMODEL
		self.device = self.params['device']['select']
		if AOTMODEL is None:
		self.model = AOTMODEL = load_aot_model(AOTMODEL_PATH, self.device)
		else:
		self.model = AOTMODEL
		self.model.to(self.device)
		self.inpaint_by_block = self.device not in GPUINTENSIVE_SET
		self.inpaint_size = int(self.params['inpaint_size']['select'])

		def inpaint_preprocess(self, img: np.ndarray, mask: np.ndarray) -> np.ndarray:

		img_original = np.copy(img)
		mask_original = np.copy(mask)
		mask_original[mask_original < 127] = 0
		mask_original[mask_original >= 127] = 1
		mask_original = mask_original[:, :, None]

		new_shape = self.inpaint_size if max(img.shape[0: 2]) > self.inpaint_size else None

		img = resize_keepasp(img, new_shape, stride=None)
		mask = resize_keepasp(mask, new_shape, stride=None)

		im_h, im_w = img.shape[:2]
		pad_bottom = 128 - im_h if im_h < 128 else 0
		pad_right = 128 - im_w if im_w < 128 else 0
		mask = cv2.copyMakeBorder(mask, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT)
		img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT)

		img_torch = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0).float() / 127.5 - 1.0
		mask_torch = torch.from_numpy(mask).unsqueeze_(0).unsqueeze_(0).float() / 255.0
		mask_torch[mask_torch < 0.5] = 0
		mask_torch[mask_torch >= 0.5] = 1

		if self.device != 'cpu':
		img_torch = img_torch.to(self.device)
		mask_torch = mask_torch.to(self.device)
		img_torch *= (1 - mask_torch)
		return img_torch, mask_torch, img_original, mask_original, pad_bottom, pad_right

		@torch.no_grad()
		def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray:

		im_h, im_w = img.shape[:2]
		img_torch, mask_torch, img_original, mask_original, pad_bottom, pad_right = self.inpaint_preprocess(img, mask)
		img_inpainted_torch = self.model(img_torch, mask_torch)
		img_inpainted = ((img_inpainted_torch.cpu().squeeze_(0).permute(1, 2, 0).numpy() + 1.0) * 127.5).astype(np.uint8)
		if pad_bottom > 0:
		img_inpainted = img_inpainted[:-pad_bottom]
		if pad_right > 0:
		img_inpainted = img_inpainted[:, :-pad_right]
		new_shape = img_inpainted.shape[:2]
		if new_shape[0] != im_h or new_shape[1] != im_w :
		img_inpainted = cv2.resize(img_inpainted, (im_w, im_h), interpolation = cv2.INTER_LINEAR)
		img_inpainted = img_inpainted * mask_original + img_original * (1 - mask_original)

		return img_inpainted

		def updateParam(self, param_key: str, param_content):
		super().updateParam(param_key, param_content)

		if param_key == 'device':
		param_device = self.params['device']['select']
		self.model.to(param_device)
		self.device = param_device
		self.inpaint_by_block = param_device not in GPUINTENSIVE_SET

		elif param_key == 'inpaint_size':
		self.inpaint_size = int(self.params['inpaint_size']['select'])


		from .lama import LamaFourier, load_lama_mpe

		LAMA_MPE: LamaFourier = None
		@register_inpainter('lama_mpe')
		class LamaInpainterMPE(InpainterBase):

		params = {
		'inpaint_size': {
		'type': 'selector',
		'options': [
		1024,
		2048
		],
		'select': 2048
		},
		'device': DEVICE_SELECTOR()
		}

		device = DEFAULT_DEVICE
		inpaint_size = 2048

		def setup_inpainter(self):
		global LAMA_MPE

		self.device = self.params['device']['select']
		if LAMA_MPE is None:
		self.model = LAMA_MPE = load_lama_mpe(r'data/models/lama_mpe.ckpt', self.device)
		else:
		self.model = LAMA_MPE
		self.model.to(self.device)
		self.inpaint_by_block = self.device not in GPUINTENSIVE_SET
		self.inpaint_size = int(self.params['inpaint_size']['select'])

		def inpaint_preprocess(self, img: np.ndarray, mask: np.ndarray) -> np.ndarray:

		img_original = np.copy(img)
		mask_original = np.copy(mask)
		mask_original[mask_original < 127] = 0
		mask_original[mask_original >= 127] = 1
		mask_original = mask_original[:, :, None]

		new_shape = self.inpaint_size if max(img.shape[0: 2]) > self.inpaint_size else None
		# high resolution input could produce cloudy artifacts
		img = resize_keepasp(img, new_shape, stride=64)
		mask = resize_keepasp(mask, new_shape, stride=64)

		im_h, im_w = img.shape[:2]
		longer = max(im_h, im_w)
		pad_bottom = longer - im_h if im_h < longer else 0
		pad_right = longer - im_w if im_w < longer else 0
		mask = cv2.copyMakeBorder(mask, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT)
		img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT)

		img_torch = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0).float() / 255.0
		mask_torch = torch.from_numpy(mask).unsqueeze_(0).unsqueeze_(0).float() / 255.0
		mask_torch[mask_torch < 0.5] = 0
		mask_torch[mask_torch >= 0.5] = 1
		rel_pos, _, direct = self.model.load_masked_position_encoding(mask_torch[0][0].numpy())
		rel_pos = torch.LongTensor(rel_pos).unsqueeze_(0)
		direct = torch.LongTensor(direct).unsqueeze_(0)

		if self.device != 'cpu':
		img_torch = img_torch.to(self.device)
		mask_torch = mask_torch.to(self.device)
		rel_pos = rel_pos.to(self.device)
		direct = direct.to(self.device)
		img_torch *= (1 - mask_torch)
		return img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right

		@torch.no_grad()
		def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray:

		im_h, im_w = img.shape[:2]
		img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right = self.inpaint_preprocess(img, mask)
		img_inpainted_torch = self.model(img_torch, mask_torch, rel_pos, direct)

		img_inpainted = (img_inpainted_torch.cpu().squeeze_(0).permute(1, 2, 0).numpy() * 255).astype(np.uint8)
		if pad_bottom > 0:
		img_inpainted = img_inpainted[:-pad_bottom]
		if pad_right > 0:
		img_inpainted = img_inpainted[:, :-pad_right]
		new_shape = img_inpainted.shape[:2]
		if new_shape[0] != im_h or new_shape[1] != im_w :
		img_inpainted = cv2.resize(img_inpainted, (im_w, im_h), interpolation = cv2.INTER_LINEAR)
		img_inpainted = img_inpainted * mask_original + img_original * (1 - mask_original)

		return img_inpainted

		def updateParam(self, param_key: str, param_content):
		super().updateParam(param_key, param_content)

		if param_key == 'device':
		param_device = self.params['device']['select']
		self.model.to(param_device)
		self.device = param_device
		self.inpaint_by_block = param_device not in GPUINTENSIVE_SET

		elif param_key == 'inpaint_size':
		self.inpaint_size = int(self.params['inpaint_size']['select'])


		# LAMA_ORI: LamaFourier = None
		# @register_inpainter('lama_ori')
		# class LamaInpainterORI(InpainterBase):

		# params = {
		# 'inpaint_size': {
		# 'type': 'selector',
		# 'options': [
		# 1024,
		# 2048
		# ],
		# 'select': 2048
		# },
		# 'device': {
		# 'type': 'selector',
		# 'options': [
		# 'cpu',
		# 'cuda'
		# ],
		# 'select': DEFAULT_DEVICE
		# }
		# }

		# device = DEFAULT_DEVICE
		# inpaint_size = 2048

		# def setup_inpainter(self):
		# global LAMA_ORI

		# self.device = self.params['device']['select']
		# if LAMA_ORI is None:
		# self.model = LAMA_ORI = load_lama_mpe(r'data/models/lama_org.ckpt', self.device, False)
		# else:
		# self.model = LAMA_ORI
		# self.model.to(self.device)
		# self.inpaint_by_block = True if self.device == 'cuda' else False
		# self.inpaint_size = int(self.params['inpaint_size']['select'])

		# def inpaint_preprocess(self, img: np.ndarray, mask: np.ndarray) -> np.ndarray:

		# img_original = np.copy(img)
		# mask_original = np.copy(mask)
		# mask_original[mask_original < 127] = 0
		# mask_original[mask_original >= 127] = 1
		# mask_original = mask_original[:, :, None]

		# new_shape = self.inpaint_size if max(img.shape[0: 2]) > self.inpaint_size else None
		# # high resolution input could produce cloudy artifacts
		# img = resize_keepasp(img, new_shape, stride=64)
		# mask = resize_keepasp(mask, new_shape, stride=64)

		# im_h, im_w = img.shape[:2]
		# longer = max(im_h, im_w)
		# pad_bottom = longer - im_h if im_h < longer else 0
		# pad_right = longer - im_w if im_w < longer else 0
		# mask = cv2.copyMakeBorder(mask, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT)
		# img = cv2.copyMakeBorder(img, 0, pad_bottom, 0, pad_right, cv2.BORDER_REFLECT)

		# img_torch = torch.from_numpy(img).permute(2, 0, 1).unsqueeze_(0).float() / 255.0
		# mask_torch = torch.from_numpy(mask).unsqueeze_(0).unsqueeze_(0).float() / 255.0
		# mask_torch[mask_torch < 0.5] = 0
		# mask_torch[mask_torch >= 0.5] = 1
		# rel_pos, _, direct = self.model.load_masked_position_encoding(mask_torch[0][0].numpy())
		# rel_pos = torch.LongTensor(rel_pos).unsqueeze_(0)
		# direct = torch.LongTensor(direct).unsqueeze_(0)

		# if self.device == 'cuda':
		# img_torch = img_torch.cuda()
		# mask_torch = mask_torch.cuda()
		# rel_pos = rel_pos.cuda()
		# direct = direct.cuda()
		# img_torch *= (1 - mask_torch)
		# return img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right

		# @torch.no_grad()
		# def _inpaint(self, img: np.ndarray, mask: np.ndarray, textblock_list: List[TextBlock] = None) -> np.ndarray:

		# im_h, im_w = img.shape[:2]
		# img_torch, mask_torch, rel_pos, direct, img_original, mask_original, pad_bottom, pad_right = self.inpaint_preprocess(img, mask)
		# img_inpainted_torch = self.model(img_torch, mask_torch, rel_pos, direct)

		# img_inpainted = (img_inpainted_torch.cpu().squeeze_(0).permute(1, 2, 0).numpy() * 255).astype(np.uint8)
		# if pad_bottom > 0:
		# img_inpainted = img_inpainted[:-pad_bottom]
		# if pad_right > 0:
		# img_inpainted = img_inpainted[:, :-pad_right]
		# new_shape = img_inpainted.shape[:2]
		# if new_shape[0] != im_h or new_shape[1] != im_w :
		# img_inpainted = cv2.resize(img_inpainted, (im_w, im_h), interpolation = cv2.INTER_LINEAR)
		# img_inpainted = img_inpainted * mask_original + img_original * (1 - mask_original)

		# return img_inpainted

		# def updateParam(self, param_key: str, param_content):
		# super().updateParam(param_key, param_content)

		# if param_key == 'device':
		# param_device = self.params['device']['select']
		# self.model.to(param_device)
		# self.device = param_device
		# if param_device == 'cuda':
		# self.inpaint_by_block = False
		# else:
		# self.inpaint_by_block = True

		# elif param_key == 'inpaint_size':
		# self.inpaint_size = int(self.params['inpaint_size']['select'])
		No newline at end of file
		from .base import *
		No newline at end of file

modules/inpaint/base.py

0 → 100644

+435 −0

File added.

Preview size limit exceeded, changes collapsed.

modules/ocr/init.py

+14 −19

Original line number	Diff line number	Diff line
		@@ -2,6 +2,7 @@ from typing import Tuple, List, Dict, Union, Callable
		from ordered_set import OrderedSet
		import numpy as np
		import logging
		from collections import OrderedDict

		from ..textdetector.textblock import TextBlock

		@@ -13,6 +14,9 @@ from ..base import BaseModule, DEFAULT_DEVICE, DEVICE_SELECTOR

		class OCRBase(BaseModule):

		_postprocess_hooks = OrderedDict()
		_preprocess_hooks = OrderedDict()

		def __init__(self, **params) -> None:
		super().__init__(**params)
		self.name = ''
		@@ -20,7 +24,6 @@ class OCRBase(BaseModule):
		if OCR.module_dict[key] == self.__class__:
		self.name = key
		break
		self.postprocess_hooks: OrderedSet[Callable] = OrderedSet()
		self.setup_ocr()

		def setup_ocr(self):
		@@ -29,8 +32,6 @@ class OCRBase(BaseModule):
		def run_ocr(self, img: np.ndarray, blk_list: List[TextBlock] = None) -> Union[List[TextBlock], str]:
		if blk_list is None:
		text = self.ocr_img(img)
		for callback in self.postprocess_hooks:
		text = callback(text)
		return text
		elif isinstance(blk_list, TextBlock):
		blk_list = [blk_list]
		@@ -38,14 +39,16 @@ class OCRBase(BaseModule):
		for blk in blk_list:
		blk.text = []
		self.ocr_blk_list(img, blk_list)
		for blk in blk_list:
		if isinstance(blk.text, List):
		for ii, t in enumerate(blk.text):
		for callback in self.postprocess_hooks:
		blk.text[ii] = callback(t, blk=blk)
		else:
		for callback in self.postprocess_hooks:
		blk.text = callback(blk.text, blk=blk)
		for callback_name, callback in self._postprocess_hooks.items():
		callback(textblocks=blk_list, img=img, ocr_module=self)
		# for blk in blk_list:
		# if isinstance(blk.text, List):
		# for ii, t in enumerate(blk.text):
		# for callback in self.postprocess_hooks:
		# blk.text[ii] = callback(t, blk=blk)
		# else:
		# for callback in self.postprocess_hooks:
		# blk.text = callback(blk.text, blk=blk)
		return blk_list

		def ocr_blk_list(self, img: np.ndarray, blk_list: List[TextBlock]) -> None:
		@@ -54,14 +57,6 @@ class OCRBase(BaseModule):
		def ocr_img(self, img: np.ndarray) -> str:
		raise NotImplementedError

		def register_postprocess_hooks(self, callbacks: Union[List, Callable]):
		if callbacks is None:
		return
		if isinstance(callbacks, Callable):
		callbacks = [callbacks]
		for callback in callbacks:
		self.postprocess_hooks.add(callback)


		from .model_32px import OCR32pxModel
		OCR32PXMODEL: OCR32pxModel = None

modules/textdetector/init.py

+5 −0

Original line number	Diff line number	Diff line
		import numpy as np
		import cv2
		from typing import Dict, List, Tuple
		from collections import OrderedDict

		from .textblock import TextBlock

		from utils.registry import Registry
		@@ -11,6 +13,9 @@ from ..base import BaseModule, DEFAULT_DEVICE, DEVICE_SELECTOR

		class TextDetectorBase(BaseModule):

		_postprocess_hooks = OrderedDict()
		_preprocess_hooks = OrderedDict()

		def __init__(self, **params) -> None:
		super().__init__(**params)
		self.name = ''