Loading imgutils/generic/__init__.py +1 −0 Original line number Diff line number Diff line from .classify import * from .enhance import * imgutils/generic/enhance.py 0 → 100644 +54 −0 Original line number Diff line number Diff line import numpy as np from PIL import Image from ..data import ImageTyping, load_image __all__ = [ 'ImageEnhancer', ] def _has_alpha_channel(image: Image.Image) -> bool: """ Check if the image has an alpha channel. :param image: The image to check. :type image: Image.Image :return: True if the image has an alpha channel, False otherwise. :rtype: bool """ return any(band in {'A', 'a', 'P'} for band in image.getbands()) class ImageEnhancer: def _process_rgb(self, rgb_array: np.ndarray): # input: a (3, H, W) float32[0.0, 1.0] array # output: another (3, H', W') float32[0.0, 1.0] array raise NotImplementedError def _process_alpha_channel_with_model(self, alpha_array: np.ndarray): assert len(alpha_array.shape) == 2, f'Alpha array should be 2-dim, but {alpha_array.shape!r} found.' enhanced_alpha_array = self._process_rgb(np.stack([alpha_array, alpha_array, alpha_array])).mean(axis=0) return enhanced_alpha_array def _process_rgba(self, rgba_array: np.ndarray): assert len(rgba_array.shape) == 3 and rgba_array.shape[0] == 4, \ f'RGBA array should be 3-dim and 4-channels, but {rgba_array.shape!r} found.' return np.concatenate([ self._process_rgb(rgba_array[:3, ...]), self._process_alpha_channel_with_model(rgba_array[3, ...])[None, ...] ], axis=0) def process(self, image: ImageTyping): image = load_image(image, mode=None, force_background=None) mode = 'RGBA' if _has_alpha_channel(image) else 'RGB' image = load_image(image, mode=mode, force_background=None) input_array = (np.array(image).astype(np.float32) / 255.0).transpose((2, 0, 1)) if _has_alpha_channel(image): output_array = self._process_rgba(input_array) else: output_array = self._process_rgb(input_array) output_array = (np.clip(output_array, a_min=0.0, a_max=1.0) * 255.0).astype(np.uint8).transpose((1, 2, 0)) return Image.fromarray(output_array, mode=mode) imgutils/upscale/__init__.py +5 −1 Original line number Diff line number Diff line from .cdc import upscale_with_cdc """ Overview: Upscale image to a larger size. """ from .cdc import * imgutils/upscale/cdc.py +63 −37 Original line number Diff line number Diff line Loading @@ -22,15 +22,18 @@ Overview: from functools import lru_cache from typing import Tuple, Any import cv2 import numpy as np from PIL import Image from huggingface_hub import hf_hub_download from .transparent import _rgba_preprocess, _rgba_postprocess from ..data import ImageTyping, load_image from ..data import ImageTyping from ..generic import ImageEnhancer from ..utils import open_onnx_model, area_batch_run __all__ = [ 'upscale_with_cdc', ] @lru_cache() def _open_cdc_upscaler_model(model: str) -> Tuple[Any, int]: Loading Loading @@ -62,9 +65,64 @@ def _open_cdc_upscaler_model(model: str) -> Tuple[Any, int]: _CDC_INPUT_UNIT = 16 def _upscale_for_rgb(input_: np.ndarray, model: str = 'HGSR-MHR-anime-aug_X4_320', tile_size: int = 512, tile_overlap: int = 64, batch_size: int = 1, silent: bool = False): assert len(input_.shape) == 4 and input_.shape[:2] == (1, 3) ort, scale = _open_cdc_upscaler_model(model) def _method(ix): ix = ix.astype(np.float32) batch, channels, height, width = ix.shape p_height = 0 if height % _CDC_INPUT_UNIT == 0 else _CDC_INPUT_UNIT - (height % _CDC_INPUT_UNIT) p_width = 0 if width % _CDC_INPUT_UNIT == 0 else _CDC_INPUT_UNIT - (width % _CDC_INPUT_UNIT) if p_height > 0 or p_width > 0: # align to 16 ix = np.pad(ix, ((0, 0), (0, 0), (0, p_height), (0, p_width)), mode='reflect') actual_height, actual_width = height, width ox, = ort.run(['output'], {'input': ix}) batch, channels, scale_, height, scale_, width = ox.shape ox = ox.reshape((batch, channels, scale_ * height, scale_ * width)) ox = ox[..., :scale_ * actual_height, :scale_ * actual_width] # crop back return ox output_ = area_batch_run( input_, _method, tile_size=tile_size, tile_overlap=tile_overlap, batch_size=batch_size, scale=scale, silent=silent, process_title='CDC Upscale', ) output_ = np.clip(output_, a_min=0.0, a_max=1.0) return output_ class _Enhancer(ImageEnhancer): def __init__(self, model: str = 'HGSR-MHR-anime-aug_X4_320', tile_size: int = 512, tile_overlap: int = 64, batch_size: int = 1, silent: bool = False): self.model = model self.tile_size = tile_size self.tile_overlap = tile_overlap self.batch_size = batch_size self.silent = silent def _process_rgb(self, rgb_array: np.ndarray): return _upscale_for_rgb( rgb_array[None, ...], model=self.model, tile_size=self.tile_size, tile_overlap=self.tile_overlap, batch_size=self.batch_size, silent=self.silent, )[0] @lru_cache() def _get_enhancer(model: str = 'HGSR-MHR-anime-aug_X4_320', tile_size: int = 512, tile_overlap: int = 64, batch_size: int = 1, silent: bool = False): return _Enhancer(model, tile_size, tile_overlap, batch_size, silent) def upscale_with_cdc(image: ImageTyping, model: str = 'HGSR-MHR-anime-aug_X4_320', tile_size: int = 512, tile_overlap: int = 64, batch_size: int = 1, alpha_interpolation: int = cv2.INTER_LINEAR, silent: bool = False, ) -> Image.Image: silent: bool = False) -> Image.Image: """ Upscale the input image using the CDC upscaler model. Loading @@ -83,9 +141,6 @@ def upscale_with_cdc(image: ImageTyping, model: str = 'HGSR-MHR-anime-aug_X4_320 :param batch_size: The batch size. (default: 1) :type batch_size: int :param alpha_interpolation: Interpolation for :func:`cv2.resize`. Default is ``cv2.INTER_LINEAR``. :type alpha_interpolation: int :param silent: Whether to suppress progress messages. (default: False) :type silent: bool Loading @@ -107,33 +162,4 @@ def upscale_with_cdc(image: ImageTyping, model: str = 'HGSR-MHR-anime-aug_X4_320 >>> upscale_with_cdc(image) <PIL.Image.Image image mode=RGBA size=4672x4672 at 0x7F0E48EDB640> """ image, alpha_mask = _rgba_preprocess(image) image = load_image(image, mode='RGB', force_background='white') input_ = np.array(image).astype(np.float32) / 255.0 input_ = input_.transpose((2, 0, 1))[None, ...] ort, scale = _open_cdc_upscaler_model(model) def _method(ix): ix = ix.astype(np.float32) batch, channels, height, width = ix.shape p_height = 0 if height % _CDC_INPUT_UNIT == 0 else _CDC_INPUT_UNIT - (height % _CDC_INPUT_UNIT) p_width = 0 if width % _CDC_INPUT_UNIT == 0 else _CDC_INPUT_UNIT - (width % _CDC_INPUT_UNIT) if p_height > 0 or p_width > 0: # align to 16 ix = np.pad(ix, ((0, 0), (0, 0), (0, p_height), (0, p_width)), mode='reflect') actual_height, actual_width = height, width ox, = ort.run(['output'], {'input': ix}) batch, channels, scale_, height, scale_, width = ox.shape ox = ox.reshape((batch, channels, scale_ * height, scale_ * width)) ox = ox[..., :scale_ * actual_height, :scale_ * actual_width] # crop back return ox output_ = area_batch_run( input_, _method, tile_size=tile_size, tile_overlap=tile_overlap, batch_size=batch_size, scale=scale, silent=silent, process_title='CDC Upscale', ) output_ = np.clip(output_, a_min=0.0, a_max=1.0) ret_image = Image.fromarray((output_[0].transpose((1, 2, 0)) * 255).astype(np.uint8), 'RGB') return _rgba_postprocess(ret_image, alpha_mask, interpolation=alpha_interpolation) return _get_enhancer(model, tile_size, tile_overlap, batch_size, silent).process(image) imgutils/upscale/transparent.pydeleted 100644 → 0 +0 −71 Original line number Diff line number Diff line from typing import Optional import cv2 import numpy as np from PIL import Image from ..data.image import ImageTyping, load_image def _has_alpha_channel(image: Image.Image) -> bool: """ Check if the image has an alpha channel. :param image: The image to check. :type image: Image.Image :return: True if the image has an alpha channel, False otherwise. :rtype: bool """ return any(band in {'A', 'a', 'P'} for band in image.getbands()) def _rgba_preprocess(image: ImageTyping): """ Preprocess the image for RGBA conversion. :param image: The image to preprocess. :type image: ImageTyping :return: Preprocessed image and alpha mask. :rtype: Tuple[Image.Image, Optional[np.ndarray]] """ image = load_image(image, force_background=None, mode=None) if _has_alpha_channel(image): image = image.convert('RGBA') pimage = image.convert('RGB') alpha_mask = np.array(image)[:, :, 3].astype(np.float32) / 255.0 else: pimage = image.convert('RGB') alpha_mask = None return pimage, alpha_mask def _rgba_postprocess(pimage, alpha_mask: Optional[np.ndarray] = None, interpolation: int = cv2.INTER_LINEAR): """ Postprocess the image after RGBA conversion. :param pimage: The processed image. :type pimage: Image.Image :param alpha_mask: The alpha mask. :type alpha_mask: Optional[np.ndarray] :param interpolation: Interpolation for :func:`cv2.resize`. Default is ``cv2.INTER_LINEAR``. :type interpolation: int :return: Postprocessed image. :rtype: Image.Image """ assert pimage.mode == 'RGB' if alpha_mask is None: return pimage else: channels = np.array(pimage) alpha_mask = cv2.resize(alpha_mask, channels.shape[:2], interpolation=interpolation) alpha_mask = np.clip(alpha_mask, a_min=0.0, a_max=1.0) alpha_channel = (alpha_mask * 255.0).astype(np.uint8)[..., np.newaxis] rgba_channels = np.concatenate([channels, alpha_channel], axis=-1) assert rgba_channels.shape == (*channels.shape[:-1], 4) return Image.fromarray(rgba_channels, mode='RGBA') Loading
imgutils/generic/__init__.py +1 −0 Original line number Diff line number Diff line from .classify import * from .enhance import *
imgutils/generic/enhance.py 0 → 100644 +54 −0 Original line number Diff line number Diff line import numpy as np from PIL import Image from ..data import ImageTyping, load_image __all__ = [ 'ImageEnhancer', ] def _has_alpha_channel(image: Image.Image) -> bool: """ Check if the image has an alpha channel. :param image: The image to check. :type image: Image.Image :return: True if the image has an alpha channel, False otherwise. :rtype: bool """ return any(band in {'A', 'a', 'P'} for band in image.getbands()) class ImageEnhancer: def _process_rgb(self, rgb_array: np.ndarray): # input: a (3, H, W) float32[0.0, 1.0] array # output: another (3, H', W') float32[0.0, 1.0] array raise NotImplementedError def _process_alpha_channel_with_model(self, alpha_array: np.ndarray): assert len(alpha_array.shape) == 2, f'Alpha array should be 2-dim, but {alpha_array.shape!r} found.' enhanced_alpha_array = self._process_rgb(np.stack([alpha_array, alpha_array, alpha_array])).mean(axis=0) return enhanced_alpha_array def _process_rgba(self, rgba_array: np.ndarray): assert len(rgba_array.shape) == 3 and rgba_array.shape[0] == 4, \ f'RGBA array should be 3-dim and 4-channels, but {rgba_array.shape!r} found.' return np.concatenate([ self._process_rgb(rgba_array[:3, ...]), self._process_alpha_channel_with_model(rgba_array[3, ...])[None, ...] ], axis=0) def process(self, image: ImageTyping): image = load_image(image, mode=None, force_background=None) mode = 'RGBA' if _has_alpha_channel(image) else 'RGB' image = load_image(image, mode=mode, force_background=None) input_array = (np.array(image).astype(np.float32) / 255.0).transpose((2, 0, 1)) if _has_alpha_channel(image): output_array = self._process_rgba(input_array) else: output_array = self._process_rgb(input_array) output_array = (np.clip(output_array, a_min=0.0, a_max=1.0) * 255.0).astype(np.uint8).transpose((1, 2, 0)) return Image.fromarray(output_array, mode=mode)
imgutils/upscale/__init__.py +5 −1 Original line number Diff line number Diff line from .cdc import upscale_with_cdc """ Overview: Upscale image to a larger size. """ from .cdc import *
imgutils/upscale/cdc.py +63 −37 Original line number Diff line number Diff line Loading @@ -22,15 +22,18 @@ Overview: from functools import lru_cache from typing import Tuple, Any import cv2 import numpy as np from PIL import Image from huggingface_hub import hf_hub_download from .transparent import _rgba_preprocess, _rgba_postprocess from ..data import ImageTyping, load_image from ..data import ImageTyping from ..generic import ImageEnhancer from ..utils import open_onnx_model, area_batch_run __all__ = [ 'upscale_with_cdc', ] @lru_cache() def _open_cdc_upscaler_model(model: str) -> Tuple[Any, int]: Loading Loading @@ -62,9 +65,64 @@ def _open_cdc_upscaler_model(model: str) -> Tuple[Any, int]: _CDC_INPUT_UNIT = 16 def _upscale_for_rgb(input_: np.ndarray, model: str = 'HGSR-MHR-anime-aug_X4_320', tile_size: int = 512, tile_overlap: int = 64, batch_size: int = 1, silent: bool = False): assert len(input_.shape) == 4 and input_.shape[:2] == (1, 3) ort, scale = _open_cdc_upscaler_model(model) def _method(ix): ix = ix.astype(np.float32) batch, channels, height, width = ix.shape p_height = 0 if height % _CDC_INPUT_UNIT == 0 else _CDC_INPUT_UNIT - (height % _CDC_INPUT_UNIT) p_width = 0 if width % _CDC_INPUT_UNIT == 0 else _CDC_INPUT_UNIT - (width % _CDC_INPUT_UNIT) if p_height > 0 or p_width > 0: # align to 16 ix = np.pad(ix, ((0, 0), (0, 0), (0, p_height), (0, p_width)), mode='reflect') actual_height, actual_width = height, width ox, = ort.run(['output'], {'input': ix}) batch, channels, scale_, height, scale_, width = ox.shape ox = ox.reshape((batch, channels, scale_ * height, scale_ * width)) ox = ox[..., :scale_ * actual_height, :scale_ * actual_width] # crop back return ox output_ = area_batch_run( input_, _method, tile_size=tile_size, tile_overlap=tile_overlap, batch_size=batch_size, scale=scale, silent=silent, process_title='CDC Upscale', ) output_ = np.clip(output_, a_min=0.0, a_max=1.0) return output_ class _Enhancer(ImageEnhancer): def __init__(self, model: str = 'HGSR-MHR-anime-aug_X4_320', tile_size: int = 512, tile_overlap: int = 64, batch_size: int = 1, silent: bool = False): self.model = model self.tile_size = tile_size self.tile_overlap = tile_overlap self.batch_size = batch_size self.silent = silent def _process_rgb(self, rgb_array: np.ndarray): return _upscale_for_rgb( rgb_array[None, ...], model=self.model, tile_size=self.tile_size, tile_overlap=self.tile_overlap, batch_size=self.batch_size, silent=self.silent, )[0] @lru_cache() def _get_enhancer(model: str = 'HGSR-MHR-anime-aug_X4_320', tile_size: int = 512, tile_overlap: int = 64, batch_size: int = 1, silent: bool = False): return _Enhancer(model, tile_size, tile_overlap, batch_size, silent) def upscale_with_cdc(image: ImageTyping, model: str = 'HGSR-MHR-anime-aug_X4_320', tile_size: int = 512, tile_overlap: int = 64, batch_size: int = 1, alpha_interpolation: int = cv2.INTER_LINEAR, silent: bool = False, ) -> Image.Image: silent: bool = False) -> Image.Image: """ Upscale the input image using the CDC upscaler model. Loading @@ -83,9 +141,6 @@ def upscale_with_cdc(image: ImageTyping, model: str = 'HGSR-MHR-anime-aug_X4_320 :param batch_size: The batch size. (default: 1) :type batch_size: int :param alpha_interpolation: Interpolation for :func:`cv2.resize`. Default is ``cv2.INTER_LINEAR``. :type alpha_interpolation: int :param silent: Whether to suppress progress messages. (default: False) :type silent: bool Loading @@ -107,33 +162,4 @@ def upscale_with_cdc(image: ImageTyping, model: str = 'HGSR-MHR-anime-aug_X4_320 >>> upscale_with_cdc(image) <PIL.Image.Image image mode=RGBA size=4672x4672 at 0x7F0E48EDB640> """ image, alpha_mask = _rgba_preprocess(image) image = load_image(image, mode='RGB', force_background='white') input_ = np.array(image).astype(np.float32) / 255.0 input_ = input_.transpose((2, 0, 1))[None, ...] ort, scale = _open_cdc_upscaler_model(model) def _method(ix): ix = ix.astype(np.float32) batch, channels, height, width = ix.shape p_height = 0 if height % _CDC_INPUT_UNIT == 0 else _CDC_INPUT_UNIT - (height % _CDC_INPUT_UNIT) p_width = 0 if width % _CDC_INPUT_UNIT == 0 else _CDC_INPUT_UNIT - (width % _CDC_INPUT_UNIT) if p_height > 0 or p_width > 0: # align to 16 ix = np.pad(ix, ((0, 0), (0, 0), (0, p_height), (0, p_width)), mode='reflect') actual_height, actual_width = height, width ox, = ort.run(['output'], {'input': ix}) batch, channels, scale_, height, scale_, width = ox.shape ox = ox.reshape((batch, channels, scale_ * height, scale_ * width)) ox = ox[..., :scale_ * actual_height, :scale_ * actual_width] # crop back return ox output_ = area_batch_run( input_, _method, tile_size=tile_size, tile_overlap=tile_overlap, batch_size=batch_size, scale=scale, silent=silent, process_title='CDC Upscale', ) output_ = np.clip(output_, a_min=0.0, a_max=1.0) ret_image = Image.fromarray((output_[0].transpose((1, 2, 0)) * 255).astype(np.uint8), 'RGB') return _rgba_postprocess(ret_image, alpha_mask, interpolation=alpha_interpolation) return _get_enhancer(model, tile_size, tile_overlap, batch_size, silent).process(image)
imgutils/upscale/transparent.pydeleted 100644 → 0 +0 −71 Original line number Diff line number Diff line from typing import Optional import cv2 import numpy as np from PIL import Image from ..data.image import ImageTyping, load_image def _has_alpha_channel(image: Image.Image) -> bool: """ Check if the image has an alpha channel. :param image: The image to check. :type image: Image.Image :return: True if the image has an alpha channel, False otherwise. :rtype: bool """ return any(band in {'A', 'a', 'P'} for band in image.getbands()) def _rgba_preprocess(image: ImageTyping): """ Preprocess the image for RGBA conversion. :param image: The image to preprocess. :type image: ImageTyping :return: Preprocessed image and alpha mask. :rtype: Tuple[Image.Image, Optional[np.ndarray]] """ image = load_image(image, force_background=None, mode=None) if _has_alpha_channel(image): image = image.convert('RGBA') pimage = image.convert('RGB') alpha_mask = np.array(image)[:, :, 3].astype(np.float32) / 255.0 else: pimage = image.convert('RGB') alpha_mask = None return pimage, alpha_mask def _rgba_postprocess(pimage, alpha_mask: Optional[np.ndarray] = None, interpolation: int = cv2.INTER_LINEAR): """ Postprocess the image after RGBA conversion. :param pimage: The processed image. :type pimage: Image.Image :param alpha_mask: The alpha mask. :type alpha_mask: Optional[np.ndarray] :param interpolation: Interpolation for :func:`cv2.resize`. Default is ``cv2.INTER_LINEAR``. :type interpolation: int :return: Postprocessed image. :rtype: Image.Image """ assert pimage.mode == 'RGB' if alpha_mask is None: return pimage else: channels = np.array(pimage) alpha_mask = cv2.resize(alpha_mask, channels.shape[:2], interpolation=interpolation) alpha_mask = np.clip(alpha_mask, a_min=0.0, a_max=1.0) alpha_channel = (alpha_mask * 255.0).astype(np.uint8)[..., np.newaxis] rgba_channels = np.concatenate([channels, alpha_channel], axis=-1) assert rgba_channels.shape == (*channels.shape[:-1], 4) return Image.fromarray(rgba_channels, mode='RGBA')