Loading modules/textdetector/__init__.py +3 −3 Original line number Diff line number Diff line Loading @@ -54,8 +54,8 @@ class ComicTextDetector(TextDetectorBase): params = { 'detect_size': { 'type': 'selector', 'options': [1024, 1152, 1280], 'select': 1280 'options': [896, 1024, 1152, 1280], 'select': 1024 }, 'det_rearrange_max_batches': { 'type': 'selector', Loading @@ -68,7 +68,7 @@ class ComicTextDetector(TextDetectorBase): _load_model_keys = {'model'} device = DEFAULT_DEVICE detect_size = 1280 detect_size = 1024 def __init__(self, **params) -> None: super().__init__(**params) Loading modules/textdetector/ctd/inference.py +10 −3 Original line number Diff line number Diff line Loading @@ -25,7 +25,8 @@ def det_rearrange_forward( dbnet_batch_forward: Callable[[np.ndarray, str], Tuple[np.ndarray, np.ndarray]], tgt_size: int = 1280, max_batch_size: int = 4, device='cuda', verbose=False): device='cuda', crop_as_square=False, verbose=False): ''' Rearrange image to square batches before feeding into network if following conditions are satisfied: \n 1. Extreme aspect ratio Loading Loading @@ -105,6 +106,9 @@ def det_rearrange_forward( if transpose: img = einops.rearrange(img, 'h w c -> w h c') if crop_as_square: pw_num = 1 else: pw_num = max(int(np.floor(2 * tgt_size / w)), 2) patch_size = ph = pw_num * w Loading @@ -130,7 +134,7 @@ def det_rearrange_forward( batch = np.array(batch) db, mask = dbnet_batch_forward(batch, device=device) for d, m in zip(db, mask): for ii, (d, m) in enumerate(zip(db, mask)): if pad_size > 0: paddb = int(db.shape[-1] / tgt_size * pad_size) padmsk = int(mask.shape[-1] / tgt_size * pad_size) Loading @@ -138,6 +142,9 @@ def det_rearrange_forward( m = m[..., :-padmsk, :-padmsk] db_lst.append(d) mask_lst.append(m) if verbose: cv2.imwrite(f'result/rearrange_db_{ii}.png', (d[0] * 255).astype(np.uint8)) cv2.imwrite(f'result/rearrange_thr_{ii}.png', (d[1] * 255).astype(np.uint8)) db = _unrearrange(db_lst, transpose, channel=2, pad_num=pad_num) mask = _unrearrange(mask_lst, transpose, channel=1, pad_num=pad_num) Loading utils/imgproc_utils.py +1 −1 Original line number Diff line number Diff line Loading @@ -351,7 +351,7 @@ def square_pad_resize(img: np.ndarray, tgt_size: int): down_scale_ratio = tgt_size / img.shape[0] assert down_scale_ratio <= 1 if down_scale_ratio < 1: img = cv2.resize(img, (tgt_size, tgt_size), interpolation=cv2.INTER_LINEAR) img = cv2.resize(img, (tgt_size, tgt_size), interpolation=cv2.INTER_AREA) return img, down_scale_ratio, pad_h, pad_w Loading Loading
modules/textdetector/__init__.py +3 −3 Original line number Diff line number Diff line Loading @@ -54,8 +54,8 @@ class ComicTextDetector(TextDetectorBase): params = { 'detect_size': { 'type': 'selector', 'options': [1024, 1152, 1280], 'select': 1280 'options': [896, 1024, 1152, 1280], 'select': 1024 }, 'det_rearrange_max_batches': { 'type': 'selector', Loading @@ -68,7 +68,7 @@ class ComicTextDetector(TextDetectorBase): _load_model_keys = {'model'} device = DEFAULT_DEVICE detect_size = 1280 detect_size = 1024 def __init__(self, **params) -> None: super().__init__(**params) Loading
modules/textdetector/ctd/inference.py +10 −3 Original line number Diff line number Diff line Loading @@ -25,7 +25,8 @@ def det_rearrange_forward( dbnet_batch_forward: Callable[[np.ndarray, str], Tuple[np.ndarray, np.ndarray]], tgt_size: int = 1280, max_batch_size: int = 4, device='cuda', verbose=False): device='cuda', crop_as_square=False, verbose=False): ''' Rearrange image to square batches before feeding into network if following conditions are satisfied: \n 1. Extreme aspect ratio Loading Loading @@ -105,6 +106,9 @@ def det_rearrange_forward( if transpose: img = einops.rearrange(img, 'h w c -> w h c') if crop_as_square: pw_num = 1 else: pw_num = max(int(np.floor(2 * tgt_size / w)), 2) patch_size = ph = pw_num * w Loading @@ -130,7 +134,7 @@ def det_rearrange_forward( batch = np.array(batch) db, mask = dbnet_batch_forward(batch, device=device) for d, m in zip(db, mask): for ii, (d, m) in enumerate(zip(db, mask)): if pad_size > 0: paddb = int(db.shape[-1] / tgt_size * pad_size) padmsk = int(mask.shape[-1] / tgt_size * pad_size) Loading @@ -138,6 +142,9 @@ def det_rearrange_forward( m = m[..., :-padmsk, :-padmsk] db_lst.append(d) mask_lst.append(m) if verbose: cv2.imwrite(f'result/rearrange_db_{ii}.png', (d[0] * 255).astype(np.uint8)) cv2.imwrite(f'result/rearrange_thr_{ii}.png', (d[1] * 255).astype(np.uint8)) db = _unrearrange(db_lst, transpose, channel=2, pad_num=pad_num) mask = _unrearrange(mask_lst, transpose, channel=1, pad_num=pad_num) Loading
utils/imgproc_utils.py +1 −1 Original line number Diff line number Diff line Loading @@ -351,7 +351,7 @@ def square_pad_resize(img: np.ndarray, tgt_size: int): down_scale_ratio = tgt_size / img.shape[0] assert down_scale_ratio <= 1 if down_scale_ratio < 1: img = cv2.resize(img, (tgt_size, tgt_size), interpolation=cv2.INTER_LINEAR) img = cv2.resize(img, (tgt_size, tgt_size), interpolation=cv2.INTER_AREA) return img, down_scale_ratio, pad_h, pad_w Loading