Commit 8e6b650a authored by dmMaze
Browse files

Update detect size (default 1280 → 1024; add 896 option)

parent 3e945a54
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -54,8 +54,8 @@ class ComicTextDetector(TextDetectorBase):
    params = {
        'detect_size': {
            'type': 'selector',
            'options': [1024, 1152, 1280], 
            'select': 1280
            'options': [896, 1024, 1152, 1280], 
            'select': 1024
        }, 
        'det_rearrange_max_batches': {
            'type': 'selector',
@@ -68,7 +68,7 @@ class ComicTextDetector(TextDetectorBase):
    _load_model_keys = {'model'}

    device = DEFAULT_DEVICE
    detect_size = 1280
    detect_size = 1024
    def __init__(self, **params) -> None:
        super().__init__(**params)

+10 −3
Original line number Diff line number Diff line
@@ -25,7 +25,8 @@ def det_rearrange_forward(
    dbnet_batch_forward: Callable[[np.ndarray, str], Tuple[np.ndarray, np.ndarray]], 
    tgt_size: int = 1280, 
    max_batch_size: int = 4, 
    device='cuda', verbose=False):
    device='cuda', 
    crop_as_square=False, verbose=False):
    '''
    Rearrange image to square batches before feeding into network if following conditions are satisfied: \n
    1. Extreme aspect ratio
@@ -105,6 +106,9 @@ def det_rearrange_forward(
    if transpose:
        img = einops.rearrange(img, 'h w c -> w h c')
    
    if crop_as_square:
        pw_num = 1
    else:
        pw_num = max(int(np.floor(2 * tgt_size / w)), 2)
    patch_size = ph = pw_num * w

@@ -130,7 +134,7 @@ def det_rearrange_forward(
        batch = np.array(batch)
        db, mask = dbnet_batch_forward(batch, device=device)

        for d, m in zip(db, mask):
        for ii, (d, m) in enumerate(zip(db, mask)):
            if pad_size > 0:
                paddb = int(db.shape[-1] / tgt_size * pad_size)
                padmsk = int(mask.shape[-1] / tgt_size * pad_size)
@@ -138,6 +142,9 @@ def det_rearrange_forward(
                m = m[..., :-padmsk, :-padmsk]
            db_lst.append(d)
            mask_lst.append(m)
            if verbose:
                cv2.imwrite(f'result/rearrange_db_{ii}.png', (d[0] * 255).astype(np.uint8))
                cv2.imwrite(f'result/rearrange_thr_{ii}.png', (d[1] * 255).astype(np.uint8))

    db = _unrearrange(db_lst, transpose, channel=2, pad_num=pad_num)
    mask = _unrearrange(mask_lst, transpose, channel=1, pad_num=pad_num)
+1 −1
Original line number Diff line number Diff line
@@ -351,7 +351,7 @@ def square_pad_resize(img: np.ndarray, tgt_size: int):
    down_scale_ratio = tgt_size / img.shape[0]
    assert down_scale_ratio <= 1
    if down_scale_ratio < 1:
        img = cv2.resize(img, (tgt_size, tgt_size), interpolation=cv2.INTER_LINEAR)
        img = cv2.resize(img, (tgt_size, tgt_size), interpolation=cv2.INTER_AREA)

    return img, down_scale_ratio, pad_h, pad_w