Commit e6c8d1f8 authored by narugo1992's avatar narugo1992
Browse files

dev(narugo): add convnext support

parent 0a741eb8
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
from .base import register_creators_for_transformers, NotProcessorTypeError, create_transforms_from_transformers
from .clip import create_clip_transforms, create_transforms_from_clip_processor
from .convnext import create_convnext_transforms, create_transforms_from_convnext_processor
+9 −0
Original line number Diff line number Diff line
@@ -12,6 +12,15 @@ def _check_transformers():
                               'Please install it by `pip install dghs-imgutils[transformers]`.')


# Shared mean/std constants for the normalization step of the transform
# creators. Each list has three entries (presumably one per RGB channel).
# NOTE(review): names and values look like they mirror the constants of the
# same name in HuggingFace transformers' image utilities - confirm.
IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406]
IMAGENET_DEFAULT_STD = [0.229, 0.224, 0.225]
IMAGENET_STANDARD_MEAN = [0.5, 0.5, 0.5]
IMAGENET_STANDARD_STD = [0.5, 0.5, 0.5]
OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711]

# Sentinel object used as a keyword default meaning "argument not supplied".
# Creators test it by identity (``is not _DEFAULT``) before substituting
# their own fallback value.
_DEFAULT = object()

class NotProcessorTypeError(TypeError):
    """
    Raised by a transform creator when the object it receives is not an
    instance of the processor class that creator handles.
    """

+4 −6
Original line number Diff line number Diff line
from PIL import Image

from .base import _check_transformers, NotProcessorTypeError, register_creators_for_transformers
from .base import _check_transformers, NotProcessorTypeError, register_creators_for_transformers, OPENAI_CLIP_MEAN, \
    OPENAI_CLIP_STD, _DEFAULT
from ..pillow import PillowResize, PillowCenterCrop, PillowToTensor, PillowNormalize, PillowCompose, PillowRescale, \
    PillowConvertRGB

_DEFAULT_SIZE = {"shortest_edge": 224}
_DEFAULT_CROP_SIZE = {"height": 224, "width": 224}
_DEFAULT_IMAGE_MEAN = [0.48145466, 0.4578275, 0.40821073]
_DEFAULT_IMAGE_STD = [0.26862954, 0.26130258, 0.27577711]
_DEFAULT = object()


def create_clip_transforms(
@@ -26,8 +24,8 @@ def create_clip_transforms(
):
    size = size if size is not _DEFAULT else _DEFAULT_SIZE
    crop_size = crop_size if crop_size is not _DEFAULT else _DEFAULT_CROP_SIZE
    image_mean = image_mean if image_mean is not _DEFAULT else _DEFAULT_IMAGE_MEAN
    image_std = image_std if image_std is not _DEFAULT else _DEFAULT_IMAGE_STD
    image_mean = image_mean if image_mean is not _DEFAULT else OPENAI_CLIP_MEAN
    image_std = image_std if image_std is not _DEFAULT else OPENAI_CLIP_STD

    transform_list = []

+73 −0
Original line number Diff line number Diff line
from PIL import Image

from .base import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, _DEFAULT, register_creators_for_transformers, \
    _check_transformers, NotProcessorTypeError
from ..pillow import PillowRescale, PillowResize, PillowCenterCrop, PillowToTensor, PillowNormalize, PillowCompose

# Fallback for the ``size`` argument of ``create_convnext_transforms`` when it
# is left at the ``_DEFAULT`` sentinel; only the "shortest_edge" key is read.
_DEFAULT_SIZE = {"shortest_edge": 384}
# Fallback for ``crop_pct``: the requested shortest edge is divided by this
# ratio to get the pre-crop resize edge (used only when the edge is below 384).
_DEFAULT_CROP_PCT = 224 / 256


def create_convnext_transforms(
        do_resize: bool = True,
        size=_DEFAULT,
        crop_pct: float = _DEFAULT,
        resample=Image.BILINEAR,
        do_rescale: bool = True,
        rescale_factor: float = 1 / 255,
        do_normalize: bool = True,
        image_mean=_DEFAULT,
        image_std=_DEFAULT,
):
    """
    Build a Pillow-based transform pipeline for ConvNext-style preprocessing.

    The pipeline is assembled in this order: optional resize (with a center
    crop for small targets), tensor conversion, optional extra rescale and
    optional normalization.

    :param do_resize: Whether to add the resize (and possibly crop) steps.
    :param size: Mapping whose ``"shortest_edge"`` entry gives the target edge.
        Falls back to ``_DEFAULT_SIZE`` when omitted.
    :param crop_pct: Ratio used to compute the pre-crop resize edge when the
        target edge is below 384. Falls back to ``_DEFAULT_CROP_PCT``.
    :param resample: Interpolation mode forwarded to ``PillowResize``.
    :param do_rescale: Whether a rescale step may be added after tensor
        conversion. When ``False`` no extra rescale step is added.
    :param rescale_factor: Requested rescale factor; an extra step is added
        only when it differs from ``1 / 255``.
    :param do_normalize: Whether to append the normalization step.
    :param image_mean: Mean for normalization. Falls back to
        ``IMAGENET_STANDARD_MEAN``.
    :param image_std: Std for normalization. Falls back to
        ``IMAGENET_STANDARD_STD``.
    :return: A ``PillowCompose`` wrapping the assembled steps.
    """
    # Resolve sentinel defaults explicitly, one argument at a time.
    if size is _DEFAULT:
        size = _DEFAULT_SIZE
    if crop_pct is _DEFAULT:
        crop_pct = _DEFAULT_CROP_PCT
    if image_mean is _DEFAULT:
        image_mean = IMAGENET_STANDARD_MEAN
    if image_std is _DEFAULT:
        image_std = IMAGENET_STANDARD_STD

    steps = []

    if do_resize:
        target_edge = size["shortest_edge"]
        if target_edge < 384:
            # Small targets: resize the shortest edge to an enlarged value,
            # then center-crop down to the requested edge.
            enlarged_edge = int(target_edge / crop_pct)
            steps.append(PillowResize(enlarged_edge, interpolation=resample))
            steps.append(PillowCenterCrop(target_edge))
        else:
            # Large targets: resize straight to a square, no cropping.
            square = (target_edge, target_edge)
            steps.append(PillowResize(square, interpolation=resample))

    steps.append(PillowToTensor())

    # NOTE(review): presumably PillowToTensor already scales by 1/255, which
    # is why only the residual factor (rescale_factor * 255) is applied and
    # only when the requested factor is not exactly 1/255 - confirm.
    if do_rescale:
        if rescale_factor != 1 / 255:
            steps.append(PillowRescale(rescale_factor * 255))

    if do_normalize:
        steps.append(PillowNormalize(mean=image_mean, std=image_std))

    return PillowCompose(steps)


@register_creators_for_transformers()
def create_transforms_from_convnext_processor(processor):
    """
    Build the Pillow transform pipeline matching a ``ConvNextImageProcessor``.

    The processor's preprocessing attributes are forwarded unchanged to
    :func:`create_convnext_transforms`.

    :param processor: Object expected to be a
        ``transformers.ConvNextImageProcessor`` instance.
    :return: The pipeline returned by :func:`create_convnext_transforms`.
    :raises NotProcessorTypeError: If ``processor`` is not a
        ``ConvNextImageProcessor``.
    """
    _check_transformers()
    # Imported lazily, after the availability check above has run.
    from transformers import ConvNextImageProcessor

    if not isinstance(processor, ConvNextImageProcessor):
        # The message previously said "CLIP" (copied from the CLIP creator),
        # which misreported which creator rejected the processor.
        raise NotProcessorTypeError(f'Unknown ConvNext processor - {processor!r}.')
    processor: ConvNextImageProcessor

    return create_convnext_transforms(
        do_resize=processor.do_resize,
        size=processor.size,
        crop_pct=processor.crop_pct,
        resample=processor.resample,
        do_rescale=processor.do_rescale,
        rescale_factor=processor.rescale_factor,
        do_normalize=processor.do_normalize,
        image_mean=processor.image_mean,
        image_std=processor.image_std,
    )
+3 −3
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@ else:


@pytest.mark.unittest
class TestPreprocessTransformersAlign:
class TestPreprocessTransformersClip:
    @skipUnless(_HAS_TRANSFORMERS, 'Transformers required.')
    @pytest.mark.parametrize(*tmatrix({
        'repo_id': [
@@ -38,7 +38,7 @@ class TestPreprocessTransformersAlign:
            'nian_640.png',
        ]
    }))
    def test_image_preprocess_align(self, src_image, repo_id):
    def test_clip_image_preprocess_align(self, src_image, repo_id):
        from transformers import AutoImageProcessor
        image = load_image(get_testfile(src_image), mode='RGB', force_background='white')
        processor = AutoImageProcessor.from_pretrained(repo_id)
@@ -67,7 +67,7 @@ class TestPreprocessTransformersAlign:
            'nian_640.png',
        ]
    }))
    def test_auto_preprocess_align(self, src_image, repo_id):
    def test_clip_preprocess_align(self, src_image, repo_id):
        from transformers import AutoProcessor
        image = load_image(get_testfile(src_image), mode='RGB', force_background='white')
        processor = AutoProcessor.from_pretrained(repo_id)
Loading