Commit d6d95042 authored by narugo1992's avatar narugo1992
Browse files

dev(narugo): add blip code

parent d3dfe641
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -99,3 +99,17 @@ create_transforms_from_bit_processor



create_blip_transforms
--------------------------------------------------------------------

.. autofunction:: create_blip_transforms



create_transforms_from_blip_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_blip_processor


+1 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@ Supported Processors:
"""
from .base import register_creators_for_transformers, NotProcessorTypeError, create_transforms_from_transformers
from .bit import create_bit_transforms, create_transforms_from_bit_processor
from .blip import create_blip_transforms, create_transforms_from_blip_processor
from .clip import create_clip_transforms, create_transforms_from_clip_processor
from .convnext import create_convnext_transforms, create_transforms_from_convnext_processor
from .siglip import create_siglip_transforms, create_transforms_from_siglip_processor
+70 −0
Original line number Diff line number Diff line
from PIL import Image

from .base import OPENAI_CLIP_STD, OPENAI_CLIP_MEAN, _DEFAULT, _check_transformers, NotProcessorTypeError, \
    register_creators_for_transformers
from ..pillow import PillowConvertRGB, PillowRescale, PillowNormalize, PillowToTensor, PillowResize, PillowCompose

# Fallback resize target that create_blip_transforms uses when its ``size``
# argument is left at the ``_DEFAULT`` sentinel.
_DEFAULT_SIZE = {"height": 384, "width": 384}


def create_blip_transforms(
        do_resize: bool = True,
        size=_DEFAULT,
        resample=Image.BICUBIC,
        do_rescale: bool = True,
        rescale_factor: float = 1 / 255,
        do_normalize: bool = True,
        image_mean=_DEFAULT,
        image_std=_DEFAULT,
        do_convert_rgb: bool = True,
):
    """
    Build a Pillow-based preprocessing pipeline from BLIP-style image processor options.

    The stages are appended in this order: optional RGB conversion, optional
    resize, tensor conversion, optional rescale correction, optional
    normalization.

    :param do_resize: Whether to resize the image to ``size``.
    :param size: Mapping with ``"height"`` and ``"width"`` keys. Falls back to
        ``_DEFAULT_SIZE`` when left at the ``_DEFAULT`` sentinel.
    :param resample: Interpolation mode handed to ``PillowResize``.
    :param do_rescale: Whether pixel values should be multiplied by ``rescale_factor``.
        When false, the output keeps the raw pixel value range.
    :param rescale_factor: Multiplier applied to raw pixel values when ``do_rescale`` is true.
    :param do_normalize: Whether to normalize with ``image_mean`` / ``image_std``.
    :param image_mean: Per-channel mean. Falls back to ``OPENAI_CLIP_MEAN``
        when left at the ``_DEFAULT`` sentinel.
    :param image_std: Per-channel standard deviation. Falls back to
        ``OPENAI_CLIP_STD`` when left at the ``_DEFAULT`` sentinel.
    :param do_convert_rgb: Whether to convert the image to RGB first.
    :return: A ``PillowCompose`` wrapping the selected stages.
    """
    size = size if size is not _DEFAULT else _DEFAULT_SIZE
    image_mean = image_mean if image_mean is not _DEFAULT else OPENAI_CLIP_MEAN
    image_std = image_std if image_std is not _DEFAULT else OPENAI_CLIP_STD

    transform_list = []

    # Convert to RGB if needed
    if do_convert_rgb:
        transform_list.append(PillowConvertRGB())

    # Resize if needed
    if do_resize:
        transform_list.append(PillowResize((size["height"], size["width"]), interpolation=resample))

    # Convert PIL to tensor. This step already scales values to [0, 1],
    # i.e. it behaves like an implicit rescale by 1 / 255.
    transform_list.append(PillowToTensor())

    if not do_rescale:
        # No rescaling was requested, so undo the implicit 1 / 255 scaling and
        # hand back values in the raw pixel range.
        # NOTE(review): assumes PillowRescale multiplies by its factor, as its
        # use in the branch below implies - confirm.
        transform_list.append(PillowRescale(255.0))
    elif rescale_factor != 1 / 255:
        # A custom factor was requested: compensate for the implicit 1 / 255
        # so the net effect equals ``pixel * rescale_factor``.
        transform_list.append(PillowRescale(rescale_factor * 255))

    # Normalize if needed
    if do_normalize:
        transform_list.append(PillowNormalize(mean=image_mean, std=image_std))

    return PillowCompose(transform_list)


@register_creators_for_transformers()
def create_transforms_from_blip_processor(processor):
    """
    Create the Pillow transform pipeline matching a ``BlipImageProcessor``.

    The preprocessing options are read from the processor instance and
    forwarded unchanged to :func:`create_blip_transforms`.

    :param processor: Object expected to be a ``transformers.BlipImageProcessor``.
    :return: The result of :func:`create_blip_transforms` for the processor's settings.
    :raises NotProcessorTypeError: If ``processor`` is not a ``BlipImageProcessor``.
    """
    # NOTE(review): presumably verifies that transformers is importable - confirm in .base
    _check_transformers()
    from transformers import BlipImageProcessor

    # Guard clause: reject anything that is not a BLIP image processor.
    if not isinstance(processor, BlipImageProcessor):
        raise NotProcessorTypeError(f'Unknown blip processor - {processor!r}.')

    # Attribute names on the processor coincide with the keyword names of
    # create_blip_transforms, so they can be forwarded one-to-one.
    option_names = (
        'do_resize',
        'size',
        'resample',
        'do_rescale',
        'rescale_factor',
        'do_normalize',
        'image_mean',
        'image_std',
        'do_convert_rgb',
    )
    options = {name: getattr(processor, name) for name in option_names}
    return create_blip_transforms(**options)
+82 −0
Original line number Diff line number Diff line
from unittest import skipUnless

import numpy as np
import pytest
from hbutils.testing import tmatrix

from imgutils.data import load_image
from imgutils.preprocess.transformers import create_transforms_from_transformers
from test.testings import get_testfile

# Record whether the optional ``transformers`` dependency can be imported, so
# tests that need it can be skipped when it is absent.
# ModuleNotFoundError is a subclass of ImportError, so catching ImportError
# covers both.
_HAS_TRANSFORMERS = True
try:
    import transformers
except ImportError:
    _HAS_TRANSFORMERS = False


# Hub repositories whose image processors are loaded and compared against the
# generated transform pipeline. Order is kept stable so the parametrized
# test order does not change.
_BLIP_REPO_IDS = [
    'blackhole33/Image2text',
    'StanfordAIMI/XrayCLIP__vit-b-16__laion2b-s34b-b88k',
    'gizmo-ai/blip-image-captioning-large',
    'sooh-j/blip2-vizwizqa',
    'ethzanalytics/blip2-flan-t5-xl-sharded',
    'dblasko/blip-dalle3-img2prompt',
    'dineshcr7/Final-BLIP-LORA',
    'advaitadasein/blip2-opt-6.7b',
    'moranyanuka/blip-image-captioning-base-mocha',
    'upro/blip',
    'Revrse/icon-captioning-model',
    'Yhyu13/instructblip-vicuna-7b-gptq-4bit',
    'moranyanuka/blip-image-captioning-large-mocha',
    'Mediocreatmybest/blip2-opt-2.7b_8bit',
    'ybelkada/blip-image-captioning-base-football-finetuned',

    'Salesforce/blip-image-captioning-large',
    'Salesforce/blip-image-captioning-base',
    'Salesforce/blip2-opt-2.7b',
    'Salesforce/blip-vqa-base',
    'Salesforce/instructblip-vicuna-7b',
    'Salesforce/blip2-flan-t5-xxl',
    'Salesforce/blip2-opt-6.7b',
    'Salesforce/blip2-flan-t5-xl',
    'Salesforce/blip-vqa-capfilt-large',
    'Salesforce/instructblip-vicuna-13b',
    'Salesforce/blip2-opt-6.7b-coco',
    'Salesforce/instructblip-flan-t5-xl',
    'Salesforce/instructblip-flan-t5-xxl',
    'Salesforce/blip-itm-base-coco',
    'Salesforce/blip2-flan-t5-xl-coco',
    'Salesforce/blip2-opt-2.7b-coco',
    'Salesforce/blip-itm-large-flickr',
    'Salesforce/blip2-itm-vit-g-coco',
    'Salesforce/blip2-itm-vit-g',
    'Salesforce/blip-itm-base-flickr',
    'Salesforce/blip-itm-large-coco'
]

# Test fixture images run through both pipelines.
_BLIP_SRC_IMAGES = [
    'png_640.png',
    'png_640_m90.png',
    'nude_girl.png',
    'dori_640.png',
    'nian_640.png',
]


@pytest.mark.unittest
class TestPreprocessTransformersBlip:
    """Check that generated transforms agree with the transformers image processors."""

    @skipUnless(_HAS_TRANSFORMERS, 'Transformers required.')
    @pytest.mark.parametrize(*tmatrix({
        'repo_id': _BLIP_REPO_IDS,
        'src_image': _BLIP_SRC_IMAGES,
    }))
    def test_blip_image_preprocess_align(self, src_image, repo_id):
        """
        Compare the generated pipeline's output with ``processor.preprocess``
        for one (repository, image) pair.
        """
        from transformers import AutoImageProcessor

        hf_processor = AutoImageProcessor.from_pretrained(repo_id)
        pipeline = create_transforms_from_transformers(hf_processor)

        img = load_image(get_testfile(src_image), mode='RGB', force_background='white')

        # Reference result comes from the transformers processor itself.
        reference = hf_processor.preprocess(img)['pixel_values'][0]
        actual = pipeline(img)
        np.testing.assert_array_almost_equal(actual, reference)