# ---------------------------------------------------------------------------
# docs/source/api_doc/preprocess/transformers.rst (+14 −0)
# @@ -99,3 +99,17 @@ create_transforms_from_bit_processor
# ---------------------------------------------------------------------------
#
# create_blip_transforms
# --------------------------------------------------------------------
#
# .. autofunction:: create_blip_transforms
#
#
#
# create_transforms_from_blip_processor
# --------------------------------------------------------------------
#
# .. autofunction:: create_transforms_from_blip_processor

# ---------------------------------------------------------------------------
# imgutils/preprocess/transformers/__init__.py (+1 −0)
# @@ -8,6 +8,7 @@ Supported Processors:
# """   (context line: end of the module docstring)
# ---------------------------------------------------------------------------
from .base import register_creators_for_transformers, NotProcessorTypeError, create_transforms_from_transformers
from .bit import create_bit_transforms, create_transforms_from_bit_processor
from .blip import create_blip_transforms, create_transforms_from_blip_processor
from .clip import create_clip_transforms, create_transforms_from_clip_processor
from .convnext import create_convnext_transforms, create_transforms_from_convnext_processor
from .siglip import create_siglip_transforms, create_transforms_from_siglip_processor

# ---------------------------------------------------------------------------
# imgutils/preprocess/transformers/blip.py (new file, mode 100644, +70 −0)
# ---------------------------------------------------------------------------
from PIL import Image

from .base import OPENAI_CLIP_STD, OPENAI_CLIP_MEAN, _DEFAULT, _check_transformers, NotProcessorTypeError, \
    register_creators_for_transformers
from ..pillow import PillowConvertRGB, PillowRescale, PillowNormalize, PillowToTensor, PillowResize, PillowCompose

# Target size used when the caller does not pass ``size``.
_DEFAULT_SIZE = {"height": 384, "width": 384}


def create_blip_transforms(
        do_resize: bool = True,
        size=_DEFAULT,
        resample=Image.BICUBIC,
        do_rescale: bool = True,
        rescale_factor: float = 1 / 255,
        do_normalize: bool = True,
        image_mean=_DEFAULT,
        image_std=_DEFAULT,
        do_convert_rgb: bool = True,
):
    """
    Build a Pillow-based preprocessing pipeline from BLIP-style image processor options.

    Stages are appended in a fixed order: RGB conversion, resize, tensor
    conversion, rescale correction, normalization. A stage whose flag is off
    is omitted; tensor conversion is always present.

    :param do_resize: Whether to resize the image to ``size``.
    :param size: Mapping with ``"height"`` and ``"width"`` keys. Falls back to
        384x384 when left at the ``_DEFAULT`` sentinel.
    :param resample: Interpolation handed to ``PillowResize``.
    :param do_rescale: Whether pixel values are multiplied by ``rescale_factor``.
        When ``False`` the pipeline yields unscaled pixel values.
    :param rescale_factor: Multiplier applied to raw pixel values when
        ``do_rescale`` is true.
    :param do_normalize: Whether to append mean/std normalization.
    :param image_mean: Normalization mean; falls back to ``OPENAI_CLIP_MEAN``.
    :param image_std: Normalization std; falls back to ``OPENAI_CLIP_STD``.
    :param do_convert_rgb: Whether to convert the input image to RGB first.
    :return: A ``PillowCompose`` wrapping the selected stages.
    """
    # ``_DEFAULT`` is a sentinel, so ``None`` or other explicit values pass through untouched.
    size = size if size is not _DEFAULT else _DEFAULT_SIZE
    image_mean = image_mean if image_mean is not _DEFAULT else OPENAI_CLIP_MEAN
    image_std = image_std if image_std is not _DEFAULT else OPENAI_CLIP_STD

    transform_list = []

    # Convert to RGB if needed
    if do_convert_rgb:
        transform_list.append(PillowConvertRGB())

    # Resize if needed
    if do_resize:
        transform_list.append(PillowResize((size["height"], size["width"]), interpolation=resample))

    # Convert PIL to tensor (which automatically scales to [0,1], i.e. multiplies by 1/255)
    transform_list.append(PillowToTensor())

    if do_rescale:
        # The tensor conversion already applied 1/255; only correct for a different factor.
        if rescale_factor != 1 / 255:
            transform_list.append(PillowRescale(rescale_factor * 255))
    else:
        # No rescaling requested: undo the implicit 1/255 so values stay in the raw pixel range.
        transform_list.append(PillowRescale(255.0))

    # Normalize if needed
    if do_normalize:
        transform_list.append(PillowNormalize(mean=image_mean, std=image_std))

    return PillowCompose(transform_list)


@register_creators_for_transformers()
def create_transforms_from_blip_processor(processor):
    """
    Create the Pillow transform pipeline equivalent to a ``BlipImageProcessor``.

    :param processor: Candidate processor object; must be a
        ``transformers.BlipImageProcessor`` instance.
    :return: The pipeline built by :func:`create_blip_transforms` from the
        processor's attributes.
    :raises NotProcessorTypeError: If ``processor`` is not a ``BlipImageProcessor``.
    """
    _check_transformers()
    # Imported lazily, after the availability check above.
    from transformers import BlipImageProcessor

    if not isinstance(processor, BlipImageProcessor):
        raise NotProcessorTypeError(f'Unknown blip processor - {processor!r}.')
    processor: BlipImageProcessor

    return create_blip_transforms(
        do_resize=processor.do_resize,
        size=processor.size,
        resample=processor.resample,
        do_rescale=processor.do_rescale,
        rescale_factor=processor.rescale_factor,
        do_normalize=processor.do_normalize,
        image_mean=processor.image_mean,
        image_std=processor.image_std,
        do_convert_rgb=processor.do_convert_rgb,
    )


# ---------------------------------------------------------------------------
# test/preprocess/transformers/test_blip.py (new file, mode 100644, +82 −0)
# ---------------------------------------------------------------------------
from unittest import skipUnless

import numpy as np
import pytest
from hbutils.testing import tmatrix

from imgutils.data import load_image
from imgutils.preprocess.transformers import create_transforms_from_transformers
from test.testings import get_testfile

try:
    import transformers
except (ImportError, ModuleNotFoundError):
    _HAS_TRANSFORMERS = False
else:
    _HAS_TRANSFORMERS = True


@pytest.mark.unittest
class TestPreprocessTransformersBlip:
    """Checks that the Pillow pipeline reproduces the processor's own output."""

    @skipUnless(_HAS_TRANSFORMERS, 'Transformers required.')
    @pytest.mark.parametrize(*tmatrix({
        'repo_id': [
            'blackhole33/Image2text',
            'StanfordAIMI/XrayCLIP__vit-b-16__laion2b-s34b-b88k',
            'gizmo-ai/blip-image-captioning-large',
            'sooh-j/blip2-vizwizqa',
            'ethzanalytics/blip2-flan-t5-xl-sharded',
            'dblasko/blip-dalle3-img2prompt',
            'dineshcr7/Final-BLIP-LORA',
            'advaitadasein/blip2-opt-6.7b',
            'moranyanuka/blip-image-captioning-base-mocha',
            'upro/blip',
            'Revrse/icon-captioning-model',
            'Yhyu13/instructblip-vicuna-7b-gptq-4bit',
            'moranyanuka/blip-image-captioning-large-mocha',
            'Mediocreatmybest/blip2-opt-2.7b_8bit',
            'ybelkada/blip-image-captioning-base-football-finetuned',
            'Salesforce/blip-image-captioning-large',
            'Salesforce/blip-image-captioning-base',
            'Salesforce/blip2-opt-2.7b',
            'Salesforce/blip-vqa-base',
            'Salesforce/instructblip-vicuna-7b',
            'Salesforce/blip2-flan-t5-xxl',
            'Salesforce/blip2-opt-6.7b',
            'Salesforce/blip2-flan-t5-xl',
            'Salesforce/blip-vqa-capfilt-large',
            'Salesforce/instructblip-vicuna-13b',
            'Salesforce/blip2-opt-6.7b-coco',
            'Salesforce/instructblip-flan-t5-xl',
            'Salesforce/instructblip-flan-t5-xxl',
            'Salesforce/blip-itm-base-coco',
            'Salesforce/blip2-flan-t5-xl-coco',
            'Salesforce/blip2-opt-2.7b-coco',
            'Salesforce/blip-itm-large-flickr',
            'Salesforce/blip2-itm-vit-g-coco',
            'Salesforce/blip2-itm-vit-g',
            'Salesforce/blip-itm-base-flickr',
            'Salesforce/blip-itm-large-coco'
        ],
        'src_image': [
            'png_640.png',
            'png_640_m90.png',
            'nude_girl.png',
            'dori_640.png',
            'nian_640.png',
        ]
    }))
    def test_blip_image_preprocess_align(self, src_image, repo_id):
        """Compare the generated pipeline with ``processor.preprocess`` on one image."""
        from transformers import AutoImageProcessor

        image = load_image(get_testfile(src_image), mode='RGB', force_background='white')
        processor = AutoImageProcessor.from_pretrained(repo_id)
        trans = create_transforms_from_transformers(processor)

        expected_output = processor.preprocess(image)['pixel_values'][0]
        output = trans(image)
        np.testing.assert_array_almost_equal(
            output,
            expected_output,
        )
docs/source/api_doc/preprocess/transformers.rst (+14 −0)

@@ -99,3 +99,17 @@ create_transforms_from_bit_processor

create_blip_transforms
--------------------------------------------------------------------

.. autofunction:: create_blip_transforms



create_transforms_from_blip_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_blip_processor
imgutils/preprocess/transformers/__init__.py +1 −0 Original line number Diff line number Diff line Loading @@ -8,6 +8,7 @@ Supported Processors: """ from .base import register_creators_for_transformers, NotProcessorTypeError, create_transforms_from_transformers from .bit import create_bit_transforms, create_transforms_from_bit_processor from .blip import create_blip_transforms, create_transforms_from_blip_processor from .clip import create_clip_transforms, create_transforms_from_clip_processor from .convnext import create_convnext_transforms, create_transforms_from_convnext_processor from .siglip import create_siglip_transforms, create_transforms_from_siglip_processor Loading
# imgutils/preprocess/transformers/blip.py (new file, mode 100644, +70 −0)
from PIL import Image

from .base import OPENAI_CLIP_STD, OPENAI_CLIP_MEAN, _DEFAULT, _check_transformers, NotProcessorTypeError, \
    register_creators_for_transformers
from ..pillow import PillowConvertRGB, PillowRescale, PillowNormalize, PillowToTensor, PillowResize, PillowCompose

# Target size used when the caller does not pass ``size``.
_DEFAULT_SIZE = {"height": 384, "width": 384}


def create_blip_transforms(
        do_resize: bool = True,
        size=_DEFAULT,
        resample=Image.BICUBIC,
        do_rescale: bool = True,
        rescale_factor: float = 1 / 255,
        do_normalize: bool = True,
        image_mean=_DEFAULT,
        image_std=_DEFAULT,
        do_convert_rgb: bool = True,
):
    """
    Build a Pillow-based preprocessing pipeline from BLIP-style image processor options.

    Stages are appended in a fixed order: RGB conversion, resize, tensor
    conversion, rescale correction, normalization. A stage whose flag is off
    is omitted; tensor conversion is always present.

    :param do_resize: Whether to resize the image to ``size``.
    :param size: Mapping with ``"height"`` and ``"width"`` keys. Falls back to
        384x384 when left at the ``_DEFAULT`` sentinel.
    :param resample: Interpolation handed to ``PillowResize``.
    :param do_rescale: Whether pixel values are multiplied by ``rescale_factor``.
        When ``False`` the pipeline yields unscaled pixel values.
    :param rescale_factor: Multiplier applied to raw pixel values when
        ``do_rescale`` is true.
    :param do_normalize: Whether to append mean/std normalization.
    :param image_mean: Normalization mean; falls back to ``OPENAI_CLIP_MEAN``.
    :param image_std: Normalization std; falls back to ``OPENAI_CLIP_STD``.
    :param do_convert_rgb: Whether to convert the input image to RGB first.
    :return: A ``PillowCompose`` wrapping the selected stages.
    """
    # ``_DEFAULT`` is a sentinel, so ``None`` or other explicit values pass through untouched.
    size = size if size is not _DEFAULT else _DEFAULT_SIZE
    image_mean = image_mean if image_mean is not _DEFAULT else OPENAI_CLIP_MEAN
    image_std = image_std if image_std is not _DEFAULT else OPENAI_CLIP_STD

    transform_list = []

    # Convert to RGB if needed
    if do_convert_rgb:
        transform_list.append(PillowConvertRGB())

    # Resize if needed
    if do_resize:
        transform_list.append(PillowResize((size["height"], size["width"]), interpolation=resample))

    # Convert PIL to tensor (which automatically scales to [0,1], i.e. multiplies by 1/255)
    transform_list.append(PillowToTensor())

    if do_rescale:
        # The tensor conversion already applied 1/255; only correct for a different factor.
        if rescale_factor != 1 / 255:
            transform_list.append(PillowRescale(rescale_factor * 255))
    else:
        # No rescaling requested: undo the implicit 1/255 so values stay in the raw pixel range.
        transform_list.append(PillowRescale(255.0))

    # Normalize if needed
    if do_normalize:
        transform_list.append(PillowNormalize(mean=image_mean, std=image_std))

    return PillowCompose(transform_list)


@register_creators_for_transformers()
def create_transforms_from_blip_processor(processor):
    """
    Create the Pillow transform pipeline equivalent to a ``BlipImageProcessor``.

    :param processor: Candidate processor object; must be a
        ``transformers.BlipImageProcessor`` instance.
    :return: The pipeline built by :func:`create_blip_transforms` from the
        processor's attributes.
    :raises NotProcessorTypeError: If ``processor`` is not a ``BlipImageProcessor``.
    """
    _check_transformers()
    # Imported lazily, after the availability check above.
    from transformers import BlipImageProcessor

    if not isinstance(processor, BlipImageProcessor):
        raise NotProcessorTypeError(f'Unknown blip processor - {processor!r}.')
    processor: BlipImageProcessor

    return create_blip_transforms(
        do_resize=processor.do_resize,
        size=processor.size,
        resample=processor.resample,
        do_rescale=processor.do_rescale,
        rescale_factor=processor.rescale_factor,
        do_normalize=processor.do_normalize,
        image_mean=processor.image_mean,
        image_std=processor.image_std,
        do_convert_rgb=processor.do_convert_rgb,
    )
# test/preprocess/transformers/test_blip.py (new file, mode 100644, +82 −0)
from unittest import skipUnless

import numpy as np
import pytest
from hbutils.testing import tmatrix

from imgutils.data import load_image
from imgutils.preprocess.transformers import create_transforms_from_transformers
from test.testings import get_testfile

# Probe for the optional dependency once at import time.
try:
    import transformers
except ImportError:  # ModuleNotFoundError is a subclass of ImportError
    _HAS_TRANSFORMERS = False
else:
    _HAS_TRANSFORMERS = True

# Hub repositories whose image processor configuration is exercised by the test.
_REPO_IDS = [
    'blackhole33/Image2text',
    'StanfordAIMI/XrayCLIP__vit-b-16__laion2b-s34b-b88k',
    'gizmo-ai/blip-image-captioning-large',
    'sooh-j/blip2-vizwizqa',
    'ethzanalytics/blip2-flan-t5-xl-sharded',
    'dblasko/blip-dalle3-img2prompt',
    'dineshcr7/Final-BLIP-LORA',
    'advaitadasein/blip2-opt-6.7b',
    'moranyanuka/blip-image-captioning-base-mocha',
    'upro/blip',
    'Revrse/icon-captioning-model',
    'Yhyu13/instructblip-vicuna-7b-gptq-4bit',
    'moranyanuka/blip-image-captioning-large-mocha',
    'Mediocreatmybest/blip2-opt-2.7b_8bit',
    'ybelkada/blip-image-captioning-base-football-finetuned',
    'Salesforce/blip-image-captioning-large',
    'Salesforce/blip-image-captioning-base',
    'Salesforce/blip2-opt-2.7b',
    'Salesforce/blip-vqa-base',
    'Salesforce/instructblip-vicuna-7b',
    'Salesforce/blip2-flan-t5-xxl',
    'Salesforce/blip2-opt-6.7b',
    'Salesforce/blip2-flan-t5-xl',
    'Salesforce/blip-vqa-capfilt-large',
    'Salesforce/instructblip-vicuna-13b',
    'Salesforce/blip2-opt-6.7b-coco',
    'Salesforce/instructblip-flan-t5-xl',
    'Salesforce/instructblip-flan-t5-xxl',
    'Salesforce/blip-itm-base-coco',
    'Salesforce/blip2-flan-t5-xl-coco',
    'Salesforce/blip2-opt-2.7b-coco',
    'Salesforce/blip-itm-large-flickr',
    'Salesforce/blip2-itm-vit-g-coco',
    'Salesforce/blip2-itm-vit-g',
    'Salesforce/blip-itm-base-flickr',
    'Salesforce/blip-itm-large-coco'
]

# Test images fed through both pipelines.
_SRC_IMAGES = [
    'png_640.png',
    'png_640_m90.png',
    'nude_girl.png',
    'dori_640.png',
    'nian_640.png',
]


@pytest.mark.unittest
class TestPreprocessTransformersBlip:
    """Checks that the Pillow pipeline reproduces the processor's own output."""

    @skipUnless(_HAS_TRANSFORMERS, 'Transformers required.')
    @pytest.mark.parametrize(*tmatrix({
        'repo_id': _REPO_IDS,
        'src_image': _SRC_IMAGES,
    }))
    def test_blip_image_preprocess_align(self, src_image, repo_id):
        """Compare the generated pipeline with ``processor.preprocess`` on one image."""
        from transformers import AutoImageProcessor

        picture = load_image(get_testfile(src_image), mode='RGB', force_background='white')
        hf_processor = AutoImageProcessor.from_pretrained(repo_id)
        pipeline = create_transforms_from_transformers(hf_processor)

        # Reference result straight from the processor; first (only) image in the batch.
        reference = hf_processor.preprocess(picture)['pixel_values'][0]
        actual = pipeline(picture)

        np.testing.assert_array_almost_equal(actual, reference)