Merge pull request #143 from deepghs/dev/transformers (3cadf7ee) · Commits · git-mirror / Imgutils

docs/source/api_doc/preprocess/transformers.rst

+48 −0

Original line number	Diff line number	Diff line
		@@ -27,6 +27,26 @@ create_transforms_from_transformers



		is_valid_size_dict
		--------------------------------------------------------------------

		.. autofunction:: is_valid_size_dict



		convert_to_size_dict
		--------------------------------------------------------------------

		.. autofunction:: convert_to_size_dict



		get_size_dict
		--------------------------------------------------------------------

		.. autofunction:: get_size_dict



		create_clip_transforms
		--------------------------------------------------------------------
		@@ -99,3 +119,31 @@ create_transforms_from_bit_processor



		create_blip_transforms
		--------------------------------------------------------------------

		.. autofunction:: create_blip_transforms



		create_transforms_from_blip_processor
		--------------------------------------------------------------------

		.. autofunction:: create_transforms_from_blip_processor



		create_mobilenetv2_transforms
		--------------------------------------------------------------------

		.. autofunction:: create_mobilenetv2_transforms



		create_transforms_from_mobilenetv2_processor
		--------------------------------------------------------------------

		.. autofunction:: create_transforms_from_mobilenetv2_processor

imgutils/preprocess/transformers/__init__.py

+3 −0

Original line number	Diff line number	Diff line
		@@ -8,7 +8,10 @@ Supported Processors:
		"""
		from .base import register_creators_for_transformers, NotProcessorTypeError, create_transforms_from_transformers
		from .bit import create_bit_transforms, create_transforms_from_bit_processor
		from .blip import create_blip_transforms, create_transforms_from_blip_processor
		from .clip import create_clip_transforms, create_transforms_from_clip_processor
		from .convnext import create_convnext_transforms, create_transforms_from_convnext_processor
		from .mobilenetv2 import create_mobilenetv2_transforms, create_transforms_from_mobilenetv2_processor
		from .siglip import create_siglip_transforms, create_transforms_from_siglip_processor
		from .size import is_valid_size_dict, convert_to_size_dict, get_size_dict
		from .vit import create_vit_transforms, create_transforms_from_vit_processor

imgutils/preprocess/transformers/base.py

+49 −27

Original line number	Diff line number	Diff line
		"""
		Transformers Integration Module
		Transformers Integration Module for Image Processing

		This module provides functionality for integrating with the transformers library,
		particularly for image processing tasks. It includes constants for standard image
		normalization values and utilities for creating image transforms from transformers
		processors.
		This module provides functionality for integrating with the Hugging Face transformers library,
		particularly focused on image processing tasks. It includes standard image normalization
		constants and utilities for creating image transforms from transformers processors.

		Usage:
		>>> from transformers import AutoImageProcessor
		>>> from imgutils.preprocess.transformers import create_transforms_from_transformers
		>>>
		>>> processor = AutoImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
		>>> transforms = create_transforms_from_transformers(processor)
		>>> transforms
		PillowCompose(
		PillowConvertRGB(force_background='white')
		PillowResize(size=224, interpolation=bicubic, max_size=None, antialias=True)
		PillowCenterCrop(size=(224, 224))
		PillowToTensor()
		PillowNormalize(mean=[0.48145467 0.4578275 0.40821072], std=[0.26862955 0.2613026 0.2757771 ])
		)
		"""

		try:
		@@ -17,9 +31,9 @@ else:

		def _check_transformers():
		"""
		Check if transformers library is available.
		Check if the transformers library is available in the current environment.

		:raises EnvironmentError: If transformers is not installed
		:raises EnvironmentError: If transformers is not installed, with instructions for installation
		"""
		if not _HAS_TRANSFORMERS:
		raise EnvironmentError('No torchvision available.\n'
		@@ -39,10 +53,13 @@ _DEFAULT = object()

		class NotProcessorTypeError(TypeError):
		"""
		Exception raised when a processor type is not recognized or supported.
		Exception raised when an unsupported processor type is encountered.

		This error occurs when attempting to create transforms from an unsupported
		or unknown transformers processor type.
		This custom exception is used when the system cannot create transforms
		from a given transformers processor, either because the processor type
		is not recognized or is not supported by any registered transform creators.

		:inherits: TypeError
		"""
		pass

		@@ -52,21 +69,23 @@ _FN_CREATORS = []

		def register_creators_for_transformers():
		"""
		Decorator for registering transform creator functions.
		Decorator that registers functions as transform creators for transformers processors.

		This decorator adds the decorated function to the list of available
		transform creators that will be tried when creating transforms from
		This decorator system allows for extensible support of different processor types.
		When a function is decorated with this decorator, it is added to the list of
		available transform creators that will be tried when creating transforms from
		a transformers processor.

		:return: Decorator function
		:return: Decorator function that registers the decorated function
		:rtype: callable

		:example:

		>>> @register_creators_for_transformers()
		>>> def my_transform_creator(processor):
		... # Create and return transforms
		... pass
		>>> def create_clip_transforms(processor):
		... if not hasattr(processor, 'feature_extractor'):
		... raise NotProcessorTypeError()
		... # Create and return transforms for CLIP
		... return transforms
		"""

		def _decorator(func):
		@@ -78,16 +97,19 @@ def register_creators_for_transformers():

		def create_transforms_from_transformers(processor):
		"""
		Create image transforms from a transformers processor.
		Create appropriate image transforms from a given transformers processor.

		This function attempts to create image transforms by iterating through
		registered creator functions until one successfully creates transforms
		for the given processor type.

		:param processor: A processor instance from the transformers library
		:type processor: transformers.ImageProcessor or similar

		This function attempts to create appropriate image transforms by trying
		each registered creator function until one succeeds.
		:return: A composition of image transforms suitable for the given processor
		:rtype: PillowCompose or similar transform object

		:param processor: A transformers processor object
		:type processor: object
		:return: Image transforms appropriate for the given processor
		:rtype: object
		:raises NotProcessorTypeError: If no suitable creator is found for the processor
		:raises NotProcessorTypeError: If no registered creator can handle the processor type

		:example:
		>>> from transformers import AutoImageProcessor

imgutils/preprocess/transformers/bit.py

+3 −7

Original line number	Diff line number	Diff line
		@@ -9,7 +9,8 @@ from PIL import Image

		from .base import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, _DEFAULT, register_creators_for_transformers, _check_transformers, \
		NotProcessorTypeError
		from ..pillow import PillowConvertRGB, PillowResize, PillowCenterCrop, PillowToTensor, PillowNormalize, PillowCompose, \
		from .size import _create_resize
		from ..pillow import PillowConvertRGB, PillowCenterCrop, PillowToTensor, PillowNormalize, PillowCompose, \
		PillowRescale

		_DEFAULT_SIZE = {"shortest_edge": 224}
		@@ -76,12 +77,7 @@ def create_bit_transforms(

		# Resize
		if do_resize:
		if "shortest_edge" in size:
		transform_list.append(PillowResize(size["shortest_edge"], interpolation=resample))
		elif "height" in size and "width" in size:
		transform_list.append(PillowResize((size["height"], size["width"]), interpolation=resample))
		else:
		raise ValueError(f'Unknown size configuration - {size!r}.') # pragma: no cover
		transform_list.append(_create_resize(size, resample=resample))

		# Center crop
		if do_center_crop:

imgutils/preprocess/transformers/blip.py

0 → 100644

+121 −0

Original line number	Diff line number	Diff line
		"""
		This module provides image transformation utilities specifically designed for BLIP (Bootstrapping Language-Image Pre-training) models.
		It includes functions to create transformation pipelines for processing images according to BLIP's requirements.

		The transformations include operations like resizing, RGB conversion, normalization, and tensor conversion,
		all implemented using Pillow-based operations.
		"""

		from PIL import Image

		from .base import OPENAI_CLIP_STD, OPENAI_CLIP_MEAN, _DEFAULT, _check_transformers, NotProcessorTypeError, \
		register_creators_for_transformers
		from .size import _create_resize
		from ..pillow import PillowConvertRGB, PillowRescale, PillowNormalize, PillowToTensor, PillowCompose

		_DEFAULT_SIZE = {"height": 384, "width": 384}


		def create_blip_transforms(
		        do_resize: bool = True,
		        size=_DEFAULT,
		        resample=Image.BICUBIC,
		        do_rescale: bool = True,
		        rescale_factor: float = 1 / 255,
		        do_normalize: bool = True,
		        image_mean=_DEFAULT,
		        image_std=_DEFAULT,
		        do_convert_rgb: bool = True,
		):
		    """
		    Assemble the Pillow-based preprocessing pipeline for BLIP models.

		    The steps are appended in a fixed order: optional RGB conversion,
		    optional resize, tensor conversion (always), optional extra rescale,
		    and optional normalization.

		    :param do_resize: Whether a resize step is added.
		    :type do_resize: bool
		    :param size: Size specification forwarded to ``_create_resize``.
		        When left at the sentinel default, ``{'height': 384, 'width': 384}`` is used.
		    :type size: dict
		    :param resample: Resampling filter forwarded to ``_create_resize``.
		        Defaults to ``PIL.Image.BICUBIC``.
		    :type resample: int
		    :param do_rescale: Whether pixel values should be rescaled.
		    :type do_rescale: bool
		    :param rescale_factor: Rescale factor for pixel values. Defaults to 1/255.
		    :type rescale_factor: float
		    :param do_normalize: Whether a normalization step is added.
		    :type do_normalize: bool
		    :param image_mean: Mean used for normalization. When left at the sentinel
		        default, ``OPENAI_CLIP_MEAN`` is used.
		    :type image_mean: tuple or list
		    :param image_std: Standard deviation used for normalization. When left at
		        the sentinel default, ``OPENAI_CLIP_STD`` is used.
		    :type image_std: tuple or list
		    :param do_convert_rgb: Whether an RGB conversion step is added.
		    :type do_convert_rgb: bool

		    :return: The composed transformation pipeline.
		    :rtype: PillowCompose
		    """
		    # Resolve sentinel defaults; ``_DEFAULT`` lets callers pass any real value, including None.
		    if size is _DEFAULT:
		        size = _DEFAULT_SIZE
		    if image_mean is _DEFAULT:
		        image_mean = OPENAI_CLIP_MEAN
		    if image_std is _DEFAULT:
		        image_std = OPENAI_CLIP_STD

		    steps = []

		    if do_convert_rgb:
		        steps.append(PillowConvertRGB())

		    if do_resize:
		        steps.append(_create_resize(size, resample=resample))

		    # Tensor conversion is unconditional; it already scales pixel values to [0, 1],
		    # i.e. it implicitly applies a factor of 1/255.
		    steps.append(PillowToTensor())

		    # Only when the requested factor differs from the implicit 1/255 is an extra
		    # rescale needed; multiplying by ``rescale_factor * 255`` cancels the implicit
		    # scaling and applies the requested one.
		    # NOTE(review): when do_rescale is False no compensation is added here, so the
		    # implicit [0, 1] scaling of the tensor conversion remains -- confirm this is intended.
		    needs_extra_rescale = do_rescale and rescale_factor != 1 / 255
		    if needs_extra_rescale:
		        steps.append(PillowRescale(rescale_factor * 255))

		    if do_normalize:
		        steps.append(PillowNormalize(mean=image_mean, std=image_std))

		    return PillowCompose(steps)


		@register_creators_for_transformers()
		def create_transforms_from_blip_processor(processor):
		    """
		    Build a BLIP transformation pipeline from a HuggingFace BLIP image processor.

		    The processor's preprocessing settings are read and forwarded, unchanged,
		    to :func:`create_blip_transforms`.

		    :param processor: A HuggingFace BLIP image processor instance.
		    :type processor: transformers.BlipImageProcessor

		    :return: The transformation pipeline configured from the processor's settings.
		    :rtype: PillowCompose
		    :raises NotProcessorTypeError: If ``processor`` is not a ``BlipImageProcessor``.
		    """
		    _check_transformers()
		    # Imported lazily, after the availability check above has passed.
		    from transformers import BlipImageProcessor

		    # Reject foreign processors early so that other registered creators can be tried.
		    if not isinstance(processor, BlipImageProcessor):
		        raise NotProcessorTypeError(f'Unknown blip processor - {processor!r}.')

		    settings = {
		        'do_resize': processor.do_resize,
		        'size': processor.size,
		        'resample': processor.resample,
		        'do_rescale': processor.do_rescale,
		        'rescale_factor': processor.rescale_factor,
		        'do_normalize': processor.do_normalize,
		        'image_mean': processor.image_mean,
		        'image_std': processor.image_std,
		        'do_convert_rgb': processor.do_convert_rgb,
		    }
		    return create_blip_transforms(**settings)