dev(narugo): finish docs for mobilenetv2 (f7bbf030) · Commits · git-mirror / Imgutils

docs/source/api_doc/preprocess/transformers.rst

+34 −0

Original line number	Diff line number	Diff line
		@@ -27,6 +27,26 @@ create_transforms_from_transformers



		is_valid_size_dict
		--------------------------------------------------------------------

		.. autofunction:: is_valid_size_dict



		convert_to_size_dict
		--------------------------------------------------------------------

		.. autofunction:: convert_to_size_dict



		get_size_dict
		--------------------------------------------------------------------

		.. autofunction:: get_size_dict



		create_clip_transforms
		--------------------------------------------------------------------
		@@ -113,3 +133,17 @@ create_transforms_from_blip_processor



		create_mobilenetv2_transforms
		--------------------------------------------------------------------

		.. autofunction:: create_mobilenetv2_transforms



		create_transforms_from_mobilenetv2_processor
		--------------------------------------------------------------------

		.. autofunction:: create_transforms_from_mobilenetv2_processor

imgutils/preprocess/transformers/__init__.py

+1 −0

Original line number	Diff line number	Diff line
		@@ -13,4 +13,5 @@ from .clip import create_clip_transforms, create_transforms_from_clip_processor
		from .convnext import create_convnext_transforms, create_transforms_from_convnext_processor
		from .mobilenetv2 import create_mobilenetv2_transforms, create_transforms_from_mobilenetv2_processor
		from .siglip import create_siglip_transforms, create_transforms_from_siglip_processor
		from .size import is_valid_size_dict, convert_to_size_dict, get_size_dict
		from .vit import create_vit_transforms, create_transforms_from_vit_processor

imgutils/preprocess/transformers/mobilenetv2.py

+64 −16

Original line number	Diff line number	Diff line
		"""
		MobileNetV2 transforms module for creating image transformations compatible with MobileNetV2 models.

		This module provides functions to create compositions of image transforms that replicate
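		the behavior of the MobileNetV2ImageProcessor from the transformers library. The main
		components include:

		- ``create_mobilenetv2_transforms``: builds the transform pipeline from explicit parameters
		- ``create_transforms_from_mobilenetv2_processor``: builds the pipeline from an existing processor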

		The module is designed to work with PIL images and provide compatibility with
		transformer-based vision models while using native Python image processing.
		"""

		from typing import Dict, List, Optional, Union

		from PIL import Image
		@@ -24,22 +35,44 @@ def create_mobilenetv2_transforms(
		image_std: Optional[Union[float, List[float]]] = _DEFAULT,
		):
		"""
		Creates a composition of torchvision transforms that replicates the behavior of MobileNetV2ImageProcessor.

		Args:
		do_resize (bool, optional): Whether to resize the image. Defaults to True.
		size (Dict[str, int], optional): Size dictionary specifying resize parameters. Defaults to {"shortest_edge": 256}.
		resample (PIL.Image.Resampling, optional): Resampling filter for resizing. Defaults to PIL.Image.BILINEAR.
		do_center_crop (bool, optional): Whether to center crop the image. Defaults to True.
		crop_size (Dict[str, int], optional): Size of the center crop. Defaults to {"height": 224, "width": 224}.
		do_rescale (bool, optional): Whether to rescale pixel values. Defaults to True.
		rescale_factor (Union[int, float], optional): Factor to rescale by. Defaults to 1/255.
		do_normalize (bool, optional): Whether to normalize the image. Defaults to True.
		image_mean (Optional[Union[float, List[float]]], optional): Mean values for normalization. Defaults to IMAGENET_DEFAULT_MEAN.
		image_std (Optional[Union[float, List[float]]], optional): Std values for normalization. Defaults to IMAGENET_DEFAULT_STD.

		Returns:
		torchvision.PillowCompose: A composition of transforms matching MobileNetV2ImageProcessor behavior.
		Creates a composition of transforms that replicates the behavior of MobileNetV2ImageProcessor.

		This function builds a pipeline of image transformations typically used for MobileNetV2 models,
		including resizing, center cropping, tensor conversion, rescaling, and normalization.

		:param do_resize: Whether to resize the image.
		:type do_resize: bool

		:param size: Size dictionary specifying resize parameters. Can include keys like
		'shortest_edge', 'height', 'width', etc.
		:type size: Optional[Dict[str, int]]

		:param resample: Resampling filter to use for resizing operations.
		:type resample: PIL.Image.Resampling

		:param do_center_crop: Whether to apply center cropping to the image.
		:type do_center_crop: bool

		:param crop_size: Dictionary specifying the height and width for center cropping.
		:type crop_size: Dict[str, int]

		:param do_rescale: Whether to rescale pixel values after tensor conversion.
		:type do_rescale: bool

		:param rescale_factor: Factor by which to rescale the image pixel values.
		:type rescale_factor: Union[int, float]

		:param do_normalize: Whether to normalize the image with mean and std.
		:type do_normalize: bool

		:param image_mean: Mean values for normalization, per channel.
		:type image_mean: Optional[Union[float, List[float]]]

		:param image_std: Standard deviation values for normalization, per channel.
		:type image_std: Optional[Union[float, List[float]]]

		:return: A composition of transforms matching MobileNetV2ImageProcessor behavior.
		:rtype: PillowCompose
		"""
		transform_list = []

		@@ -77,6 +110,21 @@ def create_mobilenetv2_transforms(

		@register_creators_for_transformers()
		def create_transforms_from_mobilenetv2_processor(processor):
		"""
		Creates transform composition from a MobileNetV2ImageProcessor instance.

		This function extracts configuration from a transformers MobileNetV2ImageProcessor
		and creates an equivalent transform pipeline using the create_mobilenetv2_transforms
		function.

		:param processor: A MobileNetV2ImageProcessor instance from the transformers library.
		:type processor: transformers.MobileNetV2ImageProcessor

		:return: A composition of transforms matching the processor's configuration.
		:rtype: PillowCompose

		:raises NotProcessorTypeError: If the provided processor is not a MobileNetV2ImageProcessor.
		"""
		_check_transformers()
		from transformers import MobileNetV2ImageProcessor

imgutils/preprocess/transformers/size.py

+96 −16

Original line number	Diff line number	Diff line
		"""
		Image resizing configuration and processing module.

		This module provides utilities for handling and standardizing image size specifications
		in various formats. It supports multiple ways to specify image sizes including fixed dimensions,
		aspect ratio preservation, and maximum size constraints.

		Size dictionary formats supported:
		- {"height": h, "width": w} : Exact dimensions
		- {"shortest_edge": s} : Preserve aspect ratio with given shortest edge
		- {"shortest_edge": s, "longest_edge": l} : Constrain both edges
		- {"longest_edge": l} : Maximum size while preserving aspect ratio
		- {"max_height": h, "max_width": w} : Independent height/width constraints
		"""

		from PIL import Image

		from ..pillow import PillowResize
		@@ -12,10 +27,52 @@ VALID_SIZE_DICT_KEYS = (


		def is_valid_size_dict(size_dict):
		    """
		    Check whether an object is a size dictionary in one of the accepted layouts.

		    The argument qualifies only when it is a ``dict`` whose set of keys is exactly
		    equal to one of the entries of ``VALID_SIZE_DICT_KEYS``. Only the keys are
		    compared; the values are not inspected.

		    :param size_dict: Object to check
		    :type size_dict: dict

		    :return: True if the keys match one of the accepted layouts, False otherwise
		    :rtype: bool

		    :examples:
		        >>> is_valid_size_dict({"height": 100, "width": 200})
		        True
		        >>> is_valid_size_dict({"shortest_edge": 100})
		        True
		        >>> is_valid_size_dict({"invalid_key": 100})
		        False
		    """
		    # Anything that is not a dict can never be a size dict.
		    if not isinstance(size_dict, dict):
		        return False

		    # Key order is irrelevant, so compare as a set against each accepted layout.
		    present_keys = set(size_dict.keys())
		    return any(present_keys == allowed for allowed in VALID_SIZE_DICT_KEYS)


		def convert_to_size_dict(size, max_size=None, default_to_square=True, height_width_order=True):
		"""
		Convert various size input formats to a standardized size dictionary.

		:param size: Size specification as integer, tuple/list, or None
		:type size: int or tuple or list or None
		:param max_size: Optional maximum size constraint
		:type max_size: int or None
		:param default_to_square: If True, single integer creates square dimensions
		:type default_to_square: bool
		:param height_width_order: If True, tuple values are (height, width), else (width, height)
		:type height_width_order: bool

		:return: Dictionary with standardized size format
		:rtype: dict

		:raises ValueError: If size specification is invalid or incompatible with other parameters

		:examples:
		>>> convert_to_size_dict(100)
		{'height': 100, 'width': 100}
		>>> convert_to_size_dict((200, 300), height_width_order=True)
		{'height': 200, 'width': 300}
		>>> convert_to_size_dict(100, max_size=200, default_to_square=False)
		{'shortest_edge': 100, 'longest_edge': 200}
		"""
		if isinstance(size, int):
		if default_to_square:
		if max_size is not None:
		@@ -47,17 +104,34 @@ def get_size_dict(
		param_name="size",
		) -> dict:
		"""
		Converts size parameter into a standardized dictionary format.

		Args:
		size: Input size as int, tuple/list, or dict
		max_size: Optional maximum size constraint
		height_width_order: If True, tuple order is (height,width)
		default_to_square: If True, single int creates square output
		param_name: Parameter name for error messages

		Returns:
		Dictionary with standardized size format
		Convert and validate size parameters into a standardized dictionary format.

		This function serves as the main entry point for size processing, handling various
		input formats and ensuring they conform to valid size specifications.

		:param size: Size specification as integer, tuple/list, dictionary, or None
		:type size: int or tuple or list or dict or None
		:param max_size: Optional maximum size constraint
		:type max_size: int or None
		:param height_width_order: If True, tuple values are (height, width), else (width, height)
		:type height_width_order: bool
		:param default_to_square: If True, single integer creates square dimensions
		:type default_to_square: bool
		:param param_name: Parameter name for error messages
		:type param_name: str

		:return: Dictionary with standardized size format
		:rtype: dict

		:raises ValueError: If size specification is invalid or incompatible with other parameters

		:examples:
		>>> get_size_dict(100)
		{'height': 100, 'width': 100}
		>>> get_size_dict({'shortest_edge': 100})
		{'shortest_edge': 100}
		>>> get_size_dict((200, 300), height_width_order=True)
		{'height': 200, 'width': 300}
		"""
		if not isinstance(size, dict):
		size_dict = convert_to_size_dict(size, max_size, default_to_square, height_width_order)
		@@ -75,16 +149,22 @@ def _create_resize(size, resample=Image.BICUBIC):
		"""
		Create a PillowResize transform based on the given size configuration.

		:param size: Dictionary containing size configuration, either with 'shortest_edge'
		or both 'height' and 'width' keys
		This internal function creates a resize transformation that respects the specified
		size constraints while maintaining aspect ratio when appropriate.

		:param size: Dictionary containing size configuration
		:type size: dict
		:param resample: PIL resampling filter to use for resizing, defaults to Image.BICUBIC
		:param resample: PIL resampling filter to use for resizing
		:type resample: int

		:return: A PillowResize transform configured according to the size parameters
		:return: Configured resize transformation object
		:rtype: PillowResize

		:raises ValueError: If the size configuration is not recognized
		:raises ValueError: If the size configuration is invalid or not recognized

		:examples:
		>>> transform = _create_resize({'shortest_edge': 100})
		>>> transform = _create_resize({'height': 200, 'width': 300})
		"""
		size = get_size_dict(size)
		if "shortest_edge" in size:

test/preprocess/transformers/test_mobilenetv2.py

0 → 100644

+66 −0

Original line number	Diff line number	Diff line
		from unittest import skipUnless

		import numpy as np
		import pytest
		from hbutils.testing import tmatrix

		from imgutils.data import load_image
		from imgutils.preprocess.transformers import create_transforms_from_transformers
		from test.testings import get_testfile

		try:
		import transformers
		except (ImportError, ModuleNotFoundError):
		_HAS_TRANSFORMERS = False
		else:
		_HAS_TRANSFORMERS = True


		@pytest.mark.unittest
		class TestPreprocessTransformersMobilenetV2:
		    """
		    Alignment tests between the transforms produced by
		    ``create_transforms_from_transformers`` and the image processors loaded
		    through ``transformers.AutoImageProcessor`` for the repositories listed below.
		    """

		    @skipUnless(_HAS_TRANSFORMERS, 'Transformers required.')
		    @pytest.mark.parametrize(*tmatrix({
		        'repo_id': [
		            # checkpoints from assorted user namespaces
		            'ChispiDEV/autotrain-1tqht-w0zz7',
		            'elenaThevalley/mobilenet_v2_1.0_224-finetuned-prueba',
		            'cannu/autotrain-4hvd9-vjy72',
		            'pradanaadn/mobilenet_v2-activity-recognition',
		            'Diginsa/Plant-Disease-Detection-Project',
		            'jayanthspratap/mobilenet_v2_1.0_224-cxr-view',
		            'sngsfydy/MyMobileNet_v2',
		            'nikkopg/102623_mobilenet_v2_1.0_224-finetuned-stucktip',
		            'nikkopg/102723-mobilenet_v2_1.0_224-finetuned-stucktip',
		            'amiune/clasificacion-bananas',
		            'KCAZAR/mi-banana-variedades',
		            'aslez123/mobilenet_fashion',
		            'nikkopg/102523_mobilenet_v2_1.0_224-finetuned-stucktip',
		            'ChispiDEV/autotrain-pky99-ias73',
		            'sngsfydy/MobileNetV2_with_Trainer_11_10_2023',

		            # checkpoints from the google namespace
		            'google/mobilenet_v2_1.0_224',
		            'google/deeplabv3_mobilenet_v2_1.0_513',
		            'google/mobilenet_v2_0.75_160',
		            'google/mobilenet_v2_1.4_224',
		            'google/mobilenet_v2_0.35_96',
		        ],
		        'src_image': [
		            'png_640.png',
		            'png_640_m90.png',
		            'nude_girl.png',
		            'dori_640.png',
		            'nian_640.png',
		        ],
		    }))
		    def test_mobilenetv2_image_preprocess_align(self, src_image, repo_id):
		        """
		        Compare the output of our transform pipeline with the processor's own output.

		        :param src_image: File name of the test image, resolved via ``get_testfile``.
		        :type src_image: str

		        :param repo_id: Repository id passed to ``AutoImageProcessor.from_pretrained``.
		        :type repo_id: str
		        """
		        # Imported lazily so that collecting this module works without transformers.
		        from transformers import AutoImageProcessor

		        pil_image = load_image(get_testfile(src_image), mode='RGB', force_background='white')
		        hf_processor = AutoImageProcessor.from_pretrained(repo_id)

		        pipeline = create_transforms_from_transformers(hf_processor)

		        # Reference result: first item of the processor's 'pixel_values'.
		        reference = hf_processor.preprocess(pil_image)['pixel_values'][0]
		        actual = pipeline(pil_image)
		        np.testing.assert_array_almost_equal(actual, reference)