Loading docs/source/api_doc/preprocess/transformers.rst +34 −0 Original line number Diff line number Diff line Loading @@ -27,6 +27,26 @@ create_transforms_from_transformers is_valid_size_dict -------------------------------------------------------------------- .. autofunction:: is_valid_size_dict convert_to_size_dict -------------------------------------------------------------------- .. autofunction:: convert_to_size_dict get_size_dict -------------------------------------------------------------------- .. autofunction:: get_size_dict create_clip_transforms -------------------------------------------------------------------- Loading Loading @@ -113,3 +133,17 @@ create_transforms_from_blip_processor create_mobilenetv2_transforms -------------------------------------------------------------------- .. autofunction:: create_mobilenetv2_transforms create_transforms_from_mobilenetv2_processor -------------------------------------------------------------------- .. autofunction:: create_transforms_from_mobilenetv2_processor imgutils/preprocess/transformers/__init__.py +1 −0 Original line number Diff line number Diff line Loading @@ -13,4 +13,5 @@ from .clip import create_clip_transforms, create_transforms_from_clip_processor from .convnext import create_convnext_transforms, create_transforms_from_convnext_processor from .mobilenetv2 import create_mobilenetv2_transforms, create_transforms_from_mobilenetv2_processor from .siglip import create_siglip_transforms, create_transforms_from_siglip_processor from .size import is_valid_size_dict, convert_to_size_dict, get_size_dict from .vit import create_vit_transforms, create_transforms_from_vit_processor imgutils/preprocess/transformers/mobilenetv2.py +64 −16 Original line number Diff line number Diff line """ MobileNetV2 transforms module for creating image transformations compatible with MobileNetV2 models. 
This module provides functions to create compositions of image transforms that replicate the behavior of the MobileNetV2ImageProcessor from the transformers library. The main components are create_mobilenetv2_transforms, which builds the pipeline from explicit parameters, and create_transforms_from_mobilenetv2_processor, which builds it from an existing processor instance. The module is designed to work with PIL images and provide compatibility with transformer-based vision models while using native Python image processing. """ from typing import Dict, List, Optional, Union from PIL import Image Loading @@ -24,22 +35,44 @@ def create_mobilenetv2_transforms( image_std: Optional[Union[float, List[float]]] = _DEFAULT, ): """ Creates a composition of torchvision transforms that replicates the behavior of MobileNetV2ImageProcessor. Args: do_resize (bool, optional): Whether to resize the image. Defaults to True. size (Dict[str, int], optional): Size dictionary specifying resize parameters. Defaults to {"shortest_edge": 256}. resample (PIL.Image.Resampling, optional): Resampling filter for resizing. Defaults to PIL.Image.BILINEAR. do_center_crop (bool, optional): Whether to center crop the image. Defaults to True. crop_size (Dict[str, int], optional): Size of the center crop. Defaults to {"height": 224, "width": 224}. do_rescale (bool, optional): Whether to rescale pixel values. Defaults to True. rescale_factor (Union[int, float], optional): Factor to rescale by. Defaults to 1/255. do_normalize (bool, optional): Whether to normalize the image. Defaults to True. image_mean (Optional[Union[float, List[float]]], optional): Mean values for normalization. Defaults to IMAGENET_DEFAULT_MEAN. image_std (Optional[Union[float, List[float]]], optional): Std values for normalization. Defaults to IMAGENET_DEFAULT_STD. Returns: torchvision.PillowCompose: A composition of transforms matching MobileNetV2ImageProcessor behavior. Creates a composition of transforms that replicates the behavior of MobileNetV2ImageProcessor. 
This function builds a pipeline of image transformations typically used for MobileNetV2 models, including resizing, center cropping, tensor conversion, rescaling, and normalization. :param do_resize: Whether to resize the image. :type do_resize: bool :param size: Size dictionary specifying resize parameters. Can include keys like 'shortest_edge', 'height', 'width', etc. :type size: Optional[Dict[str, int]] :param resample: Resampling filter to use for resizing operations. :type resample: PIL.Image.Resampling :param do_center_crop: Whether to apply center cropping to the image. :type do_center_crop: bool :param crop_size: Dictionary specifying the height and width for center cropping. :type crop_size: Dict[str, int] :param do_rescale: Whether to rescale pixel values after tensor conversion. :type do_rescale: bool :param rescale_factor: Factor by which to rescale the image pixel values. :type rescale_factor: Union[int, float] :param do_normalize: Whether to normalize the image with mean and std. :type do_normalize: bool :param image_mean: Mean values for normalization, per channel. :type image_mean: Optional[Union[float, List[float]]] :param image_std: Standard deviation values for normalization, per channel. :type image_std: Optional[Union[float, List[float]]] :return: A composition of transforms matching MobileNetV2ImageProcessor behavior. :rtype: PillowCompose """ transform_list = [] Loading Loading @@ -77,6 +110,21 @@ def create_mobilenetv2_transforms( @register_creators_for_transformers() def create_transforms_from_mobilenetv2_processor(processor): """ Creates transform composition from a MobileNetV2ImageProcessor instance. This function extracts configuration from a transformers MobileNetV2ImageProcessor and creates an equivalent transform pipeline using the create_mobilenetv2_transforms function. :param processor: A MobileNetV2ImageProcessor instance from the transformers library. 
:type processor: transformers.MobileNetV2ImageProcessor :return: A composition of transforms matching the processor's configuration. :rtype: PillowCompose :raises NotProcessorTypeError: If the provided processor is not a MobileNetV2ImageProcessor. """ _check_transformers() from transformers import MobileNetV2ImageProcessor Loading imgutils/preprocess/transformers/size.py +96 −16 Original line number Diff line number Diff line """ Image resizing configuration and processing module. This module provides utilities for handling and standardizing image size specifications in various formats. It supports multiple ways to specify image sizes including fixed dimensions, aspect ratio preservation, and maximum size constraints. Size dictionary formats supported: - {"height": h, "width": w} : Exact dimensions - {"shortest_edge": s} : Preserve aspect ratio with given shortest edge - {"shortest_edge": s, "longest_edge": l} : Constrain both edges - {"longest_edge": l} : Maximum size while preserving aspect ratio - {"max_height": h, "max_width": w} : Independent height/width constraints """ from PIL import Image from ..pillow import PillowResize Loading @@ -12,10 +27,52 @@ VALID_SIZE_DICT_KEYS = ( def is_valid_size_dict(size_dict): """ Validate if a dictionary contains valid image size specifications. :param size_dict: Dictionary to validate :type size_dict: dict :return: True if the dictionary contains valid size specifications, False otherwise :rtype: bool :examples: >>> is_valid_size_dict({"height": 100, "width": 200}) True >>> is_valid_size_dict({"shortest_edge": 100}) True >>> is_valid_size_dict({"invalid_key": 100}) False """ return isinstance(size_dict, dict) and any(set(size_dict.keys()) == keys for keys in VALID_SIZE_DICT_KEYS) def convert_to_size_dict(size, max_size=None, default_to_square=True, height_width_order=True): """ Convert various size input formats to a standardized size dictionary. 
:param size: Size specification as integer, tuple/list, or None :type size: int or tuple or list or None :param max_size: Optional maximum size constraint :type max_size: int or None :param default_to_square: If True, single integer creates square dimensions :type default_to_square: bool :param height_width_order: If True, tuple values are (height, width), else (width, height) :type height_width_order: bool :return: Dictionary with standardized size format :rtype: dict :raises ValueError: If size specification is invalid or incompatible with other parameters :examples: >>> convert_to_size_dict(100) {'height': 100, 'width': 100} >>> convert_to_size_dict((200, 300), height_width_order=True) {'height': 200, 'width': 300} >>> convert_to_size_dict(100, max_size=200, default_to_square=False) {'shortest_edge': 100, 'longest_edge': 200} """ if isinstance(size, int): if default_to_square: if max_size is not None: Loading Loading @@ -47,17 +104,34 @@ def get_size_dict( param_name="size", ) -> dict: """ Converts size parameter into a standardized dictionary format. Args: size: Input size as int, tuple/list, or dict max_size: Optional maximum size constraint height_width_order: If True, tuple order is (height,width) default_to_square: If True, single int creates square output param_name: Parameter name for error messages Returns: Dictionary with standardized size format Convert and validate size parameters into a standardized dictionary format. This function serves as the main entry point for size processing, handling various input formats and ensuring they conform to valid size specifications. 
:param size: Size specification as integer, tuple/list, dictionary, or None :type size: int or tuple or list or dict or None :param max_size: Optional maximum size constraint :type max_size: int or None :param height_width_order: If True, tuple values are (height, width), else (width, height) :type height_width_order: bool :param default_to_square: If True, single integer creates square dimensions :type default_to_square: bool :param param_name: Parameter name for error messages :type param_name: str :return: Dictionary with standardized size format :rtype: dict :raises ValueError: If size specification is invalid or incompatible with other parameters :examples: >>> get_size_dict(100) {'height': 100, 'width': 100} >>> get_size_dict({'shortest_edge': 100}) {'shortest_edge': 100} >>> get_size_dict((200, 300), height_width_order=True) {'height': 200, 'width': 300} """ if not isinstance(size, dict): size_dict = convert_to_size_dict(size, max_size, default_to_square, height_width_order) Loading @@ -75,16 +149,22 @@ def _create_resize(size, resample=Image.BICUBIC): """ Create a PillowResize transform based on the given size configuration. :param size: Dictionary containing size configuration, either with 'shortest_edge' or both 'height' and 'width' keys This internal function creates a resize transformation that respects the specified size constraints while maintaining aspect ratio when appropriate. 
:param size: Dictionary containing size configuration :type size: dict :param resample: PIL resampling filter to use for resizing, defaults to Image.BICUBIC :param resample: PIL resampling filter to use for resizing :type resample: int :return: A PillowResize transform configured according to the size parameters :return: Configured resize transformation object :rtype: PillowResize :raises ValueError: If the size configuration is not recognized :raises ValueError: If the size configuration is invalid or not recognized :examples: >>> transform = _create_resize({'shortest_edge': 100}) >>> transform = _create_resize({'height': 200, 'width': 300}) """ size = get_size_dict(size) if "shortest_edge" in size: Loading test/preprocess/transformers/test_mobilenetv2.py 0 → 100644 +66 −0 Original line number Diff line number Diff line from unittest import skipUnless import numpy as np import pytest from hbutils.testing import tmatrix from imgutils.data import load_image from imgutils.preprocess.transformers import create_transforms_from_transformers from test.testings import get_testfile try: import transformers except (ImportError, ModuleNotFoundError): _HAS_TRANSFORMERS = False else: _HAS_TRANSFORMERS = True @pytest.mark.unittest class TestPreprocessTransformersMobilenetV2: @skipUnless(_HAS_TRANSFORMERS, 'Transformers required.') @pytest.mark.parametrize(*tmatrix({ 'repo_id': [ 'ChispiDEV/autotrain-1tqht-w0zz7', 'elenaThevalley/mobilenet_v2_1.0_224-finetuned-prueba', 'cannu/autotrain-4hvd9-vjy72', 'pradanaadn/mobilenet_v2-activity-recognition', 'Diginsa/Plant-Disease-Detection-Project', 'jayanthspratap/mobilenet_v2_1.0_224-cxr-view', 'sngsfydy/MyMobileNet_v2', 'nikkopg/102623_mobilenet_v2_1.0_224-finetuned-stucktip', 'nikkopg/102723-mobilenet_v2_1.0_224-finetuned-stucktip', 'amiune/clasificacion-bananas', 'KCAZAR/mi-banana-variedades', 'aslez123/mobilenet_fashion', 'nikkopg/102523_mobilenet_v2_1.0_224-finetuned-stucktip', 'ChispiDEV/autotrain-pky99-ias73', 
'sngsfydy/MobileNetV2_with_Trainer_11_10_2023', 'google/mobilenet_v2_1.0_224', 'google/deeplabv3_mobilenet_v2_1.0_513', 'google/mobilenet_v2_0.75_160', 'google/mobilenet_v2_1.4_224', 'google/mobilenet_v2_0.35_96', ], 'src_image': [ 'png_640.png', 'png_640_m90.png', 'nude_girl.png', 'dori_640.png', 'nian_640.png', ] })) def test_mobilenetv2_image_preprocess_align(self, src_image, repo_id): from transformers import AutoImageProcessor image = load_image(get_testfile(src_image), mode='RGB', force_background='white') processor = AutoImageProcessor.from_pretrained(repo_id) trans = create_transforms_from_transformers(processor) expected_output = processor.preprocess(image)['pixel_values'][0] output = trans(image) np.testing.assert_array_almost_equal( output, expected_output, ) Loading
docs/source/api_doc/preprocess/transformers.rst +34 −0 Original line number Diff line number Diff line Loading @@ -27,6 +27,26 @@ create_transforms_from_transformers is_valid_size_dict -------------------------------------------------------------------- .. autofunction:: is_valid_size_dict convert_to_size_dict -------------------------------------------------------------------- .. autofunction:: convert_to_size_dict get_size_dict -------------------------------------------------------------------- .. autofunction:: get_size_dict create_clip_transforms -------------------------------------------------------------------- Loading Loading @@ -113,3 +133,17 @@ create_transforms_from_blip_processor create_mobilenetv2_transforms -------------------------------------------------------------------- .. autofunction:: create_mobilenetv2_transforms create_transforms_from_mobilenetv2_processor -------------------------------------------------------------------- .. autofunction:: create_transforms_from_mobilenetv2_processor
imgutils/preprocess/transformers/__init__.py +1 −0 Original line number Diff line number Diff line Loading @@ -13,4 +13,5 @@ from .clip import create_clip_transforms, create_transforms_from_clip_processor from .convnext import create_convnext_transforms, create_transforms_from_convnext_processor from .mobilenetv2 import create_mobilenetv2_transforms, create_transforms_from_mobilenetv2_processor from .siglip import create_siglip_transforms, create_transforms_from_siglip_processor from .size import is_valid_size_dict, convert_to_size_dict, get_size_dict from .vit import create_vit_transforms, create_transforms_from_vit_processor
imgutils/preprocess/transformers/mobilenetv2.py +64 −16 Original line number Diff line number Diff line """ MobileNetV2 transforms module for creating image transformations compatible with MobileNetV2 models. This module provides functions to create compositions of image transforms that replicate the behavior of the MobileNetV2ImageProcessor from the transformers library. The main components are create_mobilenetv2_transforms, which builds the pipeline from explicit parameters, and create_transforms_from_mobilenetv2_processor, which builds it from an existing processor instance. The module is designed to work with PIL images and provide compatibility with transformer-based vision models while using native Python image processing. """ from typing import Dict, List, Optional, Union from PIL import Image Loading @@ -24,22 +35,44 @@ def create_mobilenetv2_transforms( image_std: Optional[Union[float, List[float]]] = _DEFAULT, ): """ Creates a composition of torchvision transforms that replicates the behavior of MobileNetV2ImageProcessor. Args: do_resize (bool, optional): Whether to resize the image. Defaults to True. size (Dict[str, int], optional): Size dictionary specifying resize parameters. Defaults to {"shortest_edge": 256}. resample (PIL.Image.Resampling, optional): Resampling filter for resizing. Defaults to PIL.Image.BILINEAR. do_center_crop (bool, optional): Whether to center crop the image. Defaults to True. crop_size (Dict[str, int], optional): Size of the center crop. Defaults to {"height": 224, "width": 224}. do_rescale (bool, optional): Whether to rescale pixel values. Defaults to True. rescale_factor (Union[int, float], optional): Factor to rescale by. Defaults to 1/255. do_normalize (bool, optional): Whether to normalize the image. Defaults to True. image_mean (Optional[Union[float, List[float]]], optional): Mean values for normalization. Defaults to IMAGENET_DEFAULT_MEAN. image_std (Optional[Union[float, List[float]]], optional): Std values for normalization. Defaults to IMAGENET_DEFAULT_STD. Returns: torchvision.PillowCompose: A composition of transforms matching MobileNetV2ImageProcessor behavior. 
Creates a composition of transforms that replicates the behavior of MobileNetV2ImageProcessor. This function builds a pipeline of image transformations typically used for MobileNetV2 models, including resizing, center cropping, tensor conversion, rescaling, and normalization. :param do_resize: Whether to resize the image. :type do_resize: bool :param size: Size dictionary specifying resize parameters. Can include keys like 'shortest_edge', 'height', 'width', etc. :type size: Optional[Dict[str, int]] :param resample: Resampling filter to use for resizing operations. :type resample: PIL.Image.Resampling :param do_center_crop: Whether to apply center cropping to the image. :type do_center_crop: bool :param crop_size: Dictionary specifying the height and width for center cropping. :type crop_size: Dict[str, int] :param do_rescale: Whether to rescale pixel values after tensor conversion. :type do_rescale: bool :param rescale_factor: Factor by which to rescale the image pixel values. :type rescale_factor: Union[int, float] :param do_normalize: Whether to normalize the image with mean and std. :type do_normalize: bool :param image_mean: Mean values for normalization, per channel. :type image_mean: Optional[Union[float, List[float]]] :param image_std: Standard deviation values for normalization, per channel. :type image_std: Optional[Union[float, List[float]]] :return: A composition of transforms matching MobileNetV2ImageProcessor behavior. :rtype: PillowCompose """ transform_list = [] Loading Loading @@ -77,6 +110,21 @@ def create_mobilenetv2_transforms( @register_creators_for_transformers() def create_transforms_from_mobilenetv2_processor(processor): """ Creates transform composition from a MobileNetV2ImageProcessor instance. This function extracts configuration from a transformers MobileNetV2ImageProcessor and creates an equivalent transform pipeline using the create_mobilenetv2_transforms function. 
:param processor: A MobileNetV2ImageProcessor instance from the transformers library. :type processor: transformers.MobileNetV2ImageProcessor :return: A composition of transforms matching the processor's configuration. :rtype: PillowCompose :raises NotProcessorTypeError: If the provided processor is not a MobileNetV2ImageProcessor. """ _check_transformers() from transformers import MobileNetV2ImageProcessor Loading
imgutils/preprocess/transformers/size.py +96 −16 Original line number Diff line number Diff line """ Image resizing configuration and processing module. This module provides utilities for handling and standardizing image size specifications in various formats. It supports multiple ways to specify image sizes including fixed dimensions, aspect ratio preservation, and maximum size constraints. Size dictionary formats supported: - {"height": h, "width": w} : Exact dimensions - {"shortest_edge": s} : Preserve aspect ratio with given shortest edge - {"shortest_edge": s, "longest_edge": l} : Constrain both edges - {"longest_edge": l} : Maximum size while preserving aspect ratio - {"max_height": h, "max_width": w} : Independent height/width constraints """ from PIL import Image from ..pillow import PillowResize Loading @@ -12,10 +27,52 @@ VALID_SIZE_DICT_KEYS = ( def is_valid_size_dict(size_dict): """ Validate if a dictionary contains valid image size specifications. :param size_dict: Dictionary to validate :type size_dict: dict :return: True if the dictionary contains valid size specifications, False otherwise :rtype: bool :examples: >>> is_valid_size_dict({"height": 100, "width": 200}) True >>> is_valid_size_dict({"shortest_edge": 100}) True >>> is_valid_size_dict({"invalid_key": 100}) False """ return isinstance(size_dict, dict) and any(set(size_dict.keys()) == keys for keys in VALID_SIZE_DICT_KEYS) def convert_to_size_dict(size, max_size=None, default_to_square=True, height_width_order=True): """ Convert various size input formats to a standardized size dictionary. 
:param size: Size specification as integer, tuple/list, or None :type size: int or tuple or list or None :param max_size: Optional maximum size constraint :type max_size: int or None :param default_to_square: If True, single integer creates square dimensions :type default_to_square: bool :param height_width_order: If True, tuple values are (height, width), else (width, height) :type height_width_order: bool :return: Dictionary with standardized size format :rtype: dict :raises ValueError: If size specification is invalid or incompatible with other parameters :examples: >>> convert_to_size_dict(100) {'height': 100, 'width': 100} >>> convert_to_size_dict((200, 300), height_width_order=True) {'height': 200, 'width': 300} >>> convert_to_size_dict(100, max_size=200, default_to_square=False) {'shortest_edge': 100, 'longest_edge': 200} """ if isinstance(size, int): if default_to_square: if max_size is not None: Loading Loading @@ -47,17 +104,34 @@ def get_size_dict( param_name="size", ) -> dict: """ Converts size parameter into a standardized dictionary format. Args: size: Input size as int, tuple/list, or dict max_size: Optional maximum size constraint height_width_order: If True, tuple order is (height,width) default_to_square: If True, single int creates square output param_name: Parameter name for error messages Returns: Dictionary with standardized size format Convert and validate size parameters into a standardized dictionary format. This function serves as the main entry point for size processing, handling various input formats and ensuring they conform to valid size specifications. 
:param size: Size specification as integer, tuple/list, dictionary, or None :type size: int or tuple or list or dict or None :param max_size: Optional maximum size constraint :type max_size: int or None :param height_width_order: If True, tuple values are (height, width), else (width, height) :type height_width_order: bool :param default_to_square: If True, single integer creates square dimensions :type default_to_square: bool :param param_name: Parameter name for error messages :type param_name: str :return: Dictionary with standardized size format :rtype: dict :raises ValueError: If size specification is invalid or incompatible with other parameters :examples: >>> get_size_dict(100) {'height': 100, 'width': 100} >>> get_size_dict({'shortest_edge': 100}) {'shortest_edge': 100} >>> get_size_dict((200, 300), height_width_order=True) {'height': 200, 'width': 300} """ if not isinstance(size, dict): size_dict = convert_to_size_dict(size, max_size, default_to_square, height_width_order) Loading @@ -75,16 +149,22 @@ def _create_resize(size, resample=Image.BICUBIC): """ Create a PillowResize transform based on the given size configuration. :param size: Dictionary containing size configuration, either with 'shortest_edge' or both 'height' and 'width' keys This internal function creates a resize transformation that respects the specified size constraints while maintaining aspect ratio when appropriate. 
:param size: Dictionary containing size configuration :type size: dict :param resample: PIL resampling filter to use for resizing, defaults to Image.BICUBIC :param resample: PIL resampling filter to use for resizing :type resample: int :return: A PillowResize transform configured according to the size parameters :return: Configured resize transformation object :rtype: PillowResize :raises ValueError: If the size configuration is not recognized :raises ValueError: If the size configuration is invalid or not recognized :examples: >>> transform = _create_resize({'shortest_edge': 100}) >>> transform = _create_resize({'height': 200, 'width': 300}) """ size = get_size_dict(size) if "shortest_edge" in size: Loading
"""
Alignment tests for the MobileNetV2 preprocessing transforms
(``test/preprocess/transformers/test_mobilenetv2.py``).

Each case builds a transform pipeline from a transformers image processor via
``create_transforms_from_transformers`` and checks that the pipeline output
matches what the processor's own ``preprocess`` produces for the same image.
"""
from unittest import skipUnless

import numpy as np
import pytest
from hbutils.testing import tmatrix

from imgutils.data import load_image
from imgutils.preprocess.transformers import create_transforms_from_transformers
from test.testings import get_testfile

# The alignment test needs the optional ``transformers`` package; record whether
# it can be imported so the test can be skipped cleanly when it cannot.
try:
    import transformers  # noqa: F401

    _HAS_TRANSFORMERS = True
except ImportError:  # ModuleNotFoundError is a subclass of ImportError
    _HAS_TRANSFORMERS = False

# Repository ids handed to ``AutoImageProcessor.from_pretrained``.
_REPO_IDS = [
    'ChispiDEV/autotrain-1tqht-w0zz7',
    'elenaThevalley/mobilenet_v2_1.0_224-finetuned-prueba',
    'cannu/autotrain-4hvd9-vjy72',
    'pradanaadn/mobilenet_v2-activity-recognition',
    'Diginsa/Plant-Disease-Detection-Project',
    'jayanthspratap/mobilenet_v2_1.0_224-cxr-view',
    'sngsfydy/MyMobileNet_v2',
    'nikkopg/102623_mobilenet_v2_1.0_224-finetuned-stucktip',
    'nikkopg/102723-mobilenet_v2_1.0_224-finetuned-stucktip',
    'amiune/clasificacion-bananas',
    'KCAZAR/mi-banana-variedades',
    'aslez123/mobilenet_fashion',
    'nikkopg/102523_mobilenet_v2_1.0_224-finetuned-stucktip',
    'ChispiDEV/autotrain-pky99-ias73',
    'sngsfydy/MobileNetV2_with_Trainer_11_10_2023',
    'google/mobilenet_v2_1.0_224',
    'google/deeplabv3_mobilenet_v2_1.0_513',
    'google/mobilenet_v2_0.75_160',
    'google/mobilenet_v2_1.4_224',
    'google/mobilenet_v2_0.35_96',
]

# Test image file names, resolved through ``get_testfile``.
_SRC_IMAGES = [
    'png_640.png',
    'png_640_m90.png',
    'nude_girl.png',
    'dori_640.png',
    'nian_640.png',
]


@pytest.mark.unittest
class TestPreprocessTransformersMobilenetV2:
    """
    Checks that the transforms created for a processor reproduce that
    processor's preprocessing output.
    """

    @skipUnless(_HAS_TRANSFORMERS, 'Transformers required.')
    @pytest.mark.parametrize(*tmatrix({
        'repo_id': _REPO_IDS,
        'src_image': _SRC_IMAGES,
    }))
    def test_mobilenetv2_image_preprocess_align(self, src_image, repo_id):
        """
        Compare the created transform pipeline against the processor itself.

        :param src_image: File name of the test image to load.
        :type src_image: str
        :param repo_id: Repository id passed to ``AutoImageProcessor.from_pretrained``.
        :type repo_id: str
        """
        # Imported lazily so that collecting this module works without transformers.
        from transformers import AutoImageProcessor

        img = load_image(
            get_testfile(src_image),
            mode='RGB',
            force_background='white',
        )
        proc = AutoImageProcessor.from_pretrained(repo_id)
        pipeline = create_transforms_from_transformers(proc)

        # Reference result: first entry of the processor's own pixel values.
        expected = proc.preprocess(img)['pixel_values'][0]
        actual = pipeline(img)

        np.testing.assert_array_almost_equal(actual, expected)