Commit f7bbf030 authored by narugo1992's avatar narugo1992
Browse files

dev(narugo): finish docs for mobilenetv2

parent a9e66b3b
Loading
Loading
Loading
Loading
+34 −0
Original line number Diff line number Diff line
@@ -27,6 +27,26 @@ create_transforms_from_transformers



is_valid_size_dict
--------------------------------------------------------------------

.. autofunction:: is_valid_size_dict



convert_to_size_dict
--------------------------------------------------------------------

.. autofunction:: convert_to_size_dict



get_size_dict
--------------------------------------------------------------------

.. autofunction:: get_size_dict



create_clip_transforms
--------------------------------------------------------------------
@@ -113,3 +133,17 @@ create_transforms_from_blip_processor



create_mobilenetv2_transforms
--------------------------------------------------------------------

.. autofunction:: create_mobilenetv2_transforms



create_transforms_from_mobilenetv2_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_mobilenetv2_processor


+1 −0
Original line number Diff line number Diff line
@@ -13,4 +13,5 @@ from .clip import create_clip_transforms, create_transforms_from_clip_processor
from .convnext import create_convnext_transforms, create_transforms_from_convnext_processor
from .mobilenetv2 import create_mobilenetv2_transforms, create_transforms_from_mobilenetv2_processor
from .siglip import create_siglip_transforms, create_transforms_from_siglip_processor
from .size import is_valid_size_dict, convert_to_size_dict, get_size_dict
from .vit import create_vit_transforms, create_transforms_from_vit_processor
+64 −16
Original line number Diff line number Diff line
"""
MobileNetV2 transforms module for creating image transformations compatible with MobileNetV2 models.

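This module provides functions to create compositions of image transforms that replicate
the behavior of the MobileNetV2ImageProcessor from the transformers library. The main
components include:

- :func:`create_mobilenetv2_transforms`: Build the transform pipeline from explicit parameters.
- :func:`create_transforms_from_mobilenetv2_processor`: Build the pipeline from an existing
  MobileNetV2ImageProcessor instance.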

The module is designed to work with PIL images and provide compatibility with
transformer-based vision models while using native Python image processing.
"""

from typing import Dict, List, Optional, Union

from PIL import Image
@@ -24,22 +35,44 @@ def create_mobilenetv2_transforms(
        image_std: Optional[Union[float, List[float]]] = _DEFAULT,
):
    """
    Creates a composition of torchvision transforms that replicates the behavior of MobileNetV2ImageProcessor.

    Args:
        do_resize (bool, optional): Whether to resize the image. Defaults to True.
        size (Dict[str, int], optional): Size dictionary specifying resize parameters. Defaults to {"shortest_edge": 256}.
        resample (PIL.Image.Resampling, optional): Resampling filter for resizing. Defaults to PIL.Image.BILINEAR.
        do_center_crop (bool, optional): Whether to center crop the image. Defaults to True.
        crop_size (Dict[str, int], optional): Size of the center crop. Defaults to {"height": 224, "width": 224}.
        do_rescale (bool, optional): Whether to rescale pixel values. Defaults to True.
        rescale_factor (Union[int, float], optional): Factor to rescale by. Defaults to 1/255.
        do_normalize (bool, optional): Whether to normalize the image. Defaults to True.
        image_mean (Optional[Union[float, List[float]]], optional): Mean values for normalization. Defaults to IMAGENET_DEFAULT_MEAN.
        image_std (Optional[Union[float, List[float]]], optional): Std values for normalization. Defaults to IMAGENET_DEFAULT_STD.

    Returns:
        torchvision.PillowCompose: A composition of transforms matching MobileNetV2ImageProcessor behavior.
    Creates a composition of transforms that replicates the behavior of MobileNetV2ImageProcessor.

    This function builds a pipeline of image transformations typically used for MobileNetV2 models,
    including resizing, center cropping, tensor conversion, rescaling, and normalization.

    :param do_resize: Whether to resize the image.
    :type do_resize: bool

    :param size: Size dictionary specifying resize parameters. Can include keys like
                 'shortest_edge', 'height', 'width', etc.
    :type size: Optional[Dict[str, int]]

    :param resample: Resampling filter to use for resizing operations.
    :type resample: PIL.Image.Resampling

    :param do_center_crop: Whether to apply center cropping to the image.
    :type do_center_crop: bool

    :param crop_size: Dictionary specifying the height and width for center cropping.
    :type crop_size: Dict[str, int]

    :param do_rescale: Whether to rescale pixel values after tensor conversion.
    :type do_rescale: bool

    :param rescale_factor: Factor by which to rescale the image pixel values.
    :type rescale_factor: Union[int, float]

    :param do_normalize: Whether to normalize the image with mean and std.
    :type do_normalize: bool

    :param image_mean: Mean values for normalization, per channel.
    :type image_mean: Optional[Union[float, List[float]]]

    :param image_std: Standard deviation values for normalization, per channel.
    :type image_std: Optional[Union[float, List[float]]]

    :return: A composition of transforms matching MobileNetV2ImageProcessor behavior.
    :rtype: PillowCompose
    """
    transform_list = []

@@ -77,6 +110,21 @@ def create_mobilenetv2_transforms(

@register_creators_for_transformers()
def create_transforms_from_mobilenetv2_processor(processor):
    """
    Creates transform composition from a MobileNetV2ImageProcessor instance.

    This function extracts configuration from a transformers MobileNetV2ImageProcessor
    and creates an equivalent transform pipeline using the create_mobilenetv2_transforms
    function.

    :param processor: A MobileNetV2ImageProcessor instance from the transformers library.
    :type processor: transformers.MobileNetV2ImageProcessor

    :return: A composition of transforms matching the processor's configuration.
    :rtype: PillowCompose

    :raises NotProcessorTypeError: If the provided processor is not a MobileNetV2ImageProcessor.
    """
    _check_transformers()
    from transformers import MobileNetV2ImageProcessor

+96 −16
Original line number Diff line number Diff line
"""
Image resizing configuration and processing module.

This module provides utilities for handling and standardizing image size specifications
in various formats. It supports multiple ways to specify image sizes including fixed dimensions,
aspect ratio preservation, and maximum size constraints.

Size dictionary formats supported:
    - {"height": h, "width": w} : Exact dimensions
    - {"shortest_edge": s} : Preserve aspect ratio with given shortest edge
    - {"shortest_edge": s, "longest_edge": l} : Constrain both edges
    - {"longest_edge": l} : Maximum size while preserving aspect ratio
    - {"max_height": h, "max_width": w} : Independent height/width constraints
"""

from PIL import Image

from ..pillow import PillowResize
@@ -12,10 +27,52 @@ VALID_SIZE_DICT_KEYS = (


def is_valid_size_dict(size_dict):
    """
    Check whether an object is a size dictionary with a recognized key combination.

    Only the keys are inspected: the key set of ``size_dict`` must be exactly equal to
    one of the entries of ``VALID_SIZE_DICT_KEYS``. The values are not checked.

    :param size_dict: Object to validate
    :type size_dict: dict

    :return: True if ``size_dict`` is a dict whose keys match one valid combination,
             False otherwise (including when it is not a dict at all)
    :rtype: bool

    :examples:
        >>> is_valid_size_dict({"height": 100, "width": 200})
        True
        >>> is_valid_size_dict({"shortest_edge": 100})
        True
        >>> is_valid_size_dict({"invalid_key": 100})
        False
    """
    # Anything that is not a dict can never be a size specification.
    if not isinstance(size_dict, dict):
        return False

    present_keys = set(size_dict.keys())
    for allowed_keys in VALID_SIZE_DICT_KEYS:
        # Exact match required: neither missing nor extra keys are tolerated.
        if present_keys == allowed_keys:
            return True
    return False


def convert_to_size_dict(size, max_size=None, default_to_square=True, height_width_order=True):
    """
    Convert various size input formats to a standardized size dictionary.

    :param size: Size specification as integer, tuple/list, or None
    :type size: int or tuple or list or None
    :param max_size: Optional maximum size constraint
    :type max_size: int or None
    :param default_to_square: If True, single integer creates square dimensions
    :type default_to_square: bool
    :param height_width_order: If True, tuple values are (height, width), else (width, height)
    :type height_width_order: bool

    :return: Dictionary with standardized size format
    :rtype: dict

    :raises ValueError: If size specification is invalid or incompatible with other parameters

    :examples:
        >>> convert_to_size_dict(100)
        {'height': 100, 'width': 100}
        >>> convert_to_size_dict((200, 300), height_width_order=True)
        {'height': 200, 'width': 300}
        >>> convert_to_size_dict(100, max_size=200, default_to_square=False)
        {'shortest_edge': 100, 'longest_edge': 200}
    """
    if isinstance(size, int):
        if default_to_square:
            if max_size is not None:
@@ -47,17 +104,34 @@ def get_size_dict(
        param_name="size",
) -> dict:
    """
    Converts size parameter into a standardized dictionary format.

    Args:
        size: Input size as int, tuple/list, or dict
        max_size: Optional maximum size constraint
        height_width_order: If True, tuple order is (height,width)
        default_to_square: If True, single int creates square output
        param_name: Parameter name for error messages

    Returns:
        Dictionary with standardized size format
    Convert and validate size parameters into a standardized dictionary format.

    This function serves as the main entry point for size processing, handling various
    input formats and ensuring they conform to valid size specifications.

    :param size: Size specification as integer, tuple/list, dictionary, or None
    :type size: int or tuple or list or dict or None
    :param max_size: Optional maximum size constraint
    :type max_size: int or None
    :param height_width_order: If True, tuple values are (height, width), else (width, height)
    :type height_width_order: bool
    :param default_to_square: If True, single integer creates square dimensions
    :type default_to_square: bool
    :param param_name: Parameter name for error messages
    :type param_name: str

    :return: Dictionary with standardized size format
    :rtype: dict

    :raises ValueError: If size specification is invalid or incompatible with other parameters

    :examples:
        >>> get_size_dict(100)
        {'height': 100, 'width': 100}
        >>> get_size_dict({'shortest_edge': 100})
        {'shortest_edge': 100}
        >>> get_size_dict((200, 300), height_width_order=True)
        {'height': 200, 'width': 300}
    """
    if not isinstance(size, dict):
        size_dict = convert_to_size_dict(size, max_size, default_to_square, height_width_order)
@@ -75,16 +149,22 @@ def _create_resize(size, resample=Image.BICUBIC):
    """
    Create a PillowResize transform based on the given size configuration.

    :param size: Dictionary containing size configuration, either with 'shortest_edge'
                or both 'height' and 'width' keys
    This internal function creates a resize transformation that respects the specified
    size constraints while maintaining aspect ratio when appropriate.

    :param size: Dictionary containing size configuration
    :type size: dict
    :param resample: PIL resampling filter to use for resizing, defaults to Image.BICUBIC
    :param resample: PIL resampling filter to use for resizing
    :type resample: int

    :return: A PillowResize transform configured according to the size parameters
    :return: Configured resize transformation object
    :rtype: PillowResize

    :raises ValueError: If the size configuration is not recognized
    :raises ValueError: If the size configuration is invalid or not recognized

    :examples:
        >>> transform = _create_resize({'shortest_edge': 100})
        >>> transform = _create_resize({'height': 200, 'width': 300})
    """
    size = get_size_dict(size)
    if "shortest_edge" in size:
+66 −0
Original line number Diff line number Diff line
from unittest import skipUnless

import numpy as np
import pytest
from hbutils.testing import tmatrix

from imgutils.data import load_image
from imgutils.preprocess.transformers import create_transforms_from_transformers
from test.testings import get_testfile

# Optional dependency probe: the flag is True only when ``transformers`` imports cleanly.
# ``ModuleNotFoundError`` is a subclass of ``ImportError``, so one clause covers both.
_HAS_TRANSFORMERS = True
try:
    import transformers
except ImportError:
    _HAS_TRANSFORMERS = False


@pytest.mark.unittest
class TestPreprocessTransformersMobilenetV2:
    """
    Compare the transforms built by ``create_transforms_from_transformers`` with the
    output of the image processor loaded for each listed repository.
    """

    # Repository ids handed to ``AutoImageProcessor.from_pretrained``.
    _REPO_IDS = [
        'ChispiDEV/autotrain-1tqht-w0zz7',
        'elenaThevalley/mobilenet_v2_1.0_224-finetuned-prueba',
        'cannu/autotrain-4hvd9-vjy72',
        'pradanaadn/mobilenet_v2-activity-recognition',
        'Diginsa/Plant-Disease-Detection-Project',
        'jayanthspratap/mobilenet_v2_1.0_224-cxr-view',
        'sngsfydy/MyMobileNet_v2',
        'nikkopg/102623_mobilenet_v2_1.0_224-finetuned-stucktip',
        'nikkopg/102723-mobilenet_v2_1.0_224-finetuned-stucktip',
        'amiune/clasificacion-bananas',
        'KCAZAR/mi-banana-variedades',
        'aslez123/mobilenet_fashion',
        'nikkopg/102523_mobilenet_v2_1.0_224-finetuned-stucktip',
        'ChispiDEV/autotrain-pky99-ias73',
        'sngsfydy/MobileNetV2_with_Trainer_11_10_2023',

        'google/mobilenet_v2_1.0_224',
        'google/deeplabv3_mobilenet_v2_1.0_513',
        'google/mobilenet_v2_0.75_160',
        'google/mobilenet_v2_1.4_224',
        'google/mobilenet_v2_0.35_96',
    ]

    # Test image file names resolved through ``get_testfile``.
    _SRC_IMAGES = [
        'png_640.png',
        'png_640_m90.png',
        'nude_girl.png',
        'dori_640.png',
        'nian_640.png',
    ]

    @skipUnless(_HAS_TRANSFORMERS, 'Transformers required.')
    @pytest.mark.parametrize(*tmatrix({
        'repo_id': _REPO_IDS,
        'src_image': _SRC_IMAGES,
    }))
    def test_mobilenetv2_image_preprocess_align(self, src_image, repo_id):
        """
        Check that the converted transform pipeline reproduces the processor's
        ``pixel_values`` for one (image, repository) combination.
        """
        # Imported lazily so the module can still be collected without transformers.
        from transformers import AutoImageProcessor

        img = load_image(get_testfile(src_image), mode='RGB', force_background='white')
        reference_processor = AutoImageProcessor.from_pretrained(repo_id)
        pipeline = create_transforms_from_transformers(reference_processor)

        expected = reference_processor.preprocess(img)['pixel_values'][0]
        actual = pipeline(img)
        np.testing.assert_array_almost_equal(actual, expected)
Loading