Loading imgutils/detect/visual.py +69 −25 Original line number Diff line number Diff line """ Overview: Visualize the detection results. This module provides functionality for visualizing object detection results on images. It includes tools for drawing bounding boxes, labels, and confidence scores on detected objects. The main function :func:`detection_visualize` can be used to visualize detection results from various object detection models, with customizable appearance settings like font size, padding, and label visibility. See :func:`imgutils.detect.head.detect_heads` and :func:`imgutils.detect.person.detect_person` for examples. """ import math from typing import List, Tuple, Optional from PIL import ImageFont, ImageDraw Loading @@ -13,6 +19,17 @@ from imgutils.data import ImageTyping, load_image def _try_get_font_from_matplotlib(fp=None, fontsize: int = 12): """ Attempt to get a font from matplotlib for text rendering. :param fp: Font properties object or None. If None, uses default sans-serif font. :type fp: matplotlib.font_manager.FontProperties or None :param fontsize: Size of the font to be used. :type fontsize: int :return: A PIL ImageFont object if matplotlib is available, None otherwise. :rtype: PIL.ImageFont.FreeTypeFont or None """ try: # noinspection PyPackageRequirements import matplotlib Loading @@ -27,49 +44,76 @@ def _try_get_font_from_matplotlib(fp=None, fontsize: int = 12): def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, float, float, float], str, float]], labels: Optional[List[str]] = None, text_padding: int = 6, fontsize: int = 12, fp=None, no_label: bool = False): max_short_edge_size: Optional[int] = None, fp=None, no_label: bool = False): """ Overview: Visualize the results of the object detection. :param image: Image be detected. :param detection: The detection results list, each item includes the detected area `(x0, y0, x1, y1)`, the target type (always `head`) and the target confidence score. :param labels: An array of known labels. If not provided, the labels will be automatically detected from the given ``detection``. :param text_padding: Text padding of the labels. Default is ``6``. :param fontsize: Font size of the labels. At runtime, an attempt will be made to retrieve the font used for rendering from `matplotlib`. Therefore, if `matplotlib` is not installed, only the default pixel font provided with `Pillow` can be used, and the font size cannot be changed. :param no_label: Do not show labels. Default is ``False``. :return: A `PIL` image with the same size as the provided image `image`, which contains the original image content as well as the visualized bounding boxes. Visualize object detection results by drawing bounding boxes and labels on an image. :param image: Input image to visualize detections on. Can be a PIL Image, numpy array, or path to image file. :type image: ImageTyping :param detection: List of detection results, each containing ((x0, y0, x1, y1), label, confidence_score). Coordinates should be in pixels, not normalized. :type detection: List[Tuple[Tuple[float, float, float, float], str, float]] :param labels: List of predefined labels. If None, labels will be extracted from detection results. :type labels: Optional[List[str]] :param text_padding: Padding around label text in pixels. :type text_padding: int :param fontsize: Font size for label text. :type fontsize: int :param max_short_edge_size: Maximum size of shortest image edge. If specified, image will be resized while maintaining aspect ratio. :type max_short_edge_size: Optional[int] :param fp: Font properties for matplotlib font. Only used if matplotlib is available. :type fp: matplotlib.font_manager.FontProperties or None :param no_label: If True, suppresses drawing of labels. :type no_label: bool :return: PIL Image with visualized detection results. :rtype: PIL.Image.Image Examples:: >>> from imgutils.detect import detect_heads, detection_visualize >>> >>> image = load_image("path/to/image.jpg") >>> detections = detect_heads(image) >>> visualized = detection_visualize(image, detections) >>> visualized.save("output.png") See :func:`imgutils.detect.head.detect_heads` and :func:`imgutils.detect.person.detect_person` for examples. """ image = load_image(image, force_background=None, mode='RGBA') original_width, original_height = image.width, image.height if max_short_edge_size is not None and max_short_edge_size < min(original_height, original_width): r = max_short_edge_size / min(original_height, original_width) new_width = int(math.ceil(original_width * r)) new_height = int(math.ceil(original_height * r)) else: new_width, new_height = original_width, original_height visual_image = image.copy() if (new_width, new_height) != (original_width, original_height): visual_image = visual_image.resize((new_width, new_height)) draw = ImageDraw.Draw(visual_image, mode='RGBA') font = _try_get_font_from_matplotlib(fp, fontsize) or ImageFont.load_default() labels = sorted(labels or {label for _, label, _ in detection}) _colors = list(map(str, rnd_colors(len(labels)))) _color_map = dict(zip(labels, _colors)) for _, ((xmin, ymin, xmax, ymax), label, score) in sorted(enumerate(detection), key=lambda x: (x[1][2], x[0])): for _, ((x0, y0, x1, y1), label, score) in sorted(enumerate(detection), key=lambda x: (x[1][2], x[0])): x0, y0 = int(x0 * new_width / original_width), int(y0 * new_height / original_height) x1, y1 = int(x1 * new_width / original_width), int(y1 * new_height / original_height) box_color = _color_map[label] draw.rectangle((xmin, ymin, xmax, ymax), outline=box_color, width=2) draw.rectangle((x0, y0, x1, y1), outline=box_color, width=2) if not no_label: label_text = f'{label}: {score * 100:.2f}%' _t_x0, _t_y0, _t_x1, _t_y1 = draw.textbbox((xmin, ymin), label_text, font=font) _t_x0, _t_y0, _t_x1, _t_y1 = draw.textbbox((x0, y0), label_text, font=font) _t_width, _t_height = _t_x1 - _t_x0, _t_y1 - _t_y0 if ymin - _t_height - text_padding < 0: _t_text_rect = (xmin, ymin, xmin + _t_width + text_padding * 2, ymin + _t_height + text_padding * 2) _t_text_co = (xmin + text_padding, ymin + text_padding) if y0 - _t_height - text_padding < 0: _t_text_rect = (x0, y0, x0 + _t_width + text_padding * 2, y0 + _t_height + text_padding * 2) _t_text_co = (x0 + text_padding, y0 + text_padding) else: _t_text_rect = (xmin, ymin - _t_height - text_padding * 2, xmin + _t_width + text_padding * 2, ymin) _t_text_co = (xmin + text_padding, ymin - _t_height - text_padding) _t_text_rect = (x0, y0 - _t_height - text_padding * 2, x0 + _t_width + text_padding * 2, y0) _t_text_co = (x0 + text_padding, y0 - _t_height - text_padding) draw.rectangle(_t_text_rect, fill=str(Color(box_color, alpha=0.5))) draw.text(_t_text_co, label_text, fill="black", font=font) Loading test/detect/test_visual.py +15 −0 Original line number Diff line number Diff line Loading @@ -21,3 +21,18 @@ class TestDetectVisual: visual.convert('RGB'), throw_exception=False ) < 1e-2 def test_detection_visualize_480(self, image_diff): image = get_testfile('genshin_post.jpg') visual = detection_visualize(image, [ ((202, 155, 356, 294), 'first', 0.878), ((938, 87, 1121, 262), 'second', 0.846), ((652, 440, 725, 514), 'third', 0.839), ((464, 250, 535, 326), 'fourth', 0.765) ], fontsize=24, max_short_edge_size=480) assert image_diff( load_image(get_testfile('genshin_post_face_visual_480.jpg'), mode='RGB'), visual.convert('RGB'), throw_exception=False ) < 1e-2 test/testfile/genshin_post_face_visual_480.jpg 0 → 100644 +91.5 KiB Loading image diff... Loading
imgutils/detect/visual.py +69 −25 Original line number Diff line number Diff line """ Overview: Visualize the detection results. This module provides functionality for visualizing object detection results on images. It includes tools for drawing bounding boxes, labels, and confidence scores on detected objects. The main function :func:`detection_visualize` can be used to visualize detection results from various object detection models, with customizable appearance settings like font size, padding, and label visibility. See :func:`imgutils.detect.head.detect_heads` and :func:`imgutils.detect.person.detect_person` for examples. """ import math from typing import List, Tuple, Optional from PIL import ImageFont, ImageDraw Loading @@ -13,6 +19,17 @@ from imgutils.data import ImageTyping, load_image def _try_get_font_from_matplotlib(fp=None, fontsize: int = 12): """ Attempt to get a font from matplotlib for text rendering. :param fp: Font properties object or None. If None, uses default sans-serif font. :type fp: matplotlib.font_manager.FontProperties or None :param fontsize: Size of the font to be used. :type fontsize: int :return: A PIL ImageFont object if matplotlib is available, None otherwise. :rtype: PIL.ImageFont.FreeTypeFont or None """ try: # noinspection PyPackageRequirements import matplotlib Loading @@ -27,49 +44,76 @@ def _try_get_font_from_matplotlib(fp=None, fontsize: int = 12): def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, float, float, float], str, float]], labels: Optional[List[str]] = None, text_padding: int = 6, fontsize: int = 12, fp=None, no_label: bool = False): max_short_edge_size: Optional[int] = None, fp=None, no_label: bool = False): """ Overview: Visualize the results of the object detection. :param image: Image be detected. :param detection: The detection results list, each item includes the detected area `(x0, y0, x1, y1)`, the target type (always `head`) and the target confidence score. :param labels: An array of known labels. If not provided, the labels will be automatically detected from the given ``detection``. :param text_padding: Text padding of the labels. Default is ``6``. :param fontsize: Font size of the labels. At runtime, an attempt will be made to retrieve the font used for rendering from `matplotlib`. Therefore, if `matplotlib` is not installed, only the default pixel font provided with `Pillow` can be used, and the font size cannot be changed. :param no_label: Do not show labels. Default is ``False``. :return: A `PIL` image with the same size as the provided image `image`, which contains the original image content as well as the visualized bounding boxes. Visualize object detection results by drawing bounding boxes and labels on an image. :param image: Input image to visualize detections on. Can be a PIL Image, numpy array, or path to image file. :type image: ImageTyping :param detection: List of detection results, each containing ((x0, y0, x1, y1), label, confidence_score). Coordinates should be in pixels, not normalized. :type detection: List[Tuple[Tuple[float, float, float, float], str, float]] :param labels: List of predefined labels. If None, labels will be extracted from detection results. :type labels: Optional[List[str]] :param text_padding: Padding around label text in pixels. :type text_padding: int :param fontsize: Font size for label text. :type fontsize: int :param max_short_edge_size: Maximum size of shortest image edge. If specified, image will be resized while maintaining aspect ratio. :type max_short_edge_size: Optional[int] :param fp: Font properties for matplotlib font. Only used if matplotlib is available. :type fp: matplotlib.font_manager.FontProperties or None :param no_label: If True, suppresses drawing of labels. :type no_label: bool :return: PIL Image with visualized detection results. :rtype: PIL.Image.Image Examples:: >>> from imgutils.detect import detect_heads, detection_visualize >>> >>> image = load_image("path/to/image.jpg") >>> detections = detect_heads(image) >>> visualized = detection_visualize(image, detections) >>> visualized.save("output.png") See :func:`imgutils.detect.head.detect_heads` and :func:`imgutils.detect.person.detect_person` for examples. """ image = load_image(image, force_background=None, mode='RGBA') original_width, original_height = image.width, image.height if max_short_edge_size is not None and max_short_edge_size < min(original_height, original_width): r = max_short_edge_size / min(original_height, original_width) new_width = int(math.ceil(original_width * r)) new_height = int(math.ceil(original_height * r)) else: new_width, new_height = original_width, original_height visual_image = image.copy() if (new_width, new_height) != (original_width, original_height): visual_image = visual_image.resize((new_width, new_height)) draw = ImageDraw.Draw(visual_image, mode='RGBA') font = _try_get_font_from_matplotlib(fp, fontsize) or ImageFont.load_default() labels = sorted(labels or {label for _, label, _ in detection}) _colors = list(map(str, rnd_colors(len(labels)))) _color_map = dict(zip(labels, _colors)) for _, ((xmin, ymin, xmax, ymax), label, score) in sorted(enumerate(detection), key=lambda x: (x[1][2], x[0])): for _, ((x0, y0, x1, y1), label, score) in sorted(enumerate(detection), key=lambda x: (x[1][2], x[0])): x0, y0 = int(x0 * new_width / original_width), int(y0 * new_height / original_height) x1, y1 = int(x1 * new_width / original_width), int(y1 * new_height / original_height) box_color = _color_map[label] draw.rectangle((xmin, ymin, xmax, ymax), outline=box_color, width=2) draw.rectangle((x0, y0, x1, y1), outline=box_color, width=2) if not no_label: label_text = f'{label}: {score * 100:.2f}%' _t_x0, _t_y0, _t_x1, _t_y1 = draw.textbbox((xmin, ymin), label_text, font=font) _t_x0, _t_y0, _t_x1, _t_y1 = draw.textbbox((x0, y0), label_text, font=font) _t_width, _t_height = _t_x1 - _t_x0, _t_y1 - _t_y0 if ymin - _t_height - text_padding < 0: _t_text_rect = (xmin, ymin, xmin + _t_width + text_padding * 2, ymin + _t_height + text_padding * 2) _t_text_co = (xmin + text_padding, ymin + text_padding) if y0 - _t_height - text_padding < 0: _t_text_rect = (x0, y0, x0 + _t_width + text_padding * 2, y0 + _t_height + text_padding * 2) _t_text_co = (x0 + text_padding, y0 + text_padding) else: _t_text_rect = (xmin, ymin - _t_height - text_padding * 2, xmin + _t_width + text_padding * 2, ymin) _t_text_co = (xmin + text_padding, ymin - _t_height - text_padding) _t_text_rect = (x0, y0 - _t_height - text_padding * 2, x0 + _t_width + text_padding * 2, y0) _t_text_co = (x0 + text_padding, y0 - _t_height - text_padding) draw.rectangle(_t_text_rect, fill=str(Color(box_color, alpha=0.5))) draw.text(_t_text_co, label_text, fill="black", font=font) Loading
test/detect/test_visual.py +15 −0 Original line number Diff line number Diff line Loading @@ -21,3 +21,18 @@ class TestDetectVisual: visual.convert('RGB'), throw_exception=False ) < 1e-2 def test_detection_visualize_480(self, image_diff): image = get_testfile('genshin_post.jpg') visual = detection_visualize(image, [ ((202, 155, 356, 294), 'first', 0.878), ((938, 87, 1121, 262), 'second', 0.846), ((652, 440, 725, 514), 'third', 0.839), ((464, 250, 535, 326), 'fourth', 0.765) ], fontsize=24, max_short_edge_size=480) assert image_diff( load_image(get_testfile('genshin_post_face_visual_480.jpg'), mode='RGB'), visual.convert('RGB'), throw_exception=False ) < 1e-2