Loading .github/workflows/export.yml +3 −2 Original line number Diff line number Diff line Loading @@ -18,8 +18,9 @@ jobs: model-name: # - 'lpips' # - 'monochrome' - 'person_detect' - 'face_detect' # - 'person_detect' # - 'face_detect' - 'manbits_detect' steps: - name: Checkout code Loading imgutils/detect/_yolo.py +12 −11 Original line number Diff line number Diff line Loading @@ -86,24 +86,25 @@ def _xy_postprocess(x, y, old_size, new_size): return x, y def _data_simple_postprocess(output, conf_threshold, iou_threshold, old_size, new_size, label): output = output[:, output[-1, :] > conf_threshold] boxes = output[:4, :].transpose(1, 0) scores = output[4, :] records = sorted(zip(boxes, scores), key=lambda x: -x[1]) if not records: def _data_postprocess(output, conf_threshold, iou_threshold, old_size, new_size, labels: List[str]): max_scores = output[4:, :].max(axis=0) output = output[:, max_scores > conf_threshold].transpose(1, 0) boxes = output[:, :4] scores = output[:, 4:] filtered_max_scores = scores.max(axis=1) if not boxes.size: return [] boxes = _yolo_xywh2xyxy(np.stack([bx for bx, _ in records])) scores = np.stack([score for _, score in records]) idx = _yolo_nms(boxes, scores, thresh=iou_threshold) boxes = _yolo_xywh2xyxy(boxes) idx = _yolo_nms(boxes, filtered_max_scores, thresh=iou_threshold) boxes, scores = boxes[idx], scores[idx] detections = [] for box, score in zip(boxes, scores): x0, y0 = _xy_postprocess(box[0], box[1], old_size, new_size) x1, y1 = _xy_postprocess(box[2], box[3], old_size, new_size) detections.append(((x0, y0, x1, y1), label, float(score))) max_score_id = score.argmax() detections.append(((x0, y0, x1, y1), labels[max_score_id], float(score[max_score_id]))) return detections imgutils/detect/face.py +2 −2 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ from typing import List, Tuple from huggingface_hub import hf_hub_download from ._yolo import _image_preprocess, _data_simple_postprocess from ._yolo import _image_preprocess, _data_postprocess from ..data import ImageTyping, load_image, rgb_encode from ..utils import open_onnx_model Loading Loading @@ -67,4 +67,4 @@ def detect_faces(image: ImageTyping, level: str = 's', max_infer_size=1216, data = rgb_encode(new_image)[None, ...] output, = _open_face_detect_model(level).run(['output0'], {'images': data}) return _data_simple_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, 'head') return _data_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, ['head']) imgutils/detect/person.py +2 −2 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ from functools import lru_cache from huggingface_hub import hf_hub_download from ._yolo import _image_preprocess, _data_simple_postprocess from ._yolo import _image_preprocess, _data_postprocess from ..data import ImageTyping, load_image, rgb_encode from ..utils import open_onnx_model Loading Loading @@ -67,4 +67,4 @@ def detect_person(image: ImageTyping, level: str = 's', max_infer_size=1216, data = rgb_encode(new_image)[None, ...] output, = _open_person_detect_model(level).run(['output0'], {'images': data}) return _data_simple_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, 'person') return _data_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, ['person']) imgutils/detect/visual.py +6 −3 Original line number Diff line number Diff line Loading @@ -4,7 +4,7 @@ Overview: See :func:`imgutils.detect.face.detect_faces` and :func:`imgutils.detect.person.detect_person` for examples. """ from typing import List, Tuple from typing import List, Tuple, Optional from PIL import ImageFont, ImageDraw from hbutils.color import rnd_colors, Color Loading @@ -26,7 +26,8 @@ def _try_get_font_from_matplotlib(fontsize: int = 12): def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, float, float, float], str, float]], text_padding: int = 6, fontsize: int = 12, no_label: bool = False): labels: Optional[List[str]] = None, text_padding: int = 6, fontsize: int = 12, no_label: bool = False): """ Overview: Visualize the results of the object detection. Loading @@ -34,6 +35,8 @@ def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, f :param image: Image be detected. :param detection: The detection results list, each item includes the detected area `(x0, y0, x1, y1)`, the target type (always `head`) and the target confidence score. :param labels: An array of known labels. If not provided, the labels will be automatically detected from the given ``detection``. :param text_padding: Text padding of the labels. Default is ``6``. :param fontsize: Font size of the labels. At runtime, an attempt will be made to retrieve the font used for rendering from `matplotlib`. Therefore, if `matplotlib` is not installed, only the default pixel font Loading @@ -50,7 +53,7 @@ def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, f draw = ImageDraw.Draw(visual_image, mode='RGBA') font = _try_get_font_from_matplotlib(fontsize) or ImageFont.load_default() labels = sorted({label for _, label, _ in detection}) labels = sorted(labels or {label for _, label, _ in detection}) _colors = list(map(str, rnd_colors(len(labels)))) _color_map = dict(zip(labels, _colors)) for (xmin, ymin, xmax, ymax), label, score in detection: Loading Loading
.github/workflows/export.yml +3 −2 Original line number Diff line number Diff line Loading @@ -18,8 +18,9 @@ jobs: model-name: # - 'lpips' # - 'monochrome' - 'person_detect' - 'face_detect' # - 'person_detect' # - 'face_detect' - 'manbits_detect' steps: - name: Checkout code Loading
imgutils/detect/_yolo.py +12 −11 Original line number Diff line number Diff line Loading @@ -86,24 +86,25 @@ def _xy_postprocess(x, y, old_size, new_size): return x, y def _data_simple_postprocess(output, conf_threshold, iou_threshold, old_size, new_size, label): output = output[:, output[-1, :] > conf_threshold] boxes = output[:4, :].transpose(1, 0) scores = output[4, :] records = sorted(zip(boxes, scores), key=lambda x: -x[1]) if not records: def _data_postprocess(output, conf_threshold, iou_threshold, old_size, new_size, labels: List[str]): max_scores = output[4:, :].max(axis=0) output = output[:, max_scores > conf_threshold].transpose(1, 0) boxes = output[:, :4] scores = output[:, 4:] filtered_max_scores = scores.max(axis=1) if not boxes.size: return [] boxes = _yolo_xywh2xyxy(np.stack([bx for bx, _ in records])) scores = np.stack([score for _, score in records]) idx = _yolo_nms(boxes, scores, thresh=iou_threshold) boxes = _yolo_xywh2xyxy(boxes) idx = _yolo_nms(boxes, filtered_max_scores, thresh=iou_threshold) boxes, scores = boxes[idx], scores[idx] detections = [] for box, score in zip(boxes, scores): x0, y0 = _xy_postprocess(box[0], box[1], old_size, new_size) x1, y1 = _xy_postprocess(box[2], box[3], old_size, new_size) detections.append(((x0, y0, x1, y1), label, float(score))) max_score_id = score.argmax() detections.append(((x0, y0, x1, y1), labels[max_score_id], float(score[max_score_id]))) return detections
imgutils/detect/face.py +2 −2 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ from typing import List, Tuple from huggingface_hub import hf_hub_download from ._yolo import _image_preprocess, _data_simple_postprocess from ._yolo import _image_preprocess, _data_postprocess from ..data import ImageTyping, load_image, rgb_encode from ..utils import open_onnx_model Loading Loading @@ -67,4 +67,4 @@ def detect_faces(image: ImageTyping, level: str = 's', max_infer_size=1216, data = rgb_encode(new_image)[None, ...] output, = _open_face_detect_model(level).run(['output0'], {'images': data}) return _data_simple_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, 'head') return _data_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, ['head'])
imgutils/detect/person.py +2 −2 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ from functools import lru_cache from huggingface_hub import hf_hub_download from ._yolo import _image_preprocess, _data_simple_postprocess from ._yolo import _image_preprocess, _data_postprocess from ..data import ImageTyping, load_image, rgb_encode from ..utils import open_onnx_model Loading Loading @@ -67,4 +67,4 @@ def detect_person(image: ImageTyping, level: str = 's', max_infer_size=1216, data = rgb_encode(new_image)[None, ...] output, = _open_person_detect_model(level).run(['output0'], {'images': data}) return _data_simple_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, 'person') return _data_postprocess(output[0], conf_threshold, iou_threshold, old_size, new_size, ['person'])
imgutils/detect/visual.py +6 −3 Original line number Diff line number Diff line Loading @@ -4,7 +4,7 @@ Overview: See :func:`imgutils.detect.face.detect_faces` and :func:`imgutils.detect.person.detect_person` for examples. """ from typing import List, Tuple from typing import List, Tuple, Optional from PIL import ImageFont, ImageDraw from hbutils.color import rnd_colors, Color Loading @@ -26,7 +26,8 @@ def _try_get_font_from_matplotlib(fontsize: int = 12): def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, float, float, float], str, float]], text_padding: int = 6, fontsize: int = 12, no_label: bool = False): labels: Optional[List[str]] = None, text_padding: int = 6, fontsize: int = 12, no_label: bool = False): """ Overview: Visualize the results of the object detection. Loading @@ -34,6 +35,8 @@ def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, f :param image: Image be detected. :param detection: The detection results list, each item includes the detected area `(x0, y0, x1, y1)`, the target type (always `head`) and the target confidence score. :param labels: An array of known labels. If not provided, the labels will be automatically detected from the given ``detection``. :param text_padding: Text padding of the labels. Default is ``6``. :param fontsize: Font size of the labels. At runtime, an attempt will be made to retrieve the font used for rendering from `matplotlib`. Therefore, if `matplotlib` is not installed, only the default pixel font Loading @@ -50,7 +53,7 @@ def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, f draw = ImageDraw.Draw(visual_image, mode='RGBA') font = _try_get_font_from_matplotlib(fontsize) or ImageFont.load_default() labels = sorted({label for _, label, _ in detection}) labels = sorted(labels or {label for _, label, _ in detection}) _colors = list(map(str, rnd_colors(len(labels)))) _color_map = dict(zip(labels, _colors)) for (xmin, ymin, xmax, ymax), label, score in detection: Loading