Commit db34832a authored by narugo1992's avatar narugo1992
Browse files

dev(narugo): save the client code

parent 37aef137
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
from .face import detect_faces
from .person import detect_person
+106 −0
Original line number Diff line number Diff line
import math
from typing import List

import numpy as np
from PIL import Image


def _yolo_xywh2xyxy(x: np.ndarray) -> np.ndarray:
    """
    Copied from yolov8.

    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
    top-left corner and (x2, y2) is the bottom-right corner.

    Args:
        x (np.ndarray) or (torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
    Returns:
        y (np.ndarray) or (torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
    """
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y


def _yolo_nms(boxes, scores, thresh: float = 0.7) -> List[int]:
    """
    dets: ndarray, (num_boxes, 5)
        每一行表示一个bounding box:[xmin, ymin, xmax, ymax, score]
        其中xmin, ymin, xmax, ymax分别表示框的左上角和右下角坐标,score表示框的分数
    thresh: float
        两个框的IoU阈值
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    # 按照score降序排列
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # 计算其他所有框与当前框的IoU
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)

        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)

        # 保留IoU小于阈值的框
        inds = np.where(iou <= thresh)[0]
        order = order[inds + 1]

    return keep


def _image_preprocess(image: Image.Image, max_infer_size: int = 1216, align: int = 32):
    """
    Resize an image for inference.

    If the longer side exceeds ``max_infer_size`` both sides are scaled down
    by the same ratio; images that already fit are not enlarged by that ratio.
    Each side is then rounded up to the next multiple of ``align``, so the
    resized image may have a slightly different aspect ratio than the input.

    Args:
        image: Source PIL image.
        max_infer_size: Upper bound for the longer side before alignment.
        align: Both output sides are multiples of this value.

    Returns:
        Tuple ``(resized_image, (old_width, old_height), (new_width, new_height))``.
    """
    original_size = (image.width, image.height)
    target_width, target_height = original_size

    # Shrink only; a ratio >= 1 means the image already fits.
    ratio = max_infer_size / max(target_width, target_height)
    if ratio < 1:
        target_width *= ratio
        target_height *= ratio

    # Round each side up to a multiple of ``align``.
    target_width = int(math.ceil(target_width / align) * align)
    target_height = int(math.ceil(target_height / align) * align)

    resized = image.resize((target_width, target_height))
    return resized, original_size, (target_width, target_height)


def _xy_postprocess(x, y, old_size, new_size):
    old_width, old_height = old_size
    new_width, new_height = new_size
    x, y = x / new_width * old_width, y / new_height * old_height
    x = int(np.clip(x, a_min=0, a_max=old_width).round())
    y = int(np.clip(y, a_min=0, a_max=old_height).round())
    return x, y


def _data_simple_postprocess(output, conf_threshold, iou_threshold, old_size, new_size):
    output = output[:, output[-1, :] > conf_threshold]
    boxes = output[:4, :].transpose(1, 0)
    scores = output[4, :]
    records = sorted(zip(boxes, scores), key=lambda x: -x[1])

    boxes = _yolo_xywh2xyxy(np.stack([bx for bx, _ in records]))
    scores = np.stack([score for _, score in records])
    idx = _yolo_nms(boxes, scores, thresh=iou_threshold)
    boxes, scores = boxes[idx], scores[idx]

    detections = []
    for box, score in zip(boxes, scores):
        x0, y0 = _xy_postprocess(box[0], box[1], old_size, new_size)
        x1, y1 = _xy_postprocess(box[2], box[3], old_size, new_size)
        detections.append(((x0, y0, x1, y1), float(score)))

    return detections
+25 −0
Original line number Diff line number Diff line
from functools import lru_cache

from huggingface_hub import hf_hub_download

from ._yolo import _image_preprocess, _data_simple_postprocess
from ..data import ImageTyping, load_image, rgb_encode
from ..utils import open_onnx_model


@lru_cache()
def _open_face_detect_model(level: str = 's'):
    """
    Fetch and open the face detection ONNX model for the given level.

    The result is memoised per ``level`` by ``lru_cache``, so repeated calls
    with the same level reuse the already opened model.

    Args:
        level: Model variant; it is substituted into the model file name.

    Returns:
        Whatever ``open_onnx_model`` returns for the downloaded file.
    """
    model_file = hf_hub_download(
        'deepghs/imgutils-models',
        f'face_detect/face_detect_best_{level}.onnx',
    )
    return open_onnx_model(model_file)


def detect_faces(image: ImageTyping, level: str = 's', max_infer_size=1216,
                 conf_threshold: float = 0.25, iou_threshold: float = 0.7):
    """
    Detect faces in an image.

    Args:
        image: Image to analyse; it is loaded in RGB mode via ``load_image``.
        level: Model variant passed to the model loader.
        max_infer_size: Maximum longer-side length used when resizing the
            image for inference.
        conf_threshold: Minimum score (exclusive) for a detection to be kept.
        iou_threshold: IoU threshold used by non-maximum suppression.

    Returns:
        List of ``((x0, y0, x1, y1), score)`` tuples in the coordinates of
        the original image.
    """
    rgb_image = load_image(image, mode='RGB')
    resized, old_size, new_size = _image_preprocess(rgb_image, max_infer_size)

    # Prepend a batch axis to the encoded image.
    batch = rgb_encode(resized)[None, ...]
    model = _open_face_detect_model(level)
    output, = model.run(['output0'], {'images': batch})

    # Drop the batch axis again before post-processing.
    return _data_simple_postprocess(
        output[0], conf_threshold, iou_threshold, old_size, new_size,
    )
+25 −0
Original line number Diff line number Diff line
from functools import lru_cache

from huggingface_hub import hf_hub_download

from ._yolo import _image_preprocess, _data_simple_postprocess
from ..data import ImageTyping, load_image, rgb_encode
from ..utils import open_onnx_model


@lru_cache()
def _open_person_detect_model(level: str = 's'):
    """
    Fetch and open the person detection ONNX model for the given level.

    The result is memoised per ``level`` by ``lru_cache``, so repeated calls
    with the same level reuse the already opened model.

    Args:
        level: Model variant; it is substituted into the model file name.

    Returns:
        Whatever ``open_onnx_model`` returns for the downloaded file.
    """
    model_file = hf_hub_download(
        'deepghs/imgutils-models',
        f'person_detect/person_detect_best_{level}.onnx',
    )
    return open_onnx_model(model_file)


def detect_person(image: ImageTyping, level: str = 's', max_infer_size=1216,
                  conf_threshold: float = 0.25, iou_threshold: float = 0.7):
    """
    Detect persons in an image.

    Args:
        image: Image to analyse; it is loaded in RGB mode via ``load_image``.
        level: Model variant passed to the model loader.
        max_infer_size: Maximum longer-side length used when resizing the
            image for inference.
        conf_threshold: Minimum score (exclusive) for a detection to be kept.
        iou_threshold: IoU threshold used by non-maximum suppression.

    Returns:
        List of ``((x0, y0, x1, y1), score)`` tuples in the coordinates of
        the original image.
    """
    rgb_image = load_image(image, mode='RGB')
    resized, old_size, new_size = _image_preprocess(rgb_image, max_infer_size)

    # Prepend a batch axis to the encoded image.
    batch = rgb_encode(resized)[None, ...]
    model = _open_person_detect_model(level)
    output, = model.run(['output0'], {'images': batch})

    # Drop the batch axis again before post-processing.
    return _data_simple_postprocess(
        output[0], conf_threshold, iou_threshold, old_size, new_size,
    )