dev(narugo): save the client code (db34832a) · Commits · git-mirror / Imgutils

imgutils/detect/__init__.py

0 → 100644

+2 −0

Original line number	Diff line number	Diff line
		from .face import detect_faces
		from .person import detect_person

imgutils/detect/_yolo.py

0 → 100644

+106 −0

Original line number	Diff line number	Diff line
		import math
		from typing import List

		import numpy as np
		from PIL import Image


		def _yolo_xywh2xyxy(x: np.ndarray) -> np.ndarray:
		    """
		    Copied from yolov8.

		    Turn centre-based boxes into corner-based boxes.

		    The last axis of ``x`` is read as (x, y, width, height), where (x, y) is the box centre.
		    The result is a copy of ``x`` whose first four entries on the last axis are
		    (x1, y1, x2, y2): the top-left and bottom-right corners. The input is not modified.

		    Args:
		        x (np.ndarray): Bounding boxes in (x, y, width, height) format.
		    Returns:
		        y (np.ndarray): Bounding boxes in (x1, y1, x2, y2) format.
		    """
		    centre_x, centre_y = x[..., 0], x[..., 1]
		    half_width = x[..., 2] / 2
		    half_height = x[..., 3] / 2

		    # Start from a copy so the result keeps the input's shape and dtype.
		    y = np.copy(x)
		    y[..., 0] = centre_x - half_width  # top left x
		    y[..., 1] = centre_y - half_height  # top left y
		    y[..., 2] = centre_x + half_width  # bottom right x
		    y[..., 3] = centre_y + half_height  # bottom right y
		    return y


		def _yolo_nms(boxes, scores, thresh: float = 0.7) -> List[int]:
		    """
		    Greedy non-maximum suppression.

		    Boxes are visited in descending score order. Each visited box is kept, and every
		    remaining box whose IoU with it is greater than ``thresh`` is discarded.

		    Args:
		        boxes: 2-D array with one box per row; columns 0-3 are read as
		            xmin, ymin, xmax, ymax (top-left and bottom-right corners).
		        scores: 1-D array holding one score per row of ``boxes``.
		        thresh (float): IoU threshold; a box survives only while its IoU with every
		            kept box is ``<= thresh``.
		    Returns:
		        List[int]: Row indices of the kept boxes as plain Python ints, highest score first.
		    """
		    x1 = boxes[:, 0]
		    y1 = boxes[:, 1]
		    x2 = boxes[:, 2]
		    y2 = boxes[:, 3]
		    # Widths and heights use the "+ 1" (inclusive pixel) convention throughout.
		    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

		    # Sort by score, descending.
		    order = scores.argsort()[::-1]

		    keep = []
		    while order.size > 0:
		        i = order[0]
		        # Convert to a builtin int so the result matches the List[int] annotation.
		        keep.append(int(i))
		        rest = order[1:]

		        # Intersection of the current box with every remaining box.
		        xx1 = np.maximum(x1[i], x1[rest])
		        yy1 = np.maximum(y1[i], y1[rest])
		        xx2 = np.minimum(x2[i], x2[rest])
		        yy2 = np.minimum(y2[i], y2[rest])

		        w = np.maximum(0.0, xx2 - xx1 + 1)
		        h = np.maximum(0.0, yy2 - yy1 + 1)

		        inter = w * h
		        iou = inter / (areas[i] + areas[rest] - inter)

		        # Only boxes that do not overlap the current one too much stay in the queue.
		        order = rest[iou <= thresh]

		    return keep


		def _image_preprocess(image: Image.Image, max_infer_size: int = 1216, align: int = 32):
		    """
		    Resize an image to the size used for inference.

		    If the longer side exceeds ``max_infer_size``, both sides are scaled down by the same
		    ratio so that the longer side equals ``max_infer_size``; smaller images are not enlarged
		    by this step. Each side is then rounded up to the nearest multiple of ``align`` and the
		    image is resized to that size.

		    Args:
		        image (Image.Image): Image to resize.
		        max_infer_size (int): Upper bound for the longer side before alignment.
		        align (int): Both output sides are multiples of this value.
		    Returns:
		        Tuple of (resized image, (old_width, old_height), (new_width, new_height)).
		    """
		    original_size = (image.width, image.height)
		    target_width, target_height = original_size

		    # Only ever shrink: a ratio >= 1 leaves the size untouched.
		    ratio = max_infer_size / max(target_width, target_height)
		    if ratio < 1:
		        target_width = target_width * ratio
		        target_height = target_height * ratio

		    # Round each side up to a multiple of `align`.
		    target_width = int(math.ceil(target_width / align) * align)
		    target_height = int(math.ceil(target_height / align) * align)
		    target_size = (target_width, target_height)

		    resized = image.resize(target_size)
		    return resized, original_size, target_size


		def _xy_postprocess(x, y, old_size, new_size):
		    """
		    Map a point from the resized image back onto the original image.

		    Each coordinate is rescaled by the ratio of old to new size along its own axis,
		    clipped to ``[0, old_width]`` / ``[0, old_height]``, rounded and converted to ``int``.

		    Args:
		        x, y: Point coordinates in the resized image.
		        old_size: (width, height) of the original image.
		        new_size: (width, height) of the resized image.
		    Returns:
		        Tuple (x, y) of ints in original-image coordinates.
		    """
		    old_width, old_height = old_size
		    new_width, new_height = new_size

		    scaled_x = x / new_width * old_width
		    scaled_y = y / new_height * old_height

		    bounded_x = np.clip(scaled_x, 0, old_width)
		    bounded_y = np.clip(scaled_y, 0, old_height)
		    return int(bounded_x.round()), int(bounded_y.round())


		def _data_simple_postprocess(output, conf_threshold, iou_threshold, old_size, new_size):
		    """
		    Convert a raw model output into detections in original-image coordinates.

		    Candidates are filtered by confidence, converted from (x, y, w, h) to corner format,
		    de-duplicated with NMS and finally mapped back to the original image size.

		    Args:
		        output: 2-D array with one column per candidate; rows 0-3 are read as the box
		            (x, y, width, height) and row 4 as the score. The confidence filter reads the
		            last row.
		        conf_threshold: Candidates whose last-row value is not greater than this are dropped.
		        iou_threshold: IoU threshold passed to the NMS step.
		        old_size: (width, height) of the original image.
		        new_size: (width, height) of the resized image the boxes refer to.
		    Returns:
		        List of ``((x0, y0, x1, y1), score)`` tuples with int coordinates and float score,
		        ordered by descending score. Empty when nothing passes ``conf_threshold``.
		    """
		    output = output[:, output[-1, :] > conf_threshold]
		    if output.shape[1] == 0:
		        # Nothing passed the confidence filter; the remaining steps need at least one box.
		        return []

		    boxes = output[:4, :].transpose(1, 0)
		    scores = output[4, :]

		    # Highest score first; a stable sort keeps tied candidates in their original order.
		    ranking = np.argsort(-scores, kind='stable')
		    boxes = _yolo_xywh2xyxy(boxes[ranking])
		    scores = scores[ranking]

		    idx = _yolo_nms(boxes, scores, thresh=iou_threshold)
		    boxes, scores = boxes[idx], scores[idx]

		    detections = []
		    for box, score in zip(boxes, scores):
		        x0, y0 = _xy_postprocess(box[0], box[1], old_size, new_size)
		        x1, y1 = _xy_postprocess(box[2], box[3], old_size, new_size)
		        detections.append(((x0, y0, x1, y1), float(score)))

		    return detections

imgutils/detect/face.py

0 → 100644

+25 −0

Original line number	Diff line number	Diff line
		from functools import lru_cache

		from huggingface_hub import hf_hub_download

		from ._yolo import _image_preprocess, _data_simple_postprocess
		from ..data import ImageTyping, load_image, rgb_encode
		from ..utils import open_onnx_model


		@lru_cache()
		def _open_face_detect_model(level: str = 's'):
		    """
		    Open the face detection ONNX model for the given level.

		    The model file ``face_detect/face_detect_best_{level}.onnx`` is obtained from the
		    ``deepghs/imgutils-models`` repository via ``hf_hub_download`` and opened with
		    ``open_onnx_model``. Results are memoized per ``level`` by ``lru_cache``.

		    Args:
		        level (str): Model variant, substituted into the model file name.
		    Returns:
		        The object returned by ``open_onnx_model`` for the model file.
		    """
		    model_file = hf_hub_download(
		        'deepghs/imgutils-models',
		        f'face_detect/face_detect_best_{level}.onnx'
		    )
		    return open_onnx_model(model_file)


		def detect_faces(image: ImageTyping, level: str = 's', max_infer_size=1216,
		                 conf_threshold: float = 0.25, iou_threshold: float = 0.7):
		    """
		    Detect faces in an image.

		    The image is loaded as RGB, resized for inference, encoded, run through the face
		    detection model, and the raw output is post-processed into boxes expressed in the
		    original image's coordinates.

		    Args:
		        image (ImageTyping): Image to run detection on.
		        level (str): Model variant to use.
		        max_infer_size: Upper bound for the longer image side during inference.
		        conf_threshold (float): Minimum confidence a candidate must exceed.
		        iou_threshold (float): IoU threshold for non-maximum suppression.
		    Returns:
		        List of ``((x0, y0, x1, y1), score)`` tuples.
		    """
		    image = load_image(image, mode='RGB')
		    resized, original_size, infer_size = _image_preprocess(image, max_infer_size)

		    # Add a leading batch axis of size 1.
		    batch = rgb_encode(resized)[None, ...]
		    model = _open_face_detect_model(level)
		    output, = model.run(['output0'], {'images': batch})
		    return _data_simple_postprocess(output[0], conf_threshold, iou_threshold, original_size, infer_size)

imgutils/detect/person.py

0 → 100644

+25 −0

Original line number	Diff line number	Diff line
		from functools import lru_cache

		from huggingface_hub import hf_hub_download

		from ._yolo import _image_preprocess, _data_simple_postprocess
		from ..data import ImageTyping, load_image, rgb_encode
		from ..utils import open_onnx_model


		@lru_cache()
		def _open_person_detect_model(level: str = 's'):
		    """
		    Open the person detection ONNX model for the given level.

		    The model file ``person_detect/person_detect_best_{level}.onnx`` is obtained from the
		    ``deepghs/imgutils-models`` repository via ``hf_hub_download`` and opened with
		    ``open_onnx_model``. Results are memoized per ``level`` by ``lru_cache``.

		    Args:
		        level (str): Model variant, substituted into the model file name.
		    Returns:
		        The object returned by ``open_onnx_model`` for the model file.
		    """
		    model_file = hf_hub_download(
		        'deepghs/imgutils-models',
		        f'person_detect/person_detect_best_{level}.onnx'
		    )
		    return open_onnx_model(model_file)


		def detect_person(image: ImageTyping, level: str = 's', max_infer_size=1216,
		                  conf_threshold: float = 0.25, iou_threshold: float = 0.7):
		    """
		    Detect persons in an image.

		    The image is loaded as RGB, resized for inference, encoded, run through the person
		    detection model, and the raw output is post-processed into boxes expressed in the
		    original image's coordinates.

		    Args:
		        image (ImageTyping): Image to run detection on.
		        level (str): Model variant to use.
		        max_infer_size: Upper bound for the longer image side during inference.
		        conf_threshold (float): Minimum confidence a candidate must exceed.
		        iou_threshold (float): IoU threshold for non-maximum suppression.
		    Returns:
		        List of ``((x0, y0, x1, y1), score)`` tuples.
		    """
		    image = load_image(image, mode='RGB')
		    resized, original_size, infer_size = _image_preprocess(image, max_infer_size)

		    # Add a leading batch axis of size 1.
		    batch = rgb_encode(resized)[None, ...]
		    model = _open_person_detect_model(level)
		    output, = model.run(['output0'], {'images': batch})
		    return _data_simple_postprocess(output[0], conf_threshold, iou_threshold, original_size, infer_size)