dev(narugo): prepare mldanbooru model (63711ece) · Commits · git-mirror / Imgutils

docs/source/api_doc/tagging/index.rst

+2 −1

Original line number	Diff line number	Diff line
		@@ -9,6 +9,7 @@ imgutils.tagging
		.. toctree::
		:maxdepth: 3

		mldanbooru
		wd14
		deepdanbooru
		format
		wd14

docs/source/api_doc/tagging/mldanbooru.rst

0 → 100644

+14 −0

Original line number	Diff line number	Diff line
		imgutils.tagging.mldanbooru
		====================================

		.. currentmodule:: imgutils.tagging.mldanbooru

		.. automodule:: imgutils.tagging.mldanbooru


		get_mldanbooru_tags
		------------------------

		.. autofunction:: get_mldanbooru_tags

imgutils/tagging/init.py

+1 −0

Original line number	Diff line number	Diff line
		@@ -4,4 +4,5 @@ Overview:
		"""
		from .deepdanbooru import get_deepdanbooru_tags
		from .format import tags_to_text
		from .mldanbooru import get_mldanbooru_tags
		from .wd14 import get_wd14_tags

imgutils/tagging/mldanbooru.py

0 → 100644

+82 −0

Original line number	Diff line number	Diff line
		from functools import lru_cache
		from typing import Tuple, List

		import numpy as np
		import pandas as pd
		from PIL import Image
		from huggingface_hub import hf_hub_download

		from ..data import load_image, ImageTyping
		from ..utils import open_onnx_model


		@lru_cache()
		def _open_mldanbooru_model():
		return open_onnx_model(hf_hub_download('deepghs/ml-danbooru-onnx', 'ml_caformer_m36_dec-5-97527.onnx'))


		def _resize_align(image: Image.Image, size: int, keep_ratio: float = True, align: int = 4) -> Image.Image:
		if not keep_ratio:
		target_size = (size, size)
		else:
		min_edge = min(image.size)
		target_size = (
		int(image.size[0] / min_edge * size),
		int(image.size[1] / min_edge * size),
		)

		target_size = (
		(target_size[0] // align) * align,
		(target_size[1] // align) * align,
		)

		return image.resize(target_size, resample=Image.BILINEAR)


		def _to_tensor(image: Image.Image):
		# noinspection PyTypeChecker
		img: np.ndarray = np.array(image, dtype=np.uint8, copy=True)
		img = img.reshape((image.size[1], image.size[0], len(image.getbands())))

		# put it from HWC to CHW format
		img = img.transpose((2, 0, 1))
		return img.astype(np.float32) / 255


		@lru_cache()
		def _get_mldanbooru_labels(use_real_name: bool = False) -> Tuple[List[str], List[int], List[int]]:
		path = hf_hub_download('deepghs/imgutils-models', 'mldanbooru/mldanbooru_tags.csv')
		df = pd.read_csv(path)

		return df["name"].tolist() if not use_real_name else df['real_name'].tolist()


		def get_mldanbooru_tags(image: ImageTyping, use_real_name: bool = False,
		threshold: float = 0.7, size: int = 448, keep_ratio: bool = False):
		"""
		Example:
		Here are some images for example

		.. image:: tagging_demo.dat.svg
		:align: center

		>>> import os
		>>> from imgutils.tagging import get_mldanbooru_tags
		>>>
		>>> get_mldanbooru_tags('skadi.jpg')
		{'1girl': 0.9999984502792358, 'long_hair': 0.9999946355819702, 'red_eyes': 0.9994951486587524, 'navel': 0.998144268989563, 'breasts': 0.9978417158126831, 'solo': 0.9941409230232239, 'shorts': 0.9799384474754333, 'gloves': 0.979142427444458, 'very_long_hair': 0.961823582649231, 'looking_at_viewer': 0.961323618888855, 'silver_hair': 0.9490893483161926, 'large_breasts': 0.9450850486755371, 'midriff': 0.9425153136253357, 'sweat': 0.9409335255622864, 'thighs': 0.9319437146186829, 'crop_top': 0.9265308976173401, 'baseball_bat': 0.9259042143821716, 'sky': 0.922250509262085, 'holding': 0.9199565052986145, 'outdoors': 0.9175475835800171, 'day': 0.9102761745452881, 'black_gloves': 0.9076938629150391, 'stomach': 0.9052775502204895, 'shirt': 0.8938589692115784, 'cowboy_shot': 0.8894285559654236, 'bangs': 0.8891903162002563, 'blue_sky': 0.8845980763435364, 'parted_lips': 0.8842408061027527, 'hair_between_eyes': 0.8659475445747375, 'sportswear': 0.862621009349823, 'no_headwear': 0.8616052865982056, 'cloud': 0.8562789559364319, 'short_shorts': 0.8555729389190674, 'no_hat': 0.8533340096473694, 'black_shorts': 0.8477485775947571, 'short_sleeves': 0.8430152535438538, 'low-tied_long_hair': 0.8340626955032349, 'crop_top_overhang': 0.8266023397445679, 'holding_baseball_bat': 0.8222048282623291, 'standing': 0.8202669620513916, 'black_shirt': 0.8061150312423706, 'ass_visible_through_thighs': 0.7803354859352112, 'thigh_gap': 0.7789446711540222, 'arms_up': 0.7052110433578491}
		>>>
		>>> get_mldanbooru_tags('hutao.jpg')
		{'1girl': 0.9999866485595703, 'skirt': 0.997043788433075, 'tongue': 0.9969649910926819, 'hair_ornament': 0.9957101345062256, 'tongue_out': 0.9928386807441711, 'flower': 0.9886980056762695, 'twintails': 0.9864778518676758, 'ghost': 0.9769423007965088, 'hair_flower': 0.9747489094734192, 'bag': 0.9736957550048828, 'long_hair': 0.9388670325279236, 'backpack': 0.9356311559677124, 'brown_hair': 0.91000896692276, 'cardigan': 0.8955123424530029, 'red_eyes': 0.8910233378410339, 'plaid': 0.8904104828834534, 'looking_at_viewer': 0.8881211280822754, 'school_uniform': 0.8876776695251465, 'outdoors': 0.8864808678627014, 'jacket': 0.8810517191886902, 'plaid_skirt': 0.8798807263374329, 'ahoge': 0.8765745162963867, 'pleated_skirt': 0.8737136125564575, 'nail_polish': 0.8650439381599426, 'solo': 0.8613706827163696, 'blue_cardigan': 0.8571277260780334, 'bangs': 0.8333670496940613, 'very_long_hair': 0.8160212635993958, 'eyebrows_visible_through_hair': 0.8122442364692688, 'hairclip': 0.8091571927070618, 'red_nails': 0.8082079887390137, ':p': 0.8048468232154846, 'long_sleeves': 0.8042327165603638, 'shirt': 0.7984272241592407, 'blazer': 0.794708251953125, 'ribbon': 0.78981614112854, 'hair_ribbon': 0.7892146110534668, 'star-shaped_pupils': 0.7867060899734497, 'gradient_hair': 0.786359965801239, 'white_shirt': 0.7790888547897339, 'brown_skirt': 0.7760675549507141, 'symbol-shaped_pupils': 0.774523913860321, 'smile': 0.7721588015556335, 'hair_between_eyes': 0.7697228789329529, 'cowboy_shot': 0.755959689617157, 'multicolored_hair': 0.7477189898490906, 'blush': 0.7476690411567688, 'railing': 0.7476617693901062, 'blue_jacket': 0.7458406090736389, 'sleeves_past_wrists': 0.741143524646759, 'day': 0.7364678978919983, 'collared_shirt': 0.7193643450737, 'red_neckwear': 0.7108616828918457, 'flower-shaped_pupils': 0.7086325287818909, 'miniskirt': 0.7055293321609497, 'holding': 0.7039415836334229, 'open_clothes': 0.7018357515335083}
		"""
		image = load_image(image, mode='RGB')
		real_input = _to_tensor(_resize_align(image, size, keep_ratio))
		real_input = real_input.reshape(1, *real_input.shape)

		model = _open_mldanbooru_model()
		native_output, = model.run(['output'], {'input': real_input})

		output = (1 / (1 + np.exp(-native_output))).reshape(-1)
		tags = _get_mldanbooru_labels(use_real_name)
		pairs = sorted([(tags[i], ratio) for i, ratio in enumerate(output)], key=lambda x: (-x[1], x[0]))
		return {tag: float(ratio) for tag, ratio in pairs if ratio >= threshold}

zoo/monochrome/encode.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -30,7 +30,7 @@ def image_encode(image: ImageTyping, bins: int = 200, mf: Optional[int] = 5,
		if image.width * image.height > maxpixels:
		r = (image.width * image.height / maxpixels) ** 0.5
		new_width, new_height = map(lambda x: int(round(x / r)), image.size)
		image = image.resize((new_width, new_height))
		image = image._resize_align((new_width, new_height))

		if mf is not None:
		image = image.filter(ImageFilter.MedianFilter(mf))