Unverified Commit c5957252 authored by narugo1992's avatar narugo1992 Committed by GitHub
Browse files

Merge pull request #56 from deepghs/dev/tagprune

dev(narugo): a better tag pruner
parents ecf979b6 37730ac1
Loading
Loading
Loading
Loading
+16 −2
Original line number Diff line number Diff line
@@ -6,10 +6,24 @@ imgutils.tagging.character
.. automodule:: imgutils.tagging.character


CHAR_WHITELIST
CHAR_WHITELIST_SUFFIX
------------------------------

.. autodata:: CHAR_WHITELIST
.. autodata:: CHAR_WHITELIST_SUFFIX



CHAR_WHITELIST_PREFIX
------------------------------

.. autodata:: CHAR_WHITELIST_PREFIX



CHAR_WHITELIST_WORD
------------------------------

.. autodata:: CHAR_WHITELIST_WORD



+130 −12
Original line number Diff line number Diff line
@@ -6,22 +6,127 @@ from typing import Union, List, Mapping, Tuple, Dict, Set, Optional

from .match import _split_to_words, _words_to_matcher

CHAR_WHITELIST = [
    'drill', 'pubic_hair', 'closed_eyes', 'half-closed_eyes', 'empty_eyes'
CHAR_WHITELIST_SUFFIX = [
    'anal_hair',
    'anal_tail',
    'arm_behind_head',
    'arm_hair',
    'arm_under_breasts',
    'arms_behind_head',
    'bird_on_head',
    'blood_in_hair',
    'breasts_on_glass',
    'breasts_on_head',
    'cat_on_head',
    'closed_eyes',
    'clothed_female_nude_female',
    'clothed_female_nude_male',
    'clothed_male_nude_female',
    'clothes_between_breasts',
    'cream_on_face',
    'drying_hair',
    'empty_eyes',
    'face_to_breasts',
    'facial',
    'food_on_face',
    'food_on_head',
    'game_boy',
    "grabbing_another's_hair",
    'grabbing_own_breast',
    'gun_to_head',
    'half-closed_eyes',
    'head_between_breasts',
    'heart_in_eye',
    'multiple_boys',
    'multiple_girls',
    'object_on_breast',
    'object_on_head',
    'paint_splatter_on_face',
    'parted_lips',
    'penis_on_face',
    'person_on_head',
    'pokemon_on_head',
    'pubic_hair',
    'rabbit_on_head',
    'rice_on_face',
    'severed_head',
    'star_in_eye',
    'sticker_on_face',
    'tentacles_on_male',
    'tying_hair'
]
# Default ``whitelist_prefixes`` of CharacterTagPool. A tag matched by the
# whitelist is reported as NOT being a basic character tag, because the
# whitelist is consulted before CHAR_SUFFIXES / CHAR_PREFIXES.
CHAR_WHITELIST_PREFIX = [
    # holding something
    'holding',
    # hand / hands placement
    'hand on',
    'hands on',
    'hand to',
    'hands to',
    'hand in',
    'hands in',
    'hand over',
    'hands over',
    # interactions
    'futa with',
    'futa on',
    'cum on',
    # actions
    'covering',
    'adjusting',
    'rubbing',
    'sitting',
    'shading',
    'playing',
    'cutting',
]

# Default ``whitelist_words`` of CharacterTagPool: whole tags that are
# whitelisted as-is (looked up through _WordPool).
CHAR_WHITELIST_WORD = [
    'drill',
]
CHAR_SUFFIXES = [
    'eyes', 'skin', 'hair', 'bun', 'bangs', 'cut', 'sidelocks',
    'twintails', 'braid', 'braids', 'afro', 'ahoge', 'drill',
    'drills', 'bald', 'dreadlocks', 'side up', 'ponytail', 'updo',
    'beard', 'mustache', 'pointy ears', 'ear', 'horn',
    'beard', 'mustache', 'pointy ears', 'ear', 'horn', 'tail', 'wing',
    'ornament', 'hairband', 'pupil', 'bow', 'eyewear', 'headwear',
    'ribbon', 'crown', 'cap', 'hat', 'hairclip', 'breast', 'mole',
    'halo', 'earrings', 'animal ear fluff', 'hair flower', 'glasses',
    'fang', 'female', 'girl', 'boy', 'male', 'beret', 'heterochromia',
    'headdress', 'headgear', 'eyepatch', 'headphones', 'eyebrows', 'eyelashes',
    'sunglasses', 'hair intakes', 'scrunchie', 'ear_piercing', 'head',
    'on face', 'on head', 'on hair', 'headband', 'hair rings', 'under_mouth',
    'freckles', 'lip', 'eyeliner', 'eyeshadow', 'tassel', 'over one eye',
    'drill', 'drill hair',
]
CHAR_PREFIXES = [
    'hair over', 'hair between'
    'hair over', 'hair between', 'facial',
]

_WordTupleTyping = Tuple[str, ...]


class _WordPool:
    """
    Exact-match lookup table for whole character tags.

    Every registered text is split with ``_split_to_words`` and expanded by
    ``_words_to_matcher``; the resulting items are stored in buckets keyed by
    their length, so a membership test only inspects one bucket.
    """

    def __init__(self, words: Optional[List[str]] = None):
        """
        Create the pool and register each entry of ``words``.

        :param words: Texts to register, defaults to None (an empty pool)
        :type words: Optional[List[str]], optional
        """
        self._words: Dict[int, Set[_WordTupleTyping]] = {}
        if words:
            for entry in words:
                self._append(entry)

    def _append(self, text: str):
        """
        Register one text in the pool.

        :param text: The text to register
        :type text: str
        """
        matchers = _words_to_matcher(_split_to_words(text))
        for key in matchers:
            # One bucket per key length; created on first use.
            self._words.setdefault(len(key), set()).add(key)

    def __contains__(self, text: str):
        """
        Check whether the given text is equal to a registered entry.

        :param text: The text to look up
        :type text: str
        :return: True if the split text is stored in the pool, False otherwise
        :rtype: bool
        """
        candidate = tuple(_split_to_words(text))
        bucket = self._words.get(len(candidate))
        if bucket is None:
            # Nothing of this length was ever registered.
            return False
        return candidate in bucket


class _SuffixPool:
    """
    Helper class to manage suffixes for character tags.
@@ -125,23 +230,36 @@ class CharacterTagPool:
    A pool of character-related tags for detection and removal of basic character tags.
    """

    def __init__(self, whitelist: Optional[List[str]] = None,
    def __init__(
            self,
            whitelist_suffixes: Optional[List[str]] = None,
            whitelist_prefixes: Optional[List[str]] = None,
            whitelist_words: Optional[List[str]] = None,
            suffixes: Optional[List[str]] = None,
                 prefixes: Optional[List[str]] = None):
            prefixes: Optional[List[str]] = None
    ):
        """
        Initialize a CharacterTagPool instance.

        :param whitelist: A list of whitelisted tags, defaults to None
        :type whitelist: Optional[List[str]], optional
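        :param whitelist_suffixes: A list of whitelisted suffixes, defaults to None
            (falls back to ``CHAR_WHITELIST_SUFFIX``)
        :type whitelist_suffixes: Optional[List[str]], optional
        :param whitelist_prefixes: A list of whitelisted prefixes, defaults to None
            (falls back to ``CHAR_WHITELIST_PREFIX``)
        :type whitelist_prefixes: Optional[List[str]], optional
        :param whitelist_words: A list of whitelisted whole-tag words, defaults to None
            (falls back to ``CHAR_WHITELIST_WORD``)
        :type whitelist_words: Optional[List[str]], optional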
        :param suffixes: A list of suffixes to consider, defaults to None
        :type suffixes: Optional[List[str]], optional
        :param prefixes: A list of prefixes to consider, defaults to None
        :type prefixes: Optional[List[str]], optional
        """
        self._whitelist = _SuffixPool(whitelist or CHAR_WHITELIST)
        self._whitelist_suffix = _SuffixPool(whitelist_suffixes or CHAR_WHITELIST_SUFFIX)
        self._whitelist_prefix = _PrefixPool(whitelist_prefixes or CHAR_WHITELIST_PREFIX)
        self._whitelist_words = _WordPool(whitelist_words or CHAR_WHITELIST_WORD)
        self._suffixes = _SuffixPool(suffixes or CHAR_SUFFIXES)
        self._prefixes = _PrefixPool(prefixes or CHAR_PREFIXES)

    def _is_in_whitelist(self, tag: str) -> bool:
        return (tag in self._whitelist_words) or (tag in self._whitelist_suffix) or (tag in self._whitelist_prefix)

    def _is_in_common(self, tag: str) -> bool:
        return (tag in self._suffixes) or (tag in self._prefixes)

    def is_basic_character_tag(self, tag: str) -> bool:
        """
        Check if a given tag is a basic character tag.
@@ -151,10 +269,10 @@ class CharacterTagPool:
        :return: True if the tag is a basic character tag, False otherwise
        :rtype: bool
        """
        if tag in self._whitelist:
        if self._is_in_whitelist(tag):
            return False
        else:
            return (tag in self._suffixes) or (tag in self._prefixes)
            return self._is_in_common(tag)

    def drop_basic_character_tags(self, tags: Union[List[str], Mapping[str, float]]) \
            -> Union[List[str], Mapping[str, float]]:
+3 −3
Original line number Diff line number Diff line
@@ -25,9 +25,9 @@ class TestTaggingCharacter:

    def test_drop_basic_character_tags(self, complex_dict_tags, complex_list_tags):
        assert drop_basic_character_tags(complex_dict_tags) == pytest.approx({
            '1girl': 0.998362123966217, 'solo': 0.9912548065185547, 'breasts': 0.983635425567627,
            '1girl': 0.998362123966217, 'solo': 0.9912548065185547,
            'looking_at_viewer': 0.9146994352340698, 'blush': 0.8892400860786438, 'smile': 0.43393653631210327,
            'large_breasts': 0.5196534395217896, 'navel': 0.9653235077857971, 'closed_mouth': 0.9369247555732727,
            'navel': 0.9653235077857971, 'closed_mouth': 0.9369247555732727,
            'nipples': 0.9660118222236633, 'collarbone': 0.588348925113678, 'nude': 0.9496222734451294,
            'sweat': 0.8690457344055176, 'pussy': 0.9868264198303223, 'spread_legs': 0.9603149890899658,
            'armpits': 0.9024748802185059, 'stomach': 0.6723923087120056, 'arms_up': 0.9380699396133423,
@@ -37,7 +37,7 @@ class TestTaggingCharacter:
            'clitoris': 0.5310801267623901
        }, abs=1e-3)
        assert drop_basic_character_tags(complex_list_tags) == [
            '1girl', 'solo', 'breasts', 'looking_at_viewer', 'blush', 'smile', 'large_breasts', 'navel', 'closed_mouth',
            '1girl', 'solo', 'looking_at_viewer', 'blush', 'smile', 'navel', 'closed_mouth',
            'nipples', 'collarbone', 'nude', 'sweat', 'pussy', 'spread_legs', 'armpits', 'stomach', 'arms_up',
            'completely_nude', 'uncensored', 'pussy_juice', 'feet_out_of_frame', 'on_bed', 'arms_behind_head',
            'breasts_apart', 'clitoris'