Commit 83265853 authored by narugo1992's avatar narugo1992
Browse files

dev(narugo): add docs

parent f2697942
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -14,3 +14,7 @@ imgutils.tagging
    deepdanbooru
    format
    overlap
    blacklist
    character
    order
+70 −0
Original line number Diff line number Diff line
"""
Overview:
    Detect and drop some blacklisted tags, which are listed `here <https://huggingface.co/datasets/alea31415/tag_filtering/blob/main/blacklist_tags.txt>`_.
"""
from functools import lru_cache
from typing import Union, List, Mapping, Set, Optional

@@ -6,6 +10,12 @@ from huggingface_hub import hf_hub_download

@lru_cache()
def _load_online_blacklist() -> List[str]:
    """
    Load the online blacklist tags from the specified dataset repository.

    :return: List of blacklisted tags.
    :rtype: List[str]
    """
    with open(hf_hub_download(
            'alea31415/tag_filtering',
            'blacklist_tags.txt',
@@ -15,6 +25,16 @@ def _load_online_blacklist() -> List[str]:


def _is_blacklisted(tag: str, blacklist: Set[str]):
    """
    Check if a tag is blacklisted.

    :param tag: Tag to be checked.
    :type tag: str
    :param blacklist: Set of blacklisted tags.
    :type blacklist: Set[str]
    :return: True if the tag is blacklisted, False otherwise.
    :rtype: bool
    """
    return (tag in blacklist or
            tag.replace('_', ' ') in blacklist or
            tag.replace(' ', '_') in blacklist)
@@ -22,16 +42,66 @@ def _is_blacklisted(tag: str, blacklist: Set[str]):

@lru_cache()
def _online_blacklist_set() -> Set[str]:
    """
    Return the online blacklist tags as a set.

    The result is memoized by ``lru_cache``, so the set is built from
    :func:`_load_online_blacklist` on the first call only and the same
    set object is handed back on later calls.

    :return: Set of blacklisted tags.
    :rtype: Set[str]
    """
    online_tags = _load_online_blacklist()
    return set(online_tags)


def is_blacklisted(tags: str):
    """
    Check whether a tag is listed in the online preset blacklist.

    The argument is handled as one single tag string; it is not split into
    several tags. The tag counts as blacklisted when it, or its spelling with
    underscores and spaces swapped, is found in the online blacklist.

    :param tags: The tag to be checked.
    :type tags: str
    :return: True if the tag is blacklisted, False otherwise.
    :rtype: bool

    Examples::
        >>> from imgutils.tagging import is_blacklisted
        >>>
        >>> is_blacklisted('cosplay')
        True
        >>> is_blacklisted('no_eyewear')
        True
        >>> is_blacklisted('no eyewear')  # spaces and underscores are interchangeable
        True
        >>> is_blacklisted('red_hair')
        False
    """
    preset_blacklist = _online_blacklist_set()
    return _is_blacklisted(tags, preset_blacklist)


def drop_blacklisted_tags(tags: Union[List[str], Mapping[str, float]],
                          use_presets: bool = True, custom_blacklist: Optional[List[str]] = None) \
        -> Union[List[str], Mapping[str, float]]:
    """
    Drop blacklisted tags from the given list or mapping of tags.

    :param tags: List or mapping of tags to be filtered.
    :type tags: Union[List[str], Mapping[str, float]]
    :param use_presets: Whether to use the online blacklist presets, defaults to True.
    :type use_presets: bool, optional
    :param custom_blacklist: Custom blacklist to be used, defaults to None.
    :type custom_blacklist: Optional[List[str]], optional
    :return: Filtered list or mapping of tags without the blacklisted ones.
    :rtype: Union[List[str], Mapping[str, float]]
    :raises TypeError: If the input tags are neither a list nor a dictionary.

    Examples::
        >>> from imgutils.tagging import drop_blacklisted_tags
        >>>
        >>> drop_blacklisted_tags({
        ...     'solo': 1.0, '1girl': 0.95,
        ...     'cosplay': 0.7, 'no_eyewear': 0.6,
        ... })
        {'solo': 1.0, '1girl': 0.95}
        >>> drop_blacklisted_tags(['solo', '1girl', 'cosplay', 'no_eyewear'])
        ['solo', '1girl']
    """
    blacklist = []
    if use_presets:
        blacklist.extend(_load_online_blacklist())
+91 −0
Original line number Diff line number Diff line
"""
Overview:
    Detect and drop character-related basic tags.
"""
import re
from typing import Union, List, Mapping

@@ -18,10 +22,28 @@ _CHAR_PREFIXES = [


def _split_to_words(text: str) -> List[str]:
    """
    Split a string into words and return them in lowercase.

    :param text: The input text to split.
    :type text: str
    :return: List of lowercase words.
    :rtype: List[str]
    """
    return [word.lower() for word in re.split(r'[\W_]+', text) if word]


def _match_suffix(tag: str, suffix: str):
    """
    Check if a tag matches a given suffix.

    :param tag: The tag to check.
    :type tag: str
    :param suffix: The suffix to match.
    :type suffix: str
    :return: True if the tag matches the suffix, False otherwise.
    :rtype: bool
    """
    tag_words = _split_to_words(tag)
    suffix_words = _split_to_words(suffix)
    all_suffixes = [suffix_words]
@@ -36,12 +58,32 @@ def _match_suffix(tag: str, suffix: str):


def _match_prefix(tag: str, prefix: str):
    """
    Check whether a tag begins with the words of a given prefix.

    Both strings are split into lowercase words with :func:`_split_to_words`,
    so the comparison is done word by word and ignores case and separators.
    A prefix that contains no words matches every tag.

    :param tag: The tag to check.
    :type tag: str
    :param prefix: The prefix to match.
    :type prefix: str
    :return: True if the tag's leading words equal the prefix's words, False otherwise.
    :rtype: bool
    """
    words_of_tag = _split_to_words(tag)
    wanted = _split_to_words(prefix)
    # Compare only as many leading words of the tag as the prefix has.
    leading = words_of_tag[:len(wanted)]
    return leading == wanted


def _match_same(tag: str, expected: str):
    """
    Check if a tag matches another tag, considering singular and plural forms.

    :param tag: The tag to check.
    :type tag: str
    :param expected: The expected tag.
    :type expected: str
    :return: True if the tag matches the expected tag, False otherwise.
    :rtype: bool
    """
    a = _split_to_words(tag)
    as_ = [a, [*a[:-1], singular_form(a[-1])], [*a[:-1], plural_form(a[-1])]]
    as_ = set([tuple(item) for item in as_])
@@ -54,6 +96,32 @@ def _match_same(tag: str, expected: str):


def is_basic_character_tag(tag: str) -> bool:
    """
    Check if a tag is a basic character tag by matching with predefined whitelisted and blacklisted patterns.

    :param tag: The tag to check.
    :type tag: str
    :return: True if the tag is a basic character tag, False otherwise.
    :rtype: bool

    Examples::
        >>> from imgutils.tagging import is_basic_character_tag
        >>>
        >>> is_basic_character_tag('red hair')
        True
        >>> is_basic_character_tag('red_hair')  # spaces and underscores are interchangeable
        True
        >>> is_basic_character_tag('cat ears')  # plural
        True
        >>> is_basic_character_tag('cat ear')  # singular
        True
        >>> is_basic_character_tag('chair')  # only whole word will be matched
        False
        >>> is_basic_character_tag('hear')  # only whole word will be matched
        False
        >>> is_basic_character_tag('dress')
        False
    """
    if any(_match_same(tag, wl_tag) for wl_tag in _CHAR_WHITELIST):
        return False
    else:
@@ -62,6 +130,29 @@ def is_basic_character_tag(tag: str) -> bool:


def drop_basic_character_tags(tags: Union[List[str], Mapping[str, float]]) -> Union[List[str], Mapping[str, float]]:
    """
    Drop basic character tags from the given list or mapping of tags.

    :param tags: List or mapping of tags to be filtered.
    :type tags: Union[List[str], Mapping[str, float]]
    :return: Filtered list or mapping of tags without the basic character tags.
    :rtype: Union[List[str], Mapping[str, float]]
    :raises TypeError: If the input tags are neither a list nor a dictionary.

    Examples::
        >>> from imgutils.tagging import drop_basic_character_tags
        >>>
        >>> drop_basic_character_tags({
        ...     '1girl': 1.0, 'solo': 0.95,
        ...     'red_hair': 0.7, 'cat ears': 0.6,
        ...     'chair': 0.86, 'hear': 0.72,
        ... })
        {'1girl': 1.0, 'solo': 0.95, 'chair': 0.86, 'hear': 0.72}
        >>> drop_basic_character_tags([
        ...     '1girl', 'solo', 'red_hair', 'cat ears', 'chair', 'hear'
        ... ])
        ['1girl', 'solo', 'chair', 'hear']
    """
    if isinstance(tags, dict):
        return {tag: value for tag, value in tags.items() if not is_basic_character_tag(tag)}
    elif isinstance(tags, list):
+49 −0
Original line number Diff line number Diff line
@@ -10,6 +10,55 @@ except (ImportError, ModuleNotFoundError):

def sort_tags(tags: Union[List[str], Mapping[str, float]],
              mode: Literal['original', 'shuffle', 'score'] = 'score') -> List[str]:
    """
    Sort the input list or mapping of tags by specified mode.

    Tags can represent people counts (e.g., '1girl', '2boys'), and 'solo' tags.

    :param tags: List or mapping of tags to be sorted.
    :type tags: Union[List[str], Mapping[str, float]]
    :param mode: The mode for sorting the tags. Options: 'original' (original order),
                 'shuffle' (random shuffle), 'score' (sorted by score if available).
    :type mode: Literal['original', 'shuffle', 'score']
    :return: Sorted list of tags based on the specified mode.
    :rtype: List[str]
    :raises ValueError: If an unknown sort mode is provided.
    :raises TypeError: If the input tags are of unsupported type or if mode is 'score'
                       and the input is a list (as it does not have scores).

    Examples:
        Sorting tags in original order:

        >>> from imgutils.tagging import sort_tags
        >>>
        >>> tags = ['1girls', 'solo', 'red_hair', 'cat ears']
        >>> sort_tags(tags, mode='original')
        ['solo', '1girls', 'red_hair', 'cat ears']
        >>>
        >>> tags = {'1girls': 0.9, 'solo': 0.95, 'red_hair': 1.0, 'cat_ears': 0.92}
        >>> sort_tags(tags, mode='original')
        ['solo', '1girls', 'red_hair', 'cat_ears']

        Sorting tags by score (for a mapping of tags with scores):

        >>> from imgutils.tagging import sort_tags
        >>>
        >>> tags = {'1girls': 0.9, 'solo': 0.95, 'red_hair': 1.0, 'cat_ears': 0.92}
        >>> sort_tags(tags)
        ['solo', '1girls', 'red_hair', 'cat_ears']

        Shuffling tags (output is random, so results will differ between runs):

        >>> from imgutils.tagging import sort_tags
        >>>
        >>> tags = ['1girls', 'solo', 'red_hair', 'cat ears']
        >>> sort_tags(tags, mode='shuffle')
        ['solo', '1girls', 'red_hair', 'cat ears']
        >>>
        >>> tags = {'1girls': 0.9, 'solo': 0.95, 'red_hair': 1.0, 'cat_ears': 0.92}
        >>> sort_tags(tags, mode='shuffle')
        ['solo', '1girls', 'cat_ears', 'red_hair']
    """
    if mode not in {'original', 'shuffle', 'score'}:
        raise ValueError(f'Unknown sort_mode, \'original\', '
                         f'\'shuffle\' or \'score\' expected but {mode!r} found.')