Loading imgutils/tagging/camie.py +59 −7 Original line number Diff line number Diff line Loading @@ -215,22 +215,74 @@ def get_camie_tags( """ Extract tags from an image using the Camie model. :param image: Input image (can be path, URL, or image data) :param image: Input image (can be path, URL, or image data). :type image: ImageTyping :param model_name: Name of the Camie model to use :param model_name: Name of the Camie model to use. :type model_name: str :param mode: Prediction mode affecting threshold values :param mode: Prediction mode affecting threshold values. :type mode: CamieModeTyping :param thresholds: Custom thresholds for tag selection :param thresholds: Custom thresholds for tag selection. :type thresholds: Optional[Union[float, Dict[str, float]]] :param no_underline: Whether to remove underscores from tag names :param no_underline: Whether to remove underscores from tag names. Default is ``False``. :type no_underline: bool :param drop_overlap: Whether to remove overlapping tags :param drop_overlap: Whether to remove overlapping tags. Default is ``False``. :type drop_overlap: bool :param fmt: Format specification for output :param fmt: Format specification for output. Default is ``('rating', 'general', 'character')``. :type fmt: Any :return: Dictionary of extracted tags and embeddings :rtype: Dict[str, Any] .. note:: The fmt argument can include the following keys: - ``rating``: a dict containing ratings and their confidences - ``general``: a dict containing general tags and their confidences - ``character``: a dict containing character tags and their confidences - ``copyright``: a dict containing copyright tags and their confidences - ``artist``: a dict containing artist tags and their confidences - ``meta``: a dict containing meta tags and their confidences - ``year``: a dict containing year tags and their confidences - ``tag``: a dict containing all tags (including general, character, copyright, artist, meta, year, not including rating) and their confidences - ``embedding``: a 1-dim embedding of image, recommended for index building after L2 normalization - ``prediction``: a 1-dim prediction result of image You can extract embedding of the given image with the following code >>> from imgutils.tagging import get_camie_tags >>> >>> embedding = get_camie_tags('skadi.jpg', fmt='embedding') >>> embedding.shape (1280, ) This embedding is valuable for constructing indices that enable rapid querying of images based on visual features within large-scale datasets. .. warning:: From our testings, other tag types (year, meta, artist, copyright) have shown limited accuracy. Especially for artist tags, just a bit better than ``np.random.randn``. So these tag types are not included in default ``fmt``. Example: Here are some images for example .. image:: tagging_demo.plot.py.svg :align: center >>> rating, features, chars = get_camie_tags('skadi.jpg') >>> rating {'general': 0.04246556758880615, 'sensitive': 0.6936423778533936, 'questionable': 0.23721203207969666, 'explicit': 0.033293724060058594} >>> features {'1girl': 0.8412569165229797, 'blush': 0.38029077649116516, 'breasts': 0.618192195892334, 'cowboy_shot': 0.37446439266204834, 'large_breasts': 0.5698797702789307, 'long_hair': 0.7119565010070801, 'looking_at_viewer': 0.5252856612205505, 'shirt': 0.46417444944381714, 'solo': 0.5428758859634399, 'standing': 0.34731733798980713, 'tail': 0.3911612927913666, 'thigh_gap': 0.2932726740837097, 'thighs': 0.4544200003147125, 'very_long_hair': 0.44711941480636597, 'ass': 0.2854885458946228, 'outdoors': 0.6344638466835022, 'red_eyes': 0.611354410648346, 'day': 0.564970850944519, 'hair_between_eyes': 0.4444340467453003, 'holding': 0.35846662521362305, 'parted_lips': 0.3867686092853546, 'blue_sky': 0.3723931908607483, 'cloud': 0.31086698174476624, 'short_sleeves': 0.43279752135276794, 'sky': 0.3896197974681854, 'gloves': 0.6638736724853516, 'grey_hair': 0.5094802975654602, 'sweat': 0.4867050349712372, 'navel': 0.6593714952468872, 'crop_top': 0.5243107676506042, 'shorts': 0.4374789893627167, 'artist_name': 0.3754707872867584, 'midriff': 0.6238733530044556, 'ass_visible_through_thighs': 0.31088054180145264, 'gym_uniform': 0.37657681107521057, 'black_shirt': 0.3012588620185852, 'watermark': 0.5147127509117126, 'web_address': 0.6296812295913696, 'short_shorts': 0.29214906692504883, 'black_shorts': 0.37801358103752136, 'buruma': 0.536261260509491, 'bike_shorts': 0.35828399658203125, 'black_gloves': 0.4156728982925415, 'sportswear': 0.44427722692489624, 'baseball_bat': 0.2838006019592285, 'crop_top_overhang': 0.49192047119140625, 'stomach': 0.36012423038482666, 'black_buruma': 0.3422132134437561, 'official_alternate_costume': 0.2783987522125244, 'baseball': 0.38377970457077026, 'baseball_mitt': 0.32592540979385376, 'cropped_shirt': 0.35402947664260864, 'holding_baseball_bat': 0.2758416533470154, 'black_sports_bra': 0.3463800549507141, 'sports_bra': 0.28466159105300903, 'exercising': 0.2603980302810669, 'bike_jersey': 0.2661605477333069, 'patreon_username': 0.7087235450744629, 'patreon_logo': 0.560276210308075} >>> chars {'skadi_(arknights)': 0.5921452641487122} >>> >>> rating, features, chars = get_camie_tags('hutao.jpg') >>> rating {'general': 0.41121846437454224, 'sensitive': 0.4002530574798584, 'questionable': 0.03438958525657654, 'explicit': 0.04617959260940552} >>> features {'1girl': 0.8312125205993652, 'blush': 0.3996567726135254, 'cowboy_shot': 0.28660568594932556, 'long_hair': 0.7184156775474548, 'long_sleeves': 0.4706878066062927, 'looking_at_viewer': 0.5503140687942505, 'school_uniform': 0.365602970123291, 'shirt': 0.41183334589004517, 'sidelocks': 0.28638553619384766, 'smile': 0.3707748055458069, 'solo': 0.520854115486145, 'standing': 0.2960333526134491, 'tongue': 0.6556028127670288, 'tongue_out': 0.6966925859451294, 'very_long_hair': 0.5526134371757507, 'skirt': 0.6872812509536743, 'brown_hair': 0.5945607423782349, 'hair_ornament': 0.4464661478996277, 'hair_ribbon': 0.3646523952484131, 'outdoors': 0.37938451766967773, 'red_eyes': 0.5426545143127441, 'ribbon': 0.3027467727661133, 'bag': 0.8986430168151855, 'hair_between_eyes': 0.337802529335022, 'holding': 0.38589367270469666, 'pleated_skirt': 0.6475872993469238, 'school_bag': 0.666648805141449, 'ahoge': 0.4749193489551544, 'white_shirt': 0.27104783058166504, 'closed_mouth': 0.28101325035095215, 'collared_shirt': 0.37030768394470215, 'miniskirt': 0.32576680183410645, ':p': 0.4337637424468994, 'alternate_costume': 0.42441293597221375, 'black_skirt': 0.34694597125053406, 'twintails': 0.5711237192153931, 'open_clothes': 0.31017544865608215, 'nail_polish': 0.534726083278656, 'jacket': 0.4544385075569153, 'open_jacket': 0.27831193804740906, 'flower': 0.45064714550971985, 'plaid_clothes': 0.5494365096092224, 'plaid_skirt': 0.610480546951294, 'red_flower': 0.35928308963775635, 'contemporary': 0.37732189893722534, 'backpack': 0.5575172305107117, 'fingernails': 0.27776333689689636, 'cardigan': 0.3264558017253876, 'blue_jacket': 0.31882336735725403, 'ghost': 0.5534622073173523, 'red_nails': 0.38771501183509827, ':q': 0.3758758008480072, 'hair_flower': 0.39574217796325684, 'charm_(object)': 0.5394986271858215, 'handbag': 0.37014907598495483, 'black_bag': 0.44918346405029297, 'shoulder_bag': 0.5881174802780151, 'symbol-shaped_pupils': 0.5163478255271912, 'blue_cardigan': 0.28089386224746704, 'black_nails': 0.42480990290641785, 'bag_charm': 0.5010414123535156, 'plum_blossoms': 0.27618563175201416, 'flower-shaped_pupils': 0.5317837595939636} >>> chars {'hu_tao_(genshin_impact)': 0.8859397172927856, 'boo_tao_(genshin_impact)': 0.7348971366882324} """ names = vnames(fmt) need_full = False Loading Loading
imgutils/tagging/camie.py +59 −7 Original line number Diff line number Diff line Loading @@ -215,22 +215,74 @@ def get_camie_tags( """ Extract tags from an image using the Camie model. :param image: Input image (can be path, URL, or image data) :param image: Input image (can be path, URL, or image data). :type image: ImageTyping :param model_name: Name of the Camie model to use :param model_name: Name of the Camie model to use. :type model_name: str :param mode: Prediction mode affecting threshold values :param mode: Prediction mode affecting threshold values. :type mode: CamieModeTyping :param thresholds: Custom thresholds for tag selection :param thresholds: Custom thresholds for tag selection. :type thresholds: Optional[Union[float, Dict[str, float]]] :param no_underline: Whether to remove underscores from tag names :param no_underline: Whether to remove underscores from tag names. Default is ``False``. :type no_underline: bool :param drop_overlap: Whether to remove overlapping tags :param drop_overlap: Whether to remove overlapping tags. Default is ``False``. :type drop_overlap: bool :param fmt: Format specification for output :param fmt: Format specification for output. Default is ``('rating', 'general', 'character')``. :type fmt: Any :return: Dictionary of extracted tags and embeddings :rtype: Dict[str, Any] .. note:: The fmt argument can include the following keys: - ``rating``: a dict containing ratings and their confidences - ``general``: a dict containing general tags and their confidences - ``character``: a dict containing character tags and their confidences - ``copyright``: a dict containing copyright tags and their confidences - ``artist``: a dict containing artist tags and their confidences - ``meta``: a dict containing meta tags and their confidences - ``year``: a dict containing year tags and their confidences - ``tag``: a dict containing all tags (including general, character, copyright, artist, meta, year, not including rating) and their confidences - ``embedding``: a 1-dim embedding of image, recommended for index building after L2 normalization - ``prediction``: a 1-dim prediction result of image You can extract embedding of the given image with the following code >>> from imgutils.tagging import get_camie_tags >>> >>> embedding = get_camie_tags('skadi.jpg', fmt='embedding') >>> embedding.shape (1280, ) This embedding is valuable for constructing indices that enable rapid querying of images based on visual features within large-scale datasets. .. warning:: From our testings, other tag types (year, meta, artist, copyright) have shown limited accuracy. Especially for artist tags, just a bit better than ``np.random.randn``. So these tag types are not included in default ``fmt``. Example: Here are some images for example .. image:: tagging_demo.plot.py.svg :align: center >>> rating, features, chars = get_camie_tags('skadi.jpg') >>> rating {'general': 0.04246556758880615, 'sensitive': 0.6936423778533936, 'questionable': 0.23721203207969666, 'explicit': 0.033293724060058594} >>> features {'1girl': 0.8412569165229797, 'blush': 0.38029077649116516, 'breasts': 0.618192195892334, 'cowboy_shot': 0.37446439266204834, 'large_breasts': 0.5698797702789307, 'long_hair': 0.7119565010070801, 'looking_at_viewer': 0.5252856612205505, 'shirt': 0.46417444944381714, 'solo': 0.5428758859634399, 'standing': 0.34731733798980713, 'tail': 0.3911612927913666, 'thigh_gap': 0.2932726740837097, 'thighs': 0.4544200003147125, 'very_long_hair': 0.44711941480636597, 'ass': 0.2854885458946228, 'outdoors': 0.6344638466835022, 'red_eyes': 0.611354410648346, 'day': 0.564970850944519, 'hair_between_eyes': 0.4444340467453003, 'holding': 0.35846662521362305, 'parted_lips': 0.3867686092853546, 'blue_sky': 0.3723931908607483, 'cloud': 0.31086698174476624, 'short_sleeves': 0.43279752135276794, 'sky': 0.3896197974681854, 'gloves': 0.6638736724853516, 'grey_hair': 0.5094802975654602, 'sweat': 0.4867050349712372, 'navel': 0.6593714952468872, 'crop_top': 0.5243107676506042, 'shorts': 0.4374789893627167, 'artist_name': 0.3754707872867584, 'midriff': 0.6238733530044556, 'ass_visible_through_thighs': 0.31088054180145264, 'gym_uniform': 0.37657681107521057, 'black_shirt': 0.3012588620185852, 'watermark': 0.5147127509117126, 'web_address': 0.6296812295913696, 'short_shorts': 0.29214906692504883, 'black_shorts': 0.37801358103752136, 'buruma': 0.536261260509491, 'bike_shorts': 0.35828399658203125, 'black_gloves': 0.4156728982925415, 'sportswear': 0.44427722692489624, 'baseball_bat': 0.2838006019592285, 'crop_top_overhang': 0.49192047119140625, 'stomach': 0.36012423038482666, 'black_buruma': 0.3422132134437561, 'official_alternate_costume': 0.2783987522125244, 'baseball': 0.38377970457077026, 'baseball_mitt': 0.32592540979385376, 'cropped_shirt': 0.35402947664260864, 'holding_baseball_bat': 0.2758416533470154, 'black_sports_bra': 0.3463800549507141, 'sports_bra': 0.28466159105300903, 'exercising': 0.2603980302810669, 'bike_jersey': 0.2661605477333069, 'patreon_username': 0.7087235450744629, 'patreon_logo': 0.560276210308075} >>> chars {'skadi_(arknights)': 0.5921452641487122} >>> >>> rating, features, chars = get_camie_tags('hutao.jpg') >>> rating {'general': 0.41121846437454224, 'sensitive': 0.4002530574798584, 'questionable': 0.03438958525657654, 'explicit': 0.04617959260940552} >>> features {'1girl': 0.8312125205993652, 'blush': 0.3996567726135254, 'cowboy_shot': 0.28660568594932556, 'long_hair': 0.7184156775474548, 'long_sleeves': 0.4706878066062927, 'looking_at_viewer': 0.5503140687942505, 'school_uniform': 0.365602970123291, 'shirt': 0.41183334589004517, 'sidelocks': 0.28638553619384766, 'smile': 0.3707748055458069, 'solo': 0.520854115486145, 'standing': 0.2960333526134491, 'tongue': 0.6556028127670288, 'tongue_out': 0.6966925859451294, 'very_long_hair': 0.5526134371757507, 'skirt': 0.6872812509536743, 'brown_hair': 0.5945607423782349, 'hair_ornament': 0.4464661478996277, 'hair_ribbon': 0.3646523952484131, 'outdoors': 0.37938451766967773, 'red_eyes': 0.5426545143127441, 'ribbon': 0.3027467727661133, 'bag': 0.8986430168151855, 'hair_between_eyes': 0.337802529335022, 'holding': 0.38589367270469666, 'pleated_skirt': 0.6475872993469238, 'school_bag': 0.666648805141449, 'ahoge': 0.4749193489551544, 'white_shirt': 0.27104783058166504, 'closed_mouth': 0.28101325035095215, 'collared_shirt': 0.37030768394470215, 'miniskirt': 0.32576680183410645, ':p': 0.4337637424468994, 'alternate_costume': 0.42441293597221375, 'black_skirt': 0.34694597125053406, 'twintails': 0.5711237192153931, 'open_clothes': 0.31017544865608215, 'nail_polish': 0.534726083278656, 'jacket': 0.4544385075569153, 'open_jacket': 0.27831193804740906, 'flower': 0.45064714550971985, 'plaid_clothes': 0.5494365096092224, 'plaid_skirt': 0.610480546951294, 'red_flower': 0.35928308963775635, 'contemporary': 0.37732189893722534, 'backpack': 0.5575172305107117, 'fingernails': 0.27776333689689636, 'cardigan': 0.3264558017253876, 'blue_jacket': 0.31882336735725403, 'ghost': 0.5534622073173523, 'red_nails': 0.38771501183509827, ':q': 0.3758758008480072, 'hair_flower': 0.39574217796325684, 'charm_(object)': 0.5394986271858215, 'handbag': 0.37014907598495483, 'black_bag': 0.44918346405029297, 'shoulder_bag': 0.5881174802780151, 'symbol-shaped_pupils': 0.5163478255271912, 'blue_cardigan': 0.28089386224746704, 'black_nails': 0.42480990290641785, 'bag_charm': 0.5010414123535156, 'plum_blossoms': 0.27618563175201416, 'flower-shaped_pupils': 0.5317837595939636} >>> chars {'hu_tao_(genshin_impact)': 0.8859397172927856, 'boo_tao_(genshin_impact)': 0.7348971366882324} """ names = vnames(fmt) need_full = False Loading