Loading docs/source/_libs/plot.py +1 −6 Original line number Diff line number Diff line Loading @@ -57,11 +57,6 @@ def image_plot(*images, save_as: str, columns=2, keep_axis: bool = False, figsiz for i, img in enumerate(images, start=0): xi, yi = i // columns, i % columns image, label = _image_input_process(img, autocensor) if rows == 1 and columns == 1: ax = axs elif rows == 1: ax = axs[yi] else: ax = axs[xi, yi] ax.imshow(image) ax.set_title(label) Loading docs/source/api_doc/detect/text_detect_deprecate_demo.plot.py 0 → 100644 +31 −0 Original line number Diff line number Diff line import font from imgutils.data import load_image from imgutils.detect import detect_text from imgutils.detect.visual import detection_visualize from imgutils.ocr import ocr from plot import image_plot def _detect_with_ocr(img, *, max_size=None, **kwargs): img = load_image(img, mode='RGB', force_background='white') if max_size is not None and min(img.height, img.width) > max_size: r = max_size / min(img.height, img.width) img = img.resize(( int(round(img.width * r)), int(round(img.height * r)), )) return detection_visualize(img, ocr(img, **kwargs), fp=font.get_cn_fp()) def _detect_with_deprecated(img, **kwargs): return detection_visualize(img, detect_text(img, **kwargs)) if __name__ == '__main__': image_plot( (_detect_with_deprecated('text/ml2.jpg'), 'detect_text'), (_detect_with_ocr('text/ml2.jpg'), 'detect_text_with_ocr'), columns=2, figsize=(13, 3.8), ) docs/source/api_doc/detect/text_detect_deprecate_demo.plot.py.svg 0 → 100644 +299 −0 File added.Preview size limit exceeded, changes collapsed. Show changes imgutils/detect/text.py +18 −0 Original line number Diff line number Diff line Loading @@ -12,14 +12,26 @@ Overview: .. image:: text_detect_benchmark.plot.py.svg :align: center .. warning:: This module has been deprecated and will be removed in the future. It is recommended to migrate to the :func:`imgutils.ocr.detect_text_with_ocr` function as soon as possible. 
This function uses a higher-quality text detection model provided by PaddleOCR, resulting in improved performance and higher efficiency. .. image:: text_detect_deprecate_demo.plot.py.svg :align: center """ from functools import lru_cache from typing import List, Tuple, Optional import cv2 import numpy as np from deprecation import deprecated from huggingface_hub import hf_hub_download from ..config.meta import __VERSION__ from ..data import ImageTyping, load_image from ..utils import open_onnx_model Loading Loading @@ -106,6 +118,8 @@ def _get_bounding_box_of_text(image: ImageTyping, model: str, threshold: float) return bboxes @deprecated(deprecated_in="0.2.10", removed_in="0.4", current_version=__VERSION__, details="Use the new function :func:`imgutils.ocr.detect_text_with_ocr` instead") def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: float = 0.05, max_area_size: Optional[int] = 640): """ Loading @@ -123,6 +137,10 @@ def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: floa :type max_area_size: Optional[int] :return: List of detected text bounding boxes, labels, and scores. :rtype: List[Tuple[Tuple[int, int, int, int], str, float]] .. warning:: This function is deprecated, and it will be removed from imgutils in the future. Please migrate to :func:`imgutils.ocr.detect_text_with_ocr` as soon as possible. """ image = load_image(image) if max_area_size is not None and image.width * image.height >= max_area_size ** 2: Loading imgutils/ocr/entry.py +19 −5 Original line number Diff line number Diff line Loading @@ -87,14 +87,21 @@ def detect_text_with_ocr(image: ImageTyping, model: str = _DEFAULT_DET_MODEL, :return: A list of detected text boxes, label (always ``text``), and their confidence scores. :rtype: List[Tuple[Tuple[int, int, int, int], str, float]] .. note:: If you need to extract the actual text content, use the :func:`ocr` function. 
Examples:: >>> from imgutils.ocr import detect_text_with_ocr >>> >>> detect_text_with_ocr('comic.jpg') [((742, 485, 809, 511), 'text', 0.9543377610144915), ((682, 98, 734, 124), 'text', 0.9309689495575223), ((716, 136, 836, 164), 'text', 0.9042856988923695), ((144, 455, 196, 485), 'text', 0.874083638387722), ((719, 455, 835, 488), 'text', 0.8628696346175078), ((124, 478, 214, 508), 'text', 0.848871771901487), ((1030, 557, 1184, 578), 'text', 0.8352495440618789), ((427, 129, 553, 154), 'text', 0.8249209443996619)] [((742, 485, 809, 511), 'text', 0.9543377610144915), ((682, 98, 734, 124), 'text', 0.9309689495575223), ((716, 136, 836, 164), 'text', 0.9042856988923695), ((144, 455, 196, 485), 'text', 0.874083638387722), ((719, 455, 835, 488), 'text', 0.8628696346175078), ((124, 478, 214, 508), 'text', 0.848871771901487), ((1030, 557, 1184, 578), 'text', 0.8352495440618789), ((427, 129, 553, 154), 'text', 0.8249209443996619)] .. note:: If you need to extract the actual text content, use the :func:`ocr` function. 
""" retval = [] for box, _, score in _detect_text(image, model, heat_threshold, box_threshold, max_candidates, unclip_ratio): Loading Loading @@ -135,7 +142,14 @@ def ocr(image: ImageTyping, detect_model: str = _DEFAULT_DET_MODEL, >>> from imgutils.ocr import ocr >>> >>> ocr('comic.jpg') [((742, 485, 809, 511), 'MOB.', 0.9356705927336156), ((716, 136, 836, 164), 'SHISHOU,', 0.8933000384412466), ((682, 98, 734, 124), 'BUT', 0.8730931912907247), ((144, 455, 196, 485), 'OH,', 0.8417627579351514), ((427, 129, 553, 154), 'A MIRROR.', 0.7366019454049503), ((1030, 557, 1184, 578), '(EL) GATO IBERICO', 0.7271127306351021), ((719, 455, 835, 488), "THAt'S △", 0.701928390168364), ((124, 478, 214, 508), 'LOOK!', 0.6965972578194936)] [((742, 485, 809, 511), 'MOB.', 0.9356705927336156), ((716, 136, 836, 164), 'SHISHOU,', 0.8933000384412466), ((682, 98, 734, 124), 'BUT', 0.8730931912907247), ((144, 455, 196, 485), 'OH,', 0.8417627579351514), ((427, 129, 553, 154), 'A MIRROR.', 0.7366019454049503), ((1030, 557, 1184, 578), '(EL) GATO IBERICO', 0.7271127306351021), ((719, 455, 835, 488), "THAt'S △", 0.701928390168364), ((124, 478, 214, 508), 'LOOK!', 0.6965972578194936)] By default, the text recognition model used is `ch_PP-OCRv4_rec`. This recognition model has good recognition capabilities for both Chinese and English. Loading Loading
docs/source/_libs/plot.py +1 −6 Original line number Diff line number Diff line Loading @@ -57,11 +57,6 @@ def image_plot(*images, save_as: str, columns=2, keep_axis: bool = False, figsiz for i, img in enumerate(images, start=0): xi, yi = i // columns, i % columns image, label = _image_input_process(img, autocensor) if rows == 1 and columns == 1: ax = axs elif rows == 1: ax = axs[yi] else: ax = axs[xi, yi] ax.imshow(image) ax.set_title(label) Loading
# docs/source/api_doc/detect/text_detect_deprecate_demo.plot.py (new file, +31 -0)
"""
Documentation demo script: render the output of the deprecated ``detect_text``
next to the output of ``ocr`` for the same sample image.
"""
import font
from imgutils.data import load_image
from imgutils.detect import detect_text
from imgutils.detect.visual import detection_visualize
from imgutils.ocr import ocr
from plot import image_plot


def _detect_with_ocr(img, *, max_size=None, **kwargs):
    """
    Run ``ocr`` on an image and return the ``detection_visualize`` result.

    :param img: Anything accepted by ``load_image``; it is loaded with
        ``mode='RGB'`` and ``force_background='white'``.
    :param max_size: Optional upper bound for the shorter image side. When the
        shorter side is strictly greater than this value, the image is resized
        by ``max_size / shorter_side`` on both axes before ``ocr`` is called.
    :param kwargs: Forwarded unchanged to ``ocr``.
    :return: Whatever ``detection_visualize`` returns for the (possibly
        resized) image and the ``ocr`` result.
    """
    picture = load_image(img, mode='RGB', force_background='white')

    if max_size is not None:
        shorter_side = min(picture.height, picture.width)
        if shorter_side > max_size:
            scale = max_size / shorter_side
            new_width = int(round(picture.width * scale))
            new_height = int(round(picture.height * scale))
            picture = picture.resize((new_width, new_height))

    detections = ocr(picture, **kwargs)
    # NOTE(review): get_cn_fp presumably returns a CJK-capable font path - confirm in font.py
    font_path = font.get_cn_fp()
    return detection_visualize(picture, detections, fp=font_path)


def _detect_with_deprecated(img, **kwargs):
    """
    Run the deprecated ``detect_text`` on ``img`` and return the
    ``detection_visualize`` result.

    :param img: Passed as-is to both ``detect_text`` and ``detection_visualize``.
    :param kwargs: Forwarded unchanged to ``detect_text``.
    """
    detections = detect_text(img, **kwargs)
    return detection_visualize(img, detections)


if __name__ == '__main__':
    _SAMPLE = 'text/ml2.jpg'

    # Build the panels in the same order they are displayed: deprecated first.
    deprecated_panel = (_detect_with_deprecated(_SAMPLE), 'detect_text')
    ocr_panel = (_detect_with_ocr(_SAMPLE), 'detect_text_with_ocr')

    image_plot(
        deprecated_panel,
        ocr_panel,
        columns=2,
        figsize=(13, 3.8),
    )
docs/source/api_doc/detect/text_detect_deprecate_demo.plot.py.svg 0 → 100644 +299 −0 File added.Preview size limit exceeded, changes collapsed. Show changes
imgutils/detect/text.py +18 −0 Original line number Diff line number Diff line Loading @@ -12,14 +12,26 @@ Overview: .. image:: text_detect_benchmark.plot.py.svg :align: center .. warning:: This module has been deprecated and will be removed in the future. It is recommended to migrate to the :func:`imgutils.ocr.detect_text_with_ocr` function as soon as possible. This function uses a higher-quality text detection model provided by PaddleOCR, resulting in improved performance and higher efficiency. .. image:: text_detect_deprecate_demo.plot.py.svg :align: center """ from functools import lru_cache from typing import List, Tuple, Optional import cv2 import numpy as np from deprecation import deprecated from huggingface_hub import hf_hub_download from ..config.meta import __VERSION__ from ..data import ImageTyping, load_image from ..utils import open_onnx_model Loading Loading @@ -106,6 +118,8 @@ def _get_bounding_box_of_text(image: ImageTyping, model: str, threshold: float) return bboxes @deprecated(deprecated_in="0.2.10", removed_in="0.4", current_version=__VERSION__, details="Use the new function :func:`imgutils.ocr.detect_text_with_ocr` instead") def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: float = 0.05, max_area_size: Optional[int] = 640): """ Loading @@ -123,6 +137,10 @@ def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: floa :type max_area_size: Optional[int] :return: List of detected text bounding boxes, labels, and scores. :rtype: List[Tuple[Tuple[int, int, int, int], str, float]] .. warning:: This function is deprecated, and it will be removed from imgutils in the future. Please migrate to :func:`imgutils.ocr.detect_text_with_ocr` as soon as possible. """ image = load_image(image) if max_area_size is not None and image.width * image.height >= max_area_size ** 2: Loading
imgutils/ocr/entry.py +19 −5 Original line number Diff line number Diff line Loading @@ -87,14 +87,21 @@ def detect_text_with_ocr(image: ImageTyping, model: str = _DEFAULT_DET_MODEL, :return: A list of detected text boxes, label (always ``text``), and their confidence scores. :rtype: List[Tuple[Tuple[int, int, int, int], str, float]] .. note:: If you need to extract the actual text content, use the :func:`ocr` function. Examples:: >>> from imgutils.ocr import detect_text_with_ocr >>> >>> detect_text_with_ocr('comic.jpg') [((742, 485, 809, 511), 'text', 0.9543377610144915), ((682, 98, 734, 124), 'text', 0.9309689495575223), ((716, 136, 836, 164), 'text', 0.9042856988923695), ((144, 455, 196, 485), 'text', 0.874083638387722), ((719, 455, 835, 488), 'text', 0.8628696346175078), ((124, 478, 214, 508), 'text', 0.848871771901487), ((1030, 557, 1184, 578), 'text', 0.8352495440618789), ((427, 129, 553, 154), 'text', 0.8249209443996619)] [((742, 485, 809, 511), 'text', 0.9543377610144915), ((682, 98, 734, 124), 'text', 0.9309689495575223), ((716, 136, 836, 164), 'text', 0.9042856988923695), ((144, 455, 196, 485), 'text', 0.874083638387722), ((719, 455, 835, 488), 'text', 0.8628696346175078), ((124, 478, 214, 508), 'text', 0.848871771901487), ((1030, 557, 1184, 578), 'text', 0.8352495440618789), ((427, 129, 553, 154), 'text', 0.8249209443996619)] .. note:: If you need to extract the actual text content, use the :func:`ocr` function. 
""" retval = [] for box, _, score in _detect_text(image, model, heat_threshold, box_threshold, max_candidates, unclip_ratio): Loading Loading @@ -135,7 +142,14 @@ def ocr(image: ImageTyping, detect_model: str = _DEFAULT_DET_MODEL, >>> from imgutils.ocr import ocr >>> >>> ocr('comic.jpg') [((742, 485, 809, 511), 'MOB.', 0.9356705927336156), ((716, 136, 836, 164), 'SHISHOU,', 0.8933000384412466), ((682, 98, 734, 124), 'BUT', 0.8730931912907247), ((144, 455, 196, 485), 'OH,', 0.8417627579351514), ((427, 129, 553, 154), 'A MIRROR.', 0.7366019454049503), ((1030, 557, 1184, 578), '(EL) GATO IBERICO', 0.7271127306351021), ((719, 455, 835, 488), "THAt'S △", 0.701928390168364), ((124, 478, 214, 508), 'LOOK!', 0.6965972578194936)] [((742, 485, 809, 511), 'MOB.', 0.9356705927336156), ((716, 136, 836, 164), 'SHISHOU,', 0.8933000384412466), ((682, 98, 734, 124), 'BUT', 0.8730931912907247), ((144, 455, 196, 485), 'OH,', 0.8417627579351514), ((427, 129, 553, 154), 'A MIRROR.', 0.7366019454049503), ((1030, 557, 1184, 578), '(EL) GATO IBERICO', 0.7271127306351021), ((719, 455, 835, 488), "THAt'S △", 0.701928390168364), ((124, 478, 214, 508), 'LOOK!', 0.6965972578194936)] By default, the text recognition model used is `ch_PP-OCRv4_rec`. This recognition model has good recognition capabilities for both Chinese and English. Loading