Commit 822b8820 authored by narugo1992's avatar narugo1992
Browse files

dev(narugo): better docs

parent 7f3c8597
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@ Overview:
    .. warning::
        This module has been deprecated and will be removed in the future.

        It is recommended to migrate to the `imgutils.ocr.detect_text_with_ocr` function as soon as possible.
        It is recommended to migrate to the :func:`imgutils.ocr.detect_text_with_ocr` function as soon as possible.
        This function uses a higher-quality text detection model provided by PaddleOCR,
        resulting in improved performance and higher efficiency.

@@ -137,6 +137,10 @@ def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: floa
    :type max_area_size: Optional[int]
    :return: List of detected text bounding boxes, labels, and scores.
    :rtype: List[Tuple[Tuple[int, int, int, int], str, float]]

    .. warning::
        This function is deprecated, and it will be removed from imgutils in the future.
        Please migrate to :func:`imgutils.ocr.detect_text_with_ocr` as soon as possible.
    """
    image = load_image(image)
    if max_area_size is not None and image.width * image.height >= max_area_size ** 2:
+19 −5
Original line number Diff line number Diff line
@@ -87,14 +87,21 @@ def detect_text_with_ocr(image: ImageTyping, model: str = _DEFAULT_DET_MODEL,
    :return: A list of detected text boxes, each with its label (always ``text``) and its confidence score.
    :rtype: List[Tuple[Tuple[int, int, int, int], str, float]]

    .. note::
        If you need to extract the actual text content, use the :func:`ocr` function.

    Examples::
        >>> from imgutils.ocr import detect_text_with_ocr
        >>>
        >>> detect_text_with_ocr('comic.jpg')
        [((742, 485, 809, 511), 'text', 0.9543377610144915), ((682, 98, 734, 124), 'text', 0.9309689495575223), ((716, 136, 836, 164), 'text', 0.9042856988923695), ((144, 455, 196, 485), 'text', 0.874083638387722), ((719, 455, 835, 488), 'text', 0.8628696346175078), ((124, 478, 214, 508), 'text', 0.848871771901487), ((1030, 557, 1184, 578), 'text', 0.8352495440618789), ((427, 129, 553, 154), 'text', 0.8249209443996619)]
        [((742, 485, 809, 511), 'text', 0.9543377610144915),
         ((682, 98, 734, 124), 'text', 0.9309689495575223),
         ((716, 136, 836, 164), 'text', 0.9042856988923695),
         ((144, 455, 196, 485), 'text', 0.874083638387722),
         ((719, 455, 835, 488), 'text', 0.8628696346175078),
         ((124, 478, 214, 508), 'text', 0.848871771901487),
         ((1030, 557, 1184, 578), 'text', 0.8352495440618789),
         ((427, 129, 553, 154), 'text', 0.8249209443996619)]

    .. note::
        If you need to extract the actual text content, use the :func:`ocr` function.
    """
    retval = []
    for box, _, score in _detect_text(image, model, heat_threshold, box_threshold, max_candidates, unclip_ratio):
@@ -135,7 +142,14 @@ def ocr(image: ImageTyping, detect_model: str = _DEFAULT_DET_MODEL,
        >>> from imgutils.ocr import ocr
        >>>
        >>> ocr('comic.jpg')
        [((742, 485, 809, 511), 'MOB.', 0.9356705927336156), ((716, 136, 836, 164), 'SHISHOU,', 0.8933000384412466), ((682, 98, 734, 124), 'BUT', 0.8730931912907247), ((144, 455, 196, 485), 'OH,', 0.8417627579351514), ((427, 129, 553, 154), 'A MIRROR.', 0.7366019454049503), ((1030, 557, 1184, 578), '(EL)  GATO IBERICO', 0.7271127306351021), ((719, 455, 835, 488), "THAt'S △", 0.701928390168364), ((124, 478, 214, 508), 'LOOK!', 0.6965972578194936)]
        [((742, 485, 809, 511), 'MOB.', 0.9356705927336156),
         ((716, 136, 836, 164), 'SHISHOU,', 0.8933000384412466),
         ((682, 98, 734, 124), 'BUT', 0.8730931912907247),
         ((144, 455, 196, 485), 'OH,', 0.8417627579351514),
         ((427, 129, 553, 154), 'A MIRROR.', 0.7366019454049503),
         ((1030, 557, 1184, 578), '(EL)  GATO IBERICO', 0.7271127306351021),
         ((719, 455, 835, 488), "THAt'S △", 0.701928390168364),
         ((124, 478, 214, 508), 'LOOK!', 0.6965972578194936)]

        By default, the text recognition model used is ``ch_PP-OCRv4_rec``.
        This recognition model has good recognition capabilities for both Chinese and English.