Commit ecff441c authored by tak2hu's avatar tak2hu
Browse files

really inefficient macocr, but hey it works

parent 5a7a8a55
Loading
Loading
Loading
Loading
+33 −18
Original line number Diff line number Diff line
@@ -246,34 +246,37 @@ class OCRMIT48pxCTC(OCRBase):
        self.chunk_size = chunk_size
        self.model.max_chunk_size = chunk_size
    
import platform
import sys


def macos_version_tuple(version: str) -> tuple:
    """Convert a dotted version string such as '10.15.7' into a tuple of ints.

    Parsing stops at the first non-numeric component, so an empty string
    (what ``platform.mac_ver()[0]`` reports when not on macOS) gives ``()``.
    """
    parts = []
    for part in version.split('.'):
        if not part.isdigit():
            break
        parts.append(int(part))
    return tuple(parts)


# Compare numerically: as plain strings '10.9' >= '10.15' is True, which would
# wrongly enable the Vision backend on macOS releases older than 10.15.
if sys.platform == 'darwin' and \
        macos_version_tuple(platform.mac_ver()[0]) >= (10, 15):
    from .macos_ocr import get_supported_languages
    langs = list(get_supported_languages()[0])
    # Shared AppleOCR instance; filled in lazily by OCRApple.setup_ocr.
    APPLEVISIONFRAMEWORK = None
    @register_OCR('macos_ocr')
    class OCRApple(OCRBase):
        # UI-configurable settings; updateParam reads them back by these keys.
        params = {
            'language': {
                'type': 'selector',
                'options': list(get_supported_languages()[0]),
                'select': 'en-US',
            },
            # TODO: expose a 'recognition_level' selector ('accurate' / 'fast',
            # default 'accurate') once switching levels at runtime is wired up.
            'confidence_level': '0.1',
        }
        # Class-level defaults, used until updateParam overwrites them.
        language = 'en-US'
        recognition = 'accurate'
        confidence = '0.1'

        def setup_ocr(self):
            """Attach the shared AppleOCR engine to this instance.

            The engine is cached in the module-level ``APPLEVISIONFRAMEWORK``
            so it is constructed only once, with this instance's language.
            Later calls reuse the cached object unchanged, so it keeps
            whatever language it was last given.
            """
            global APPLEVISIONFRAMEWORK
            from .macos_ocr import AppleOCR
            if APPLEVISIONFRAMEWORK is None:
                APPLEVISIONFRAMEWORK = AppleOCR(lang=[self.language])
            self.model = APPLEVISIONFRAMEWORK

@@ -281,17 +284,29 @@ if sys.platform == 'darwin' and platform.mac_ver()[0] >= '10.15':
            return self.model(img)

        def ocr_blk_list(self, img: np.ndarray, blk_list: List['TextBlock']):
            """Run OCR on each block's bounding box and store the result in ``blk.text``.

            Args:
                img: Image array; its first two dimensions are read as
                    (height, width).
                blk_list: Blocks exposing ``xyxy`` as (x1, y1, x2, y2). Each
                    block's ``text`` attribute is overwritten in place.
            """
            im_h, im_w = img.shape[:2]
            for blk in blk_list:
                x1, y1, x2, y2 = blk.xyxy
                # Bounds are inclusive: a box flush with an image edge
                # (x1 == 0, y1 == 0, x2 == im_w, y2 == im_h) is still a valid,
                # non-empty slice and must not be rejected.
                if 0 <= x1 < x2 <= im_w and 0 <= y1 < y2 <= im_h:
                    blk.text = self.model(img[y1:y2, x1:x2])
                else:
                    logging.warning('invalid textbbox to target img')
                    blk.text = ['']

        def updateParam(self, param_key: str, param_content):
            """Delegate the change to the base class, then sync settings to the OCR engine.

            Args:
                param_key: Name of the parameter that changed.
                param_content: New value for that parameter.
            """
            super().updateParam(param_key, param_content)

            self.language = self.params['language']['select']
            self.model.lang = [self.language]

            # TODO: once the 'recognition_level' parameter is enabled, also
            # update self.recognition / self.model.recog_level here and refresh
            # the language options via get_supported_languages(self.recognition).

            self.confidence = self.params['confidence_level']
            self.model.min_confidence = self.confidence

if sys.platform == 'win32':
if platform.system() == 'Windows':
    WINDOWSOCRENGINE = None
    @register_OCR('WindowsOCR')
    class OCRWindows(OCRBase):
+30 −24
Original line number Diff line number Diff line
# https://github.com/straussmaximilian/ocrmac/blob/main/ocrmac/ocrmac.py
# https://gist.github.com/RhetTbull/1c34fc07c95733642cffcd1ac587fc4c
# https://github.com/RhetTbull/textinator/blob/main/src/macvision.py

import Vision
import objc
import platform
from typing import Tuple
import numpy as np

# Vision.VNRequestTextRecognitionLevelAccurate  0
# Vision.VNRequestTextRecognitionLevelFast      1
# Vision.VNRecognizeTextRequestRevision1        1
# Vision.VNRecognizeTextRequestRevision2        2
# Vision.VNRecognizeTextRequestRevision3        3
from PIL import Image
from io import BytesIO

def get_revision_level():
    with objc.autorelease_pool():
@@ -26,25 +18,32 @@ def get_revision_level():
            revision = Vision.VNRecognizeTextRequestRevision1
        return revision

def get_supported_languages(recognition_level='accurate', revision=get_revision_level()) -> Tuple[Tuple[str], Tuple[str]]:
    """Get supported languages for text detection from Vision framework.

    Args:
        recognition_level: 'fast' (case-insensitive) or the integer 1 selects
            the fast recognizer; any other value selects the accurate one.
        revision: Text-recognition request revision to query. The default is
            computed once, when this function is defined.

    Returns: Tuple of ((language code), (error))
    """
    if isinstance(recognition_level, str):
        use_fast = recognition_level.lower() == 'fast'
    else:
        # The previous signature took the raw Vision level (0 accurate, 1 fast).
        use_fast = recognition_level == 1
    return Vision.VNRecognizeTextRequest.supportedRecognitionLanguagesForTextRecognitionLevel_revision_error_(
        1 if use_fast else 0, revision, None
        )

def text_from_image(image: np.ndarray, recognition_level="accurate", language_preference=None):
def text_from_image(image: np.ndarray, language_preference=None, recognition_level='accurate'):
    recognition_level = recognition_level.lower()
    if language_preference == 'Auto':
        language_preference = None
    image = image.tobytes()

    img_buf = BytesIO()
    Image.fromarray(image).save(img_buf, format='PNG')

    with objc.autorelease_pool():
        req = Vision.VNRecognizeTextRequest.alloc().init()

        if recognition_level == "fast":
        if recognition_level == 'fast':
            req.setRecognitionLevel_(1)
        else:
            req.setRecognitionLevel_(0)
@@ -53,18 +52,18 @@ def text_from_image(image: np.ndarray, recognition_level="accurate", language_pr
            req.setRecognitionLanguages_(language_preference)

        handler = Vision.VNImageRequestHandler.alloc().initWithData_options_(
            image, None
            img_buf.getvalue(), None
        )

        success = handler.performRequests_error_([req], None)
        res = []
        if success:
            for result in req.results():
                bbox = result.boundingBox()
                w, h = bbox.size.width, bbox.size.height
                x, y = bbox.origin.x, bbox.origin.y
                # bbox = result.boundingBox()
                # w, h = bbox.size.width, bbox.size.height
                # x, y = bbox.origin.x, bbox.origin.y

                res.append((result.text(), result.confidence(), [x, y, w, h]))
                res.append((result.text(), result.confidence())) #, [x, y, w, h]))

        req.dealloc()
        handler.dealloc()
@@ -73,8 +72,15 @@ def text_from_image(image: np.ndarray, recognition_level="accurate", language_pr


class AppleOCR:
    """Callable wrapper around ``text_from_image`` that keeps the OCR settings.

    The attributes are plain and may be reassigned between calls:
        lang: language codes handed to ``text_from_image`` as its language
            preference.
        recog_level: recognition level handed to ``text_from_image``
            ('accurate' or 'fast').
        min_confidence: lowest confidence a result needs to be kept. It may
            be a numeric string; it is converted with ``float`` on each call.
    """

    def __init__(self, lang=None, recog_level='accurate', min_confidence='0.1'):
        # Build a fresh list per instance: a ``lang=[]`` default would be one
        # list object shared by every AppleOCR created without a language.
        self.lang = [] if lang is None else lang
        self.recog_level = recog_level
        self.min_confidence = min_confidence

    def __call__(self, img: np.ndarray) -> str:
        """Recognize text in ``img`` and return the kept results joined by newlines."""
        threshold = float(self.min_confidence)
        results = text_from_image(img, self.lang, self.recog_level)
        # Each result starts with (text, confidence); keep confident ones only.
        return '\n'.join(res[0] for res in results if res[1] >= threshold)
 No newline at end of file