Commit a6982787 authored by tak2hu's avatar tak2hu
Browse files

Windows OCR, code taken from winocr, kinda hacky

parent f4d5ad04
Loading
Loading
Loading
Loading
+32 −3
Original line number Diff line number Diff line
@@ -258,6 +258,9 @@ if platform.mac_ver()[0] >= '10.15':
                'options': list(get_supported_languages()[0]),
                'select': 'en-US',
            },
            # While this option does appear,
            # changing it doesn't update the list of available languages
            # (each recognition level has a different set of available languages).
            # 'recognition_level': {
            #     'type': 'selector',
            #     'options': [
@@ -306,10 +309,22 @@ if platform.mac_ver()[0] >= '10.15':
            self.confidence = self.params['confidence_level']
            self.model.min_confidence = self.confidence

def windows_version_tuple(version: str) -> tuple:
    """Convert a dotted version string such as '10.0.19045' into a tuple of ints.

    Parsing stops at the first component that is not purely numeric, so an
    unexpected suffix yields a shorter tuple instead of raising.
    """
    parts = []
    for piece in version.split('.'):
        if not piece.isdigit():
            break
        parts.append(int(piece))
    return tuple(parts)


# Compare the version numerically: as plain strings '6.1.7601' sorts after
# '10.0.10240.0', and '10.0.10240' sorts before it.
if (platform.system() == 'Windows'
        and windows_version_tuple(platform.version()) >= (10, 0, 10240)):
    from .windows_ocr import get_supported_language_packs

    # Query the language packs once so both lists are built from the same
    # snapshot and stay index-aligned (display name i <-> tag i).
    _language_packs = get_supported_language_packs()
    languages_display_name = [lang.display_name for lang in _language_packs]
    languages_tag = [lang.language_tag for lang in _language_packs]
    WINDOWSOCRENGINE = None
    @register_OCR('WindowsOCR')
    @register_OCR('windows_ocr')
    class OCRWindows(OCRBase):
        params = {
            'language': {
                'type':'selector',
                'options': languages_display_name,
                'select': languages_display_name[0],
            }
        }
        language = languages_display_name[0]

        def setup_ocr(self):
            global WINDOWSOCRENGINE
@@ -323,4 +338,18 @@ if platform.system() == 'Windows':
            self.engine(img)

        def ocr_blk_list(self, img: np.ndarray, blk_list: List[TextBlock]) -> None:
            """Run OCR on each block's region of ``img`` and store the result in ``blk.text``.

            Args:
                img: Source image; only its first two dimensions (height, width)
                    are read here before cropping.
                blk_list: Blocks exposing ``xyxy`` (x1, y1, x2, y2) and a
                    writable ``text`` attribute.

            A block whose box is empty or reaches outside the image is not
            recognised: a warning is logged and its text is set to ``['']``.
            """
            im_h, im_w = img.shape[:2]
            for blk in blk_list:
                x1, y1, x2, y2 = blk.xyxy
                # A box touching the image border is still a valid crop, so the
                # outer bounds are inclusive; only empty/outside boxes are rejected.
                if 0 <= x1 < x2 <= im_w and 0 <= y1 < y2 <= im_h:
                    blk.text = self.engine(img[y1:y2, x1:x2])
                else:
                    logging.warning('invalid textbbox to target img')
                    # NOTE(review): this branch stores a list while the branch
                    # above stores whatever self.engine returns - confirm which
                    # type consumers of blk.text expect.
                    blk.text = ['']
        
        def updateParam(self, param_key: str, param_content):
            """Apply a parameter change, then re-sync the engine's language.

            After the base class has handled the update, the selected display
            name is read back from ``self.params`` and translated to the
            language tag at the same index, which is assigned to the engine.
            """
            super().updateParam(param_key, param_content)
            selected = self.params['language']['select']
            self.language = selected
            # Display names and tags are parallel lists: same index, same language.
            position = languages_display_name.index(selected)
            self.engine.lang = languages_tag[position]
 No newline at end of file
+15 −24
Original line number Diff line number Diff line
# https://gist.github.com/dantmnf/23f060278585d6243ffd9b0c538beab2
# https://github.com/GitHub30/winocr/blob/main/winocr.py
# https://learn.microsoft.com/en-us/windows/powertoys/text-extractor#how-to-query-for-ocr-language-packs

from winsdk.windows.media.ocr import OcrEngine
from winsdk.windows.globalization import Language
from winsdk.windows.storage.streams import DataWriter
from winsdk.windows.graphics.imaging import SoftwareBitmap, BitmapPixelFormat

import platform
from distutils.version import LooseVersion

def does_windows_version_support_this():
    """Placeholder: no check is implemented yet, so this always returns ``None``."""
    return None

import numpy as np
import cv2, asyncio

def get_supported_language_packs():
    """Return the languages the Windows OCR engine reports as available.

    Returns:
        list: A list built from ``OcrEngine.available_recognizer_languages``,
        so callers can index into it and iterate it more than once.
    """
    return list(OcrEngine.available_recognizer_languages)

def get_installed_ocr_language_packs():
    """Placeholder: not implemented yet, so this always returns ``None``."""
    return None

def install_ocr_language_pack():
    """Return the recognizer languages currently reported by ``OcrEngine``.

    NOTE(review): despite the name, nothing is installed here - the body only
    lists ``OcrEngine.available_recognizer_languages``. Confirm the intent.
    """
    available = OcrEngine.available_recognizer_languages
    return list(available)

def uninstall_ocr_language_pack():
    """Placeholder: not implemented yet, so this always returns ``None``."""
    return None
def ocr(byte, width, height, lang='en'):
    """Start text recognition on a raw pixel buffer.

    Args:
        byte: Pixel data, written unchanged into the bitmap buffer; the bitmap
            is created with ``BitmapPixelFormat.RGBA8``.
        width: Bitmap width in pixels.
        height: Bitmap height in pixels.
        lang: Language tag of the recognizer to use.

    Returns:
        The object returned by ``recognize_async``; it must be awaited to
        obtain the recognition result.

    Raises:
        ValueError: If no OCR engine could be created for ``lang``.
    """
    engine = OcrEngine.try_create_from_language(Language(lang))
    if engine is None:
        # try_create_from_language signals an unsupported language by returning
        # nothing; fail with a clear message instead of an AttributeError below.
        raise ValueError(f'no Windows OCR engine available for language {lang!r}')
    writer = DataWriter()
    writer.write_bytes(byte)
    sb = SoftwareBitmap.create_copy_from_buffer(writer.detach_buffer(), BitmapPixelFormat.RGBA8, width, height)
    return engine.recognize_async(sb)

def image2text():
    """Placeholder: not implemented yet, so this always returns ``None``."""
    return None
async def coroutine(awaitable):
    """Await ``awaitable`` and return its result.

    Gives an arbitrary awaitable a coroutine wrapper, e.g. so it can be
    handed to ``asyncio.run``.
    """
    result = await awaitable
    return result

class WindowsOCR:
    """Callable wrapper that runs Windows OCR on an image array."""

    # Language tag handed to ocr() on every call. Defaults to the first entry
    # of get_supported_language_packs(), evaluated once when the class is
    # defined; assign a different tag to switch language.
    lang = get_supported_language_packs()[0].language_tag

    def __call__(self, img: np.ndarray) -> str:
        """Recognise the text in ``img`` using the language in ``self.lang``.

        Args:
            img: Image converted with ``cv2.COLOR_BGR2RGBA`` before recognition,
                so presumably a 3-channel BGR array - TODO confirm with callers.

        Returns:
            The ``text`` attribute of the recognition result.
        """
        rgba = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)
        height, width = rgba.shape[:2]
        # asyncio.run drives the recognition to completion on a new event
        # loop; it cannot be used from inside an already running loop.
        result = asyncio.run(coroutine(ocr(rgba.tobytes(), width, height, self.lang)))
        return result.text
 No newline at end of file