Loading modules/ocr/__init__.py +33 −18 Original line number Diff line number Diff line Loading @@ -246,34 +246,37 @@ class OCRMIT48pxCTC(OCRBase): self.chunk_size = chunk_size self.model.max_chunk_size = chunk_size import sys, platform if sys.platform == 'darwin' and platform.mac_ver()[0] >= '10.15': import platform if platform.mac_ver()[0] >= '10.15': from .macos_ocr import get_supported_languages langs = list(get_supported_languages()[0]) APPLEVISIONFRAMEWORK = None @register_OCR('macos_ocr') class OCRApple(OCRBase): params = { 'Language': { 'language': { 'type':'selector', 'options': langs 'options': list(get_supported_languages()[0]), 'select': 'en-US', }, # 'Recognition Level': { # 'recognition_level': { # 'type': 'selector', # 'options': [ # 'Fast', # 'Accurate' # ] # } # 'accurate', # 'fast', # ], # 'select': 'accurate', # }, 'confidence_level': '0.1', } language = 'en-US' # recognition = 'Accurate' recognition = 'accurate' confidence = '0.1' def setup_ocr(self): global APPLEVISIONFRAMEWORK from .macos_ocr import AppleOCR if APPLEVISIONFRAMEWORK is None: self.model = APPLEVISIONFRAMEWORK = AppleOCR() self.model = APPLEVISIONFRAMEWORK = AppleOCR(lang=[self.language]) else: self.model = APPLEVISIONFRAMEWORK Loading @@ -281,17 +284,29 @@ if sys.platform == 'darwin' and platform.mac_ver()[0] >= '10.15': return self.model(img) def ocr_blk_list(self, img: np.ndarray, blk_list: List[TextBlock]): pass im_h, im_w = img.shape[:2] for blk in blk_list: x1, y1, x2, y2 = blk.xyxy if y2 < im_h and x2 < im_w and \ x1 > 0 and y1 > 0 and x1 < x2 and y1 < y2: blk.text = self.model(img[y1:y2, x1:x2]) else: logging.warning('invalid textbbox to target img') blk.text = [''] def updateParam(self, param_key: str, param_content): super().updateParam(param_key, param_content) language = self.params['language']['select'] # recognition = self.params['recognition level']['select'] self.language = self.params['language']['select'] self.model.lang = [self.language] # self.recognition = self.params['recognition_level']['select'] # self.model.recog_level = self.recognition # self.params['language']['options'] = list(get_supported_languages(self.recognition)[0]) self.language = language # self.recognition = recognition self.confidence = self.params['confidence_level'] self.model.min_confidence = self.confidence if sys.platform == 'win32': if platform.system() == 'Windows': WINDOWSOCRENGINE = None @register_OCR('WindowsOCR') class OCRWindows(OCRBase): Loading modules/ocr/macos_ocr.py +30 −24 Original line number Diff line number Diff line # https://github.com/straussmaximilian/ocrmac/blob/main/ocrmac/ocrmac.py # https://gist.github.com/RhetTbull/1c34fc07c95733642cffcd1ac587fc4c # https://github.com/RhetTbull/textinator/blob/main/src/macvision.py import Vision import objc import platform from typing import Tuple import numpy as np # Vision.VNRequestTextRecognitionLevelAccurate 0 # Vision.VNRequestTextRecognitionLevelFast 1 # Vision.VNRecognizeTextRequestRevision1 1 # Vision.VNRecognizeTextRequestRevision2 2 # Vision.VNRecognizeTextRequestRevision3 3 from PIL import Image from io import BytesIO def get_revision_level(): with objc.autorelease_pool(): Loading @@ -26,25 +18,32 @@ def get_revision_level(): revision = Vision.VNRecognizeTextRequestRevision1 return revision def get_supported_languages(recognition_level=0, revision=get_revision_level()) -> Tuple[Tuple[str], Tuple[str]]: def get_supported_languages(recognition_level='accurate', revision=get_revision_level()) -> Tuple[Tuple[str], Tuple[str]]: """Get supported languages for text detection from Vision framework. Returns: Tuple of ((language code), (error)) """ if recognition_level == 'fast': recognition_level = 1 else: recognition_level = 0 return Vision.VNRecognizeTextRequest.supportedRecognitionLanguagesForTextRecognitionLevel_revision_error_( recognition_level, revision, None ) def text_from_image(image: np.ndarray, recognition_level="accurate", language_preference=None): def text_from_image(image: np.ndarray, language_preference=None, recognition_level='accurate'): recognition_level = recognition_level.lower() if language_preference == 'Auto': language_preference = None image = image.tobytes() img_buf = BytesIO() Image.fromarray(image).save(img_buf, format='PNG') with objc.autorelease_pool(): req = Vision.VNRecognizeTextRequest.alloc().init() if recognition_level == "fast": if recognition_level == 'fast': req.setRecognitionLevel_(1) else: req.setRecognitionLevel_(0) Loading @@ -53,18 +52,18 @@ def text_from_image(image: np.ndarray, recognition_level="accurate", language_pr req.setRecognitionLanguages_(language_preference) handler = Vision.VNImageRequestHandler.alloc().initWithData_options_( image, None img_buf.getvalue(), None ) success = handler.performRequests_error_([req], None) res = [] if success: for result in req.results(): bbox = result.boundingBox() w, h = bbox.size.width, bbox.size.height x, y = bbox.origin.x, bbox.origin.y # bbox = result.boundingBox() # w, h = bbox.size.width, bbox.size.height # x, y = bbox.origin.x, bbox.origin.y res.append((result.text(), result.confidence(), [x, y, w, h])) res.append((result.text(), result.confidence())) #, [x, y, w, h])) req.dealloc() handler.dealloc() Loading @@ -73,8 +72,15 @@ def text_from_image(image: np.ndarray, recognition_level="accurate", language_pr class AppleOCR: def __init__(self): pass def __call__(self, img) -> str: pass No newline at end of file def __init__(self, lang=[], recog_level='accurate', min_confidence='0.1'): self.lang = lang self.recog_level = recog_level self.min_confidence = min_confidence def __call__(self, img: np.ndarray) -> str: result = [] results = text_from_image(img, self.lang, self.recog_level) for res in results: if res[1] >= float(self.min_confidence): result.append(res[0]) return '\n'.join(result) No newline at end of file Loading
modules/ocr/__init__.py +33 −18 Original line number Diff line number Diff line Loading @@ -246,34 +246,37 @@ class OCRMIT48pxCTC(OCRBase): self.chunk_size = chunk_size self.model.max_chunk_size = chunk_size import sys, platform if sys.platform == 'darwin' and platform.mac_ver()[0] >= '10.15': import platform if platform.mac_ver()[0] >= '10.15': from .macos_ocr import get_supported_languages langs = list(get_supported_languages()[0]) APPLEVISIONFRAMEWORK = None @register_OCR('macos_ocr') class OCRApple(OCRBase): params = { 'Language': { 'language': { 'type':'selector', 'options': langs 'options': list(get_supported_languages()[0]), 'select': 'en-US', }, # 'Recognition Level': { # 'recognition_level': { # 'type': 'selector', # 'options': [ # 'Fast', # 'Accurate' # ] # } # 'accurate', # 'fast', # ], # 'select': 'accurate', # }, 'confidence_level': '0.1', } language = 'en-US' # recognition = 'Accurate' recognition = 'accurate' confidence = '0.1' def setup_ocr(self): global APPLEVISIONFRAMEWORK from .macos_ocr import AppleOCR if APPLEVISIONFRAMEWORK is None: self.model = APPLEVISIONFRAMEWORK = AppleOCR() self.model = APPLEVISIONFRAMEWORK = AppleOCR(lang=[self.language]) else: self.model = APPLEVISIONFRAMEWORK Loading @@ -281,17 +284,29 @@ if sys.platform == 'darwin' and platform.mac_ver()[0] >= '10.15': return self.model(img) def ocr_blk_list(self, img: np.ndarray, blk_list: List[TextBlock]): pass im_h, im_w = img.shape[:2] for blk in blk_list: x1, y1, x2, y2 = blk.xyxy if y2 < im_h and x2 < im_w and \ x1 > 0 and y1 > 0 and x1 < x2 and y1 < y2: blk.text = self.model(img[y1:y2, x1:x2]) else: logging.warning('invalid textbbox to target img') blk.text = [''] def updateParam(self, param_key: str, param_content): super().updateParam(param_key, param_content) language = self.params['language']['select'] # recognition = self.params['recognition level']['select'] self.language = self.params['language']['select'] self.model.lang = [self.language] # self.recognition = self.params['recognition_level']['select'] # self.model.recog_level = self.recognition # self.params['language']['options'] = list(get_supported_languages(self.recognition)[0]) self.language = language # self.recognition = recognition self.confidence = self.params['confidence_level'] self.model.min_confidence = self.confidence if sys.platform == 'win32': if platform.system() == 'Windows': WINDOWSOCRENGINE = None @register_OCR('WindowsOCR') class OCRWindows(OCRBase): Loading
modules/ocr/macos_ocr.py +30 −24 Original line number Diff line number Diff line # https://github.com/straussmaximilian/ocrmac/blob/main/ocrmac/ocrmac.py # https://gist.github.com/RhetTbull/1c34fc07c95733642cffcd1ac587fc4c # https://github.com/RhetTbull/textinator/blob/main/src/macvision.py import Vision import objc import platform from typing import Tuple import numpy as np # Vision.VNRequestTextRecognitionLevelAccurate 0 # Vision.VNRequestTextRecognitionLevelFast 1 # Vision.VNRecognizeTextRequestRevision1 1 # Vision.VNRecognizeTextRequestRevision2 2 # Vision.VNRecognizeTextRequestRevision3 3 from PIL import Image from io import BytesIO def get_revision_level(): with objc.autorelease_pool(): Loading @@ -26,25 +18,32 @@ def get_revision_level(): revision = Vision.VNRecognizeTextRequestRevision1 return revision def get_supported_languages(recognition_level=0, revision=get_revision_level()) -> Tuple[Tuple[str], Tuple[str]]: def get_supported_languages(recognition_level='accurate', revision=get_revision_level()) -> Tuple[Tuple[str], Tuple[str]]: """Get supported languages for text detection from Vision framework. Returns: Tuple of ((language code), (error)) """ if recognition_level == 'fast': recognition_level = 1 else: recognition_level = 0 return Vision.VNRecognizeTextRequest.supportedRecognitionLanguagesForTextRecognitionLevel_revision_error_( recognition_level, revision, None ) def text_from_image(image: np.ndarray, recognition_level="accurate", language_preference=None): def text_from_image(image: np.ndarray, language_preference=None, recognition_level='accurate'): recognition_level = recognition_level.lower() if language_preference == 'Auto': language_preference = None image = image.tobytes() img_buf = BytesIO() Image.fromarray(image).save(img_buf, format='PNG') with objc.autorelease_pool(): req = Vision.VNRecognizeTextRequest.alloc().init() if recognition_level == "fast": if recognition_level == 'fast': req.setRecognitionLevel_(1) else: req.setRecognitionLevel_(0) Loading @@ -53,18 +52,18 @@ def text_from_image(image: np.ndarray, recognition_level="accurate", language_pr req.setRecognitionLanguages_(language_preference) handler = Vision.VNImageRequestHandler.alloc().initWithData_options_( image, None img_buf.getvalue(), None ) success = handler.performRequests_error_([req], None) res = [] if success: for result in req.results(): bbox = result.boundingBox() w, h = bbox.size.width, bbox.size.height x, y = bbox.origin.x, bbox.origin.y # bbox = result.boundingBox() # w, h = bbox.size.width, bbox.size.height # x, y = bbox.origin.x, bbox.origin.y res.append((result.text(), result.confidence(), [x, y, w, h])) res.append((result.text(), result.confidence())) #, [x, y, w, h])) req.dealloc() handler.dealloc() Loading @@ -73,8 +72,15 @@ def text_from_image(image: np.ndarray, recognition_level="accurate", language_pr class AppleOCR: def __init__(self): pass def __call__(self, img) -> str: pass No newline at end of file def __init__(self, lang=[], recog_level='accurate', min_confidence='0.1'): self.lang = lang self.recog_level = recog_level self.min_confidence = min_confidence def __call__(self, img: np.ndarray) -> str: result = [] results = text_from_image(img, self.lang, self.recog_level) for res in results: if res[1] >= float(self.min_confidence): result.append(res[0]) return '\n'.join(result) No newline at end of file