Files
AoEBot/aoe_recognition_ocr.py

28 lines
972 B
Python

# C:\Program Files\Tesseract-OCR\tesseract.exe
import os

# Append the Tesseract-OCR install directory to PATH (presumably so that
# pytesseract can locate tesseract.exe -- confirm against the deployment).
# The directory only exists on Windows, so the change is limited to that
# platform. Elsewhere os.environ is case-sensitive and a lowercase 'path'
# lookup would raise KeyError at import time.
if os.name == "nt":
    os.environ["PATH"] = (
        os.environ.get("PATH", "") + ";C:\\Program Files\\Tesseract-OCR"
    )
import cv2
import pytesseract
import numpy as np
import re
# https://towardsdatascience.com/optical-character-recognition-ocr-with-less-than-12-lines-of-code-using-python-48404218cccb
def recognize_text(img_pil):
    """Run Tesseract OCR on an image and return the cleaned-up text.

    The image is converted to grayscale, binarized with Otsu's method,
    inverted, and passed through an erode/dilate pair (a morphological
    opening) with a 2x1 kernel before being handed to
    ``pytesseract.image_to_string``.

    Args:
        img_pil: Any object ``np.asarray`` can turn into an image array.
            Multi-channel input is interpreted in RGB channel order
            (presumably a PIL image, going by the parameter name -- confirm
            against callers). A 2-D array is used as grayscale as-is.

    Returns:
        The recognized text with every character other than ASCII letters,
        digits, underscore and whitespace removed, each whitespace character
        replaced by one space, and leading/trailing whitespace stripped.
        Whitespace runs are not collapsed, so consecutive line breaks in the
        OCR output become consecutive spaces.
    """
    pixels = np.asarray(img_pil)
    if pixels.ndim == 2:
        # Already single-channel; cvtColor(RGB2GRAY) rejects such input.
        gray = pixels
    else:
        gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

    # With THRESH_OTSU the threshold level is picked automatically, so the
    # 128 passed here is not used. The first return value is the chosen
    # level, which this function does not need.
    _, binary = cv2.threshold(
        gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )
    inverted = cv2.bitwise_not(binary)

    # Erode then dilate with the same 2-row, 1-column kernel.
    kernel = np.ones((2, 1), np.uint8)
    opened = cv2.erode(inverted, kernel, iterations=1)
    opened = cv2.dilate(opened, kernel, iterations=1)

    out_below = pytesseract.image_to_string(opened)
    out_below = re.sub(r'[^a-zA-Z0-9_\s]', '', out_below)
    out_below = re.sub(r'[\s]', ' ', out_below).strip()
    print("OCR OUTPUT:", out_below)
    return out_below
#recognize_text(cv2.imread('ocr_test.png'))