ocr_extractor / ocr_script.py
fredcaixeta
comma
c83dfac
import numpy as np
import cv2
import pytesseract
from PIL import Image
def pil_to_cv_bgr(pil_img: Image.Image) -> np.ndarray:
    """Convert a PIL image into a BGR-ordered array for OpenCV.

    The image is first converted to RGB mode, turned into a NumPy array,
    and then its channels are reordered from RGB to BGR with
    ``cv2.cvtColor``.

    Args:
        pil_img: Source PIL image.

    Returns:
        The result of ``cv2.cvtColor(..., cv2.COLOR_RGB2BGR)`` on the RGB
        pixel array.
    """
    rgb_pixels = np.array(pil_img.convert("RGB"))
    # OpenCV routines in this module expect BGR channel order.
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
def getSkewAngle(cvImage: np.ndarray) -> float:
    """Estimate the rotation needed to deskew a BGR image.

    The image is converted to grayscale, blurred, binarised with an
    inverted Otsu threshold and dilated with a wide rectangular kernel.
    The minimum-area rectangle around the largest resulting contour
    provides the skew estimate.

    Args:
        cvImage: Image array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``. It is not modified.

    Returns:
        The negated, normalised rectangle angle, suitable for passing to
        ``rotateImage``. Returns ``0.0`` when no contour is found.
    """
    gray = cv2.cvtColor(cvImage, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(
        blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]

    # Wide, short kernel applied twice; presumably intended to merge
    # characters on a line into larger horizontal blobs.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=2)

    contours, _ = cv2.findContours(
        dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    if not contours:
        return 0.0

    # Only the largest contour is needed, so avoid sorting all of them.
    largestContour = max(contours, key=cv2.contourArea)
    angle = cv2.minAreaRect(largestContour)[-1]

    # Fold the rectangle angle into [-45, 45]. Depending on the OpenCV
    # release, minAreaRect reports the angle either in [-90, 0) or in
    # (0, 90]; handling only the negative tail lets values such as 88
    # through, which would rotate the page by almost a quarter turn.
    # NOTE(review): confirm the angle range against the installed OpenCV.
    if angle < -45:
        angle += 90
    elif angle > 45:
        angle -= 90

    if angle == 0:
        # Avoid returning negative zero.
        return 0.0
    return -1.0 * angle
def rotateImage(cvImage: np.ndarray, angle: float) -> np.ndarray:
    """Return a copy of the image rotated about its centre.

    The rotation matrix is built by ``cv2.getRotationMatrix2D`` with a
    scale of 1.0. The output keeps the input's width and height, uses
    cubic interpolation, and fills uncovered areas by replicating border
    pixels.

    Args:
        cvImage: Image array; its first two dimensions are read as
            (height, width). It is not modified.
        angle: Rotation angle passed to ``cv2.getRotationMatrix2D``.

    Returns:
        A new array holding the rotated image.
    """
    height, width = cvImage.shape[:2]
    pivot = (width // 2, height // 2)
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        cvImage,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
def deskew_from_pil(pil_img: Image.Image) -> np.ndarray:
    """Deskew a PIL image and return it as a BGR array.

    Converts the image with ``pil_to_cv_bgr``, estimates the correction
    angle with ``getSkewAngle`` and applies it with ``rotateImage``.

    Args:
        pil_img: Source PIL image.

    Returns:
        The rotated BGR image array.
    """
    bgr = pil_to_cv_bgr(pil_img)
    return rotateImage(bgr, getSkewAngle(bgr))
def ocr_tesseract_only(pil_img: Image.Image, lang="por", config="--psm 6 --oem 3") -> str:
    """Deskew an image and extract its text with Tesseract.

    Args:
        pil_img: Source PIL image.
        lang: Language argument forwarded to
            ``pytesseract.image_to_string``.
        config: Configuration string forwarded to
            ``pytesseract.image_to_string``.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    deskewed_bgr = deskew_from_pil(pil_img)
    # Convert BGR -> RGB for PIL before Tesseract
    # (pytesseract accepts PIL/numpy RGB).
    deskewed_rgb = cv2.cvtColor(deskewed_bgr, cv2.COLOR_BGR2RGB)
    return pytesseract.image_to_string(
        Image.fromarray(deskewed_rgb), lang=lang, config=config
    )