# ocr_extractor / app.py
# Author: fredcaixeta · model tag: 20b · commit: 7efb907
import gradio as gr
from ocr_script import ocr_tesseract_only
import uuid
import os
from dotenv import load_dotenv
from pydantic_ai import Agent, RunContext
from pydantic_ai.usage import UsageLimits
from pydantic_ai.models.groq import GroqModel
load_dotenv()
api_key = os.getenv("GROQ_API_KEY")
# Modelo Groq via Pydantic AI
model = GroqModel(model_name="openai/gpt-oss-20b")
def respond(message, history, user_id, ocr_text):
# Garantir que o system prompt seja o texto OCR atual
system_prompt_text = ocr_text or "Nenhum texto OCR disponível."
search_agent = Agent(model, system_prompt=system_prompt_text)
result = search_agent.run_sync(str(message))
return result.output
with gr.Blocks() as demo:
with gr.Tabs():
with gr.Tab("Text OCR Tesseract only"):
ocr_state = gr.State("") # Armazena o texto OCR para uso no chat
with gr.Row():
img_in = gr.Image(label="Imagem (png, jpg, jpeg)", type="pil")
txt_out = gr.Textbox(label="Texto OCR", lines=12)
def run_ocr(img):
text = ocr_tesseract_only(img)
return text, text
img_in.change(fn=run_ocr, inputs=img_in, outputs=[txt_out, ocr_state])
with gr.Tab("Chat"):
user_id = gr.State(str(uuid.uuid4()))
gr.ChatInterface(
fn=respond,
additional_inputs=[user_id, ocr_state], # injeta o texto OCR no fn
type="messages",
title="Chat with AI Agent with Access to Extracted Data",
description="Envie perguntas sobre os dados extraídos.",
save_history=True,
examples=[
["What is the name of the invoice document available?"],
["Which document has the ID aZwfUT2Zs?"]
],
cache_examples=True,
)
demo.launch()