Gemini
feat: add detailed logging
01d5a5d
from pathlib import Path
import fitz # PyMuPDF
# from ...core.processor import BaseFileProcessor
from ...core.file_type import FileType
from ...core.decorators import processor_register
from ...core.exceptions import FileProcessingError
from ...document import Document, ProcessStatus
from lpm_kernel.file_data.processors.processor import BaseFileProcessor
@processor_register
class PDFProcessor(BaseFileProcessor):
    """File processor that extracts plain text from PDF files via PyMuPDF.

    The class is passed through ``processor_register`` and declares
    ``FileType.PDF`` as its only supported type.
    """

    # File types this processor declares support for.
    # NOTE(review): presumably consulted by the registry/base class when
    # dispatching files to processors -- confirm against BaseFileProcessor.
    SUPPORTED_TYPES = {FileType.PDF}

    @classmethod
    def _process_file(cls, file_path: Path, doc: Document) -> Document:
        """Extract the text of every page of a PDF into ``doc``.

        Args:
            file_path: Location of the PDF file to open.
            doc: Document that receives the extracted text in
                ``raw_content`` and the outcome in ``extract_status``.
                It is mutated in place.

        Returns:
            The same ``doc`` instance, with ``raw_content`` set to the
            concatenated page text and ``extract_status`` set to
            ``ProcessStatus.SUCCESS``.

        Raises:
            FileProcessingError: If opening or reading the PDF (or updating
                ``doc``) fails for any reason. ``extract_status`` is set to
                ``ProcessStatus.FAILED`` before raising, and the original
                exception is attached as ``__cause__``.
        """
        try:
            with fitz.open(file_path) as pdf:
                # Pages are joined with no separator, in document order.
                # The join must happen while the PDF is still open.
                text = "".join(page.get_text() for page in pdf)
            doc.raw_content = text
            doc.extract_status = ProcessStatus.SUCCESS
        except Exception as e:
            # Deliberately broad: this is the boundary where any backend
            # failure is translated into the package's own error type.
            doc.extract_status = ProcessStatus.FAILED
            # Chain the cause so the underlying PyMuPDF traceback survives.
            raise FileProcessingError(f"Failed to process PDF: {e}") from e
        return doc