Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from backend import process_request, get_pdf_files, save_result_to_file, extract_text_with_fitz, extract_text_with_docling, preview_image_processing, load_system_prompt, load_user_prompt, load_postprocess_prompt, process_request_preprocessing_only, process_request_postprocessing_only | |
| def create_ui(): | |
| """Create and configure the Gradio UI interface.""" | |
| with gr.Blocks( | |
| title="์ด๋ ฅ์ ๋ถ์ ์์คํ ", | |
| css=""" | |
| .main-container { max-width: 1400px; margin: 0 auto; } | |
| .section-header { margin-bottom: 15px; } | |
| .input-group { margin-bottom: 20px; } | |
| #batch_result_area, #result_area { | |
| min-height: 200px !important; | |
| } | |
| #batch_info { | |
| font-size: 0.85em; | |
| color: #666; | |
| margin-bottom: 10px; | |
| } | |
| /* ํธ์ง ์์ญ ์คํ์ผ */ | |
| .edit-area { | |
| border: 2px dashed #ccc; | |
| border-radius: 5px; | |
| background-color: #f9f9f9; | |
| } | |
| """, | |
| theme=gr.themes.Soft() | |
| ) as app: | |
| with gr.Column(elem_classes="main-container"): | |
| gr.Markdown("# ๐ ์ด๋ ฅ์ ๋ถ์ ์์คํ ", elem_classes="section-header") | |
| # ์๋จ ์์ญ: ํ์ผ ์ ํ + ๋ก๊ทธ ์ ๋ณด | |
| with gr.Row(equal_height=True): | |
| # ํ์ผ ์ ํ ์์ญ (์ผ์ชฝ, ์ปดํฉํธ) | |
| with gr.Column(scale=2): | |
| with gr.Group(): | |
| gr.Markdown("### ๐ ํ์ผ ์ ํ") | |
| pdf_files = get_pdf_files() | |
| default_pdf = "./resume_samples/pdf/text/๋ฆฌ๋ฉค๋ฒ-S3.pdf" if "./resume_samples/pdf/text/๋ฆฌ๋ฉค๋ฒ-S3.pdf" in pdf_files else (pdf_files[0] if pdf_files else None) | |
| pdf_dropdown = gr.Dropdown( | |
| label="PDF ํ์ผ ์ ํ", | |
| choices=pdf_files, | |
| value=default_pdf, | |
| interactive=True | |
| ) | |
| file_upload = gr.File( | |
| label="๋๋ ์ PDF ํ์ผ ์ ๋ก๋", | |
| file_types=[".pdf"], | |
| type="filepath" | |
| ) | |
| # ์ค์๊ฐ ์ํ ์ ๋ณด (์ค๋ฅธ์ชฝ) | |
| with gr.Column(scale=3): | |
| with gr.Group(): | |
| gr.Markdown("### ๐ ์ค์๊ฐ ์ํ ์ ๋ณด") | |
| status_log_output = gr.Textbox( | |
| label="์ฒ๋ฆฌ ์ํ", | |
| lines=6, | |
| max_lines=10, | |
| value="์์คํ ์ค๋น ์๋ฃ - ํ์ผ์ ์ ํํ๊ณ ๋ถ์์ ์์ํ์ธ์...", | |
| interactive=False, | |
| show_label=False | |
| ) | |
| gr.Markdown("---") | |
| with gr.Tabs(): | |
| # ๋ถ์ ํญ | |
| with gr.TabItem("๐ค AI ๋ถ์"): | |
| # ์ค์ ์์ญ | |
| with gr.Row(equal_height=True): | |
| with gr.Column(scale=2): | |
| with gr.Group(): | |
| gr.Markdown("### 1๏ธโฃ ํ๋กฌํํธ ์ค์ ") | |
| system_prompt_input = gr.TextArea( | |
| label="์์คํ ํ๋กฌํํธ", | |
| value=load_system_prompt(), | |
| lines=5, | |
| placeholder="์์คํ ํ๋กฌํํธ๋ฅผ ์ ๋ ฅํ์ธ์..." | |
| ) | |
| prompt_input = gr.TextArea( | |
| label="์ฌ์ฉ์ ํ๋กฌํํธ (์ ์ฒ๋ฆฌ)", | |
| value=load_user_prompt(), | |
| lines=3, | |
| placeholder="์ ์ฒ๋ฆฌ์ฉ ์ฌ์ฉ์ ํ๋กฌํํธ๋ฅผ ์ ๋ ฅํ์ธ์..." | |
| ) | |
| postprocess_prompt_input = gr.TextArea( | |
| label="ํ์ฒ๋ฆฌ ํ๋กฌํํธ", | |
| value=load_postprocess_prompt(), | |
| lines=3, | |
| placeholder="ํ์ฒ๋ฆฌ์ฉ ํ๋กฌํํธ๋ฅผ ์ ๋ ฅํ์ธ์..." | |
| ) | |
| with gr.Column(scale=1): | |
| with gr.Group(): | |
| gr.Markdown("### 2๏ธโฃ ์ฒ๋ฆฌ ์ค์ ") | |
| use_images = gr.Checkbox( | |
| label="์ด๋ฏธ์ง๋ก ๋ณํํ์ฌ ์ฒ๋ฆฌ", | |
| value=True, | |
| info="PDF๋ฅผ ์ด๋ฏธ์ง๋ก ๋ณํํ์ฌ ๋น์ ๋ชจ๋ธ๋ก ๋ถ์" | |
| ) | |
| image_processing_mode = gr.Radio( | |
| choices=["๊ฐ๋ก ๋ณํฉ (2ํ์ด์ง์ฉ)", "์ธ๋ก ๋ณํฉ (2ํ์ด์ง์ฉ)", "๋ฑ๊ฐ ํ์ด์ง"], | |
| value="๊ฐ๋ก ๋ณํฉ (2ํ์ด์ง์ฉ)", | |
| label="์ด๋ฏธ์ง ์ฒ๋ฆฌ ๋ฐฉ์", | |
| info="ํ์ด์ง ๋ณํฉ ๋ฐฉ์ ์ ํ" | |
| ) | |
| overlap_merge_option = gr.Radio( | |
| choices=["์ผ๋ฐ ๋ณํฉ", "์ค๋ณต ๋ณํฉ (์ฌ๋ผ์ด๋ฉ ์๋์ฐ)"], | |
| value="์ผ๋ฐ ๋ณํฉ", | |
| label="๋ณํฉ ๋ฐฉ์", | |
| info="์ผ๋ฐ: (1,2), (3,4)... | ์ค๋ณต: (1,2), (2,3)...", | |
| visible=True | |
| ) | |
| batch_size_slider = gr.Slider( | |
| minimum=1, | |
| maximum=3, | |
| value=3, | |
| step=1, | |
| label="์ด๋ฏธ์ง ๋ฐฐ์น ํฌ๊ธฐ", | |
| info="ํ ๋ฒ์ ์ฒ๋ฆฌํ ์ด๋ฏธ์ง ์ฅ์ (1-3์ฅ)" | |
| ) | |
| use_docling = gr.Checkbox( | |
| label="ํ ์คํธ ํ์ฑ ํจ๊ป ์ํ", | |
| value=True, | |
| info="Docling์ผ๋ก PDF ํ ์คํธ ์ถ์ถ" | |
| ) | |
| # use_postprocess ์ฒดํฌ๋ฐ์ค ์ ๊ฑฐ - ์ด์ ๋ฒํผ์ผ๋ก ๋ถ๋ฆฌ | |
| # ์คํ ๋ฒํผ ์์ญ | |
| with gr.Row(): | |
| with gr.Column(scale=2): | |
| output_filename = gr.Textbox( | |
| label="๊ฒฐ๊ณผ ํ์ผ ์ด๋ฆ (ํ์ฅ์ ์์ด)", | |
| value="result", | |
| placeholder="์ ์ฅํ ํ์ผ ์ด๋ฆ์ ์ ๋ ฅํ์ธ์" | |
| ) | |
| with gr.Column(scale=1): | |
| preprocessing_button = gr.Button( | |
| "๐ ์ ์ฒ๋ฆฌ ๋ถ์ ์์", | |
| variant="primary", | |
| size="lg" | |
| ) | |
| with gr.Column(scale=1): | |
| postprocessing_button = gr.Button( | |
| "๐ฏ ํ์ฒ๋ฆฌ ๋ถ์ ์์", | |
| variant="secondary", | |
| size="lg" | |
| ) | |
| # ๊ฒฐ๊ณผ ์์ญ | |
| gr.Markdown("---") | |
| gr.Markdown("## ๐ ๋ถ์ ๊ฒฐ๊ณผ") | |
| with gr.Row(): | |
| # ๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ - ํธ์ง ๊ฐ๋ฅ | |
| with gr.Column(scale=1): | |
| with gr.Group(): | |
| gr.Markdown("### ๐ ๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ") | |
| batch_result_output = gr.Markdown( | |
| value="*๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค...*", | |
| elem_id="batch_result_area", | |
| show_label=False, | |
| ) | |
| # ๋ฐฐ์น ๊ฒฐ๊ณผ ํธ์ง ์์ญ | |
| with gr.Row(): | |
| batch_edit_button = gr.Button( | |
| "โ๏ธ ํธ์ง", | |
| variant="secondary", | |
| size="sm" | |
| ) | |
| batch_save_button = gr.Button( | |
| "๐พ ์ ์ฅ", | |
| variant="primary", | |
| size="sm", | |
| visible=False | |
| ) | |
| batch_cancel_button = gr.Button( | |
| "โ ์ทจ์", | |
| variant="secondary", | |
| size="sm", | |
| visible=False | |
| ) | |
| batch_edit_area = gr.TextArea( | |
| value="", | |
| lines=15, | |
| max_lines=50, | |
| interactive=True, | |
| show_label=False, | |
| visible=False, | |
| placeholder="๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๋ฅผ ํธ์งํ์ธ์..." | |
| ) | |
| # ์ต์ข ๋ถ์ ๊ฒฐ๊ณผ - ์๋ ํฌ๊ธฐ ์กฐ์ | |
| with gr.Column(scale=1): | |
| with gr.Group(): | |
| gr.Markdown("### ๐ฏ ์ต์ข ๋ถ์ ๊ฒฐ๊ณผ") | |
| result_output = gr.Markdown( | |
| value="*์ต์ข ๋ถ์ ๊ฒฐ๊ณผ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค...*", | |
| elem_id="result_area", | |
| show_label=False, | |
| ) | |
| # ์ต์ข ๊ฒฐ๊ณผ ํธ์ง ์์ญ | |
| with gr.Row(): | |
| result_edit_button = gr.Button( | |
| "โ๏ธ ํธ์ง", | |
| variant="secondary", | |
| size="sm" | |
| ) | |
| result_save_button = gr.Button( | |
| "๐พ ์ ์ฅ", | |
| variant="primary", | |
| size="sm", | |
| visible=False | |
| ) | |
| result_cancel_button = gr.Button( | |
| "โ ์ทจ์", | |
| variant="secondary", | |
| size="sm", | |
| visible=False | |
| ) | |
| result_edit_area = gr.TextArea( | |
| value="", | |
| lines=15, | |
| max_lines=50, | |
| interactive=True, | |
| show_label=False, | |
| visible=False, | |
| placeholder="์ต์ข ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ํธ์งํ์ธ์..." | |
| ) | |
| # ํ์ผ ์ ์ฅ ์์ญ | |
| with gr.Row(): | |
| save_button = gr.Button( | |
| "๐พ ๊ฒฐ๊ณผ ์ ์ฅ", | |
| variant="secondary", | |
| size="sm" | |
| ) | |
| save_message = gr.Markdown( | |
| value="", | |
| visible=False | |
| ) | |
| # ๋ฏธ๋ฆฌ๋ณด๊ธฐ ํญ | |
| with gr.TabItem("๐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ"): | |
| # ์ด๋ฏธ์ง ๋ฏธ๋ฆฌ๋ณด๊ธฐ ์์ญ (์๋จ) | |
| with gr.Row(equal_height=True): | |
| with gr.Column(scale=1): | |
| with gr.Group(): | |
| gr.Markdown("### ๐ผ๏ธ ์ด๋ฏธ์ง ๋ฏธ๋ฆฌ๋ณด๊ธฐ") | |
| preview_button_tab = gr.Button( | |
| "์ด๋ฏธ์ง ์ฒ๋ฆฌ ๋ฏธ๋ฆฌ๋ณด๊ธฐ", | |
| variant="secondary", | |
| size="sm" | |
| ) | |
| image_preview_gallery_tab = gr.Gallery( | |
| label="์ฒ๋ฆฌ๋ ์ด๋ฏธ์ง", | |
| show_label=False, | |
| columns=2, | |
| rows=2, | |
| height=350, | |
| value=[] | |
| ) | |
| gr.Markdown("---") | |
| # ํ ์คํธ ์ถ์ถ ๋น๊ต ์์ญ (ํ๋จ) | |
| gr.Markdown("### ๐ PDF ํ ์คํธ ์ถ์ถ ๋น๊ต", elem_classes="section-header") | |
| with gr.Row(equal_height=True): | |
| # ํ ์คํธ ๊ธฐ๋ฐ ์ถ์ถ | |
| with gr.Column(scale=1): | |
| with gr.Group(): | |
| gr.Markdown("#### ๐ ํ ์คํธ ๊ธฐ๋ฐ ์ถ์ถ") | |
| gr.Markdown("*PDF์ ํ ์คํธ ๋ ์ด์ด์์ ์ง์ ์ถ์ถ*", elem_id="extract_info") | |
| text_extract_method = gr.Radio( | |
| choices=["Fitz (PyMuPDF)"], | |
| value="Fitz (PyMuPDF)", | |
| label="์ถ์ถ ๋ฐฉ์", | |
| info="๋น ๋ฅด๊ณ ๊ฐ๋ฒผ์ด ํ ์คํธ ์ถ์ถ" | |
| ) | |
| text_extract_btn = gr.Button( | |
| "๐ ํ ์คํธ ์ถ์ถ", | |
| variant="primary", | |
| size="sm" | |
| ) | |
| text_extract_result = gr.Markdown( | |
| value="*ํ ์คํธ ์ถ์ถ ๊ฒฐ๊ณผ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค...*", | |
| elem_id="text_result_area" | |
| ) | |
| # OCR ๊ธฐ๋ฐ ์ถ์ถ | |
| with gr.Column(scale=1): | |
| with gr.Group(): | |
| gr.Markdown("#### ๐ค OCR ๊ธฐ๋ฐ ์ถ์ถ") | |
| gr.Markdown("*์ด๋ฏธ์ง์์ ๊ดํ ๋ฌธ์ ์ธ์์ผ๋ก ์ถ์ถ*", elem_id="ocr_info") | |
| ocr_extract_btn = gr.Button( | |
| "๐ OCR ์ถ์ถ", | |
| variant="primary", | |
| size="sm" | |
| ) | |
| ocr_extract_result = gr.Markdown( | |
| value="*OCR ์ถ์ถ ๊ฒฐ๊ณผ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค...*", | |
| elem_id="ocr_result_area" | |
| ) | |
| # ํตํฉ ๋น๊ต ๋ฒํผ | |
| with gr.Row(): | |
| compare_both_btn = gr.Button( | |
| "๐ ์์ชฝ ๋ชจ๋ ์ถ์ถํ์ฌ ๋น๊ต", | |
| variant="secondary", | |
| size="lg" | |
| ) | |
| # API ์์ฒญ ๋ฐ ๋ก๊ทธ ํญ | |
| with gr.TabItem("๐ API ์์ฒญ & ๋ก๊ทธ"): | |
| with gr.Row(equal_height=True): | |
| # API ์์ฒญ RAW ์ ๋ณด | |
| with gr.Column(scale=1): | |
| with gr.Group(): | |
| gr.Markdown("#### ๐ค API ์์ฒญ (Raw)") | |
| api_request_output = gr.Code( | |
| value="๋ถ์ ์์ ์ ์ค์ API ์์ฒญ ๋ด์ฉ์ด ํ์๋ฉ๋๋ค", | |
| language="json", | |
| label=None, | |
| interactive=False | |
| ) | |
| # ํ ์คํธ ํ์ฑ ๊ฒฐ๊ณผ (์๋ ์ ๋ฐ์ดํธ) | |
| with gr.Column(scale=1): | |
| with gr.Group(): | |
| gr.Markdown("#### ๐ ํ ์คํธ ํ์ฑ ๊ฒฐ๊ณผ (์ค์๊ฐ)") | |
| docling_output = gr.Code( | |
| value="PDF ํ ์คํธ ํ์ฑ ๊ฒฐ๊ณผ๊ฐ ์๋์ผ๋ก ํ์๋ฉ๋๋ค", | |
| language="markdown", | |
| label=None, | |
| interactive=False, | |
| lines=20 | |
| ) | |
| # === ์ด๋ฒคํธ ํธ๋ค๋ฌ ํจ์๋ค === | |
| def update_status_only(status_text): | |
| """์ํ ๋ก๊ทธ๋ง ์ ๋ฐ์ดํธํ๋ ํจ์ (๋ก๋ฉ ํจ๊ณผ ์์)""" | |
| return status_text | |
| # === ํธ์ง ๊ด๋ จ ํจ์๋ค === | |
| def start_batch_edit(batch_content): | |
| """๋ฐฐ์น ๊ฒฐ๊ณผ ํธ์ง ์์""" | |
| return ( | |
| gr.update(visible=False), # edit button | |
| gr.update(visible=True), # save button | |
| gr.update(visible=True), # cancel button | |
| gr.update(visible=True, value=batch_content), # edit area | |
| gr.update(visible=False) # markdown display | |
| ) | |
| def save_batch_edit(edited_content): | |
| """๋ฐฐ์น ๊ฒฐ๊ณผ ํธ์ง ์ ์ฅ""" | |
| return ( | |
| gr.update(visible=True), # edit button | |
| gr.update(visible=False), # save button | |
| gr.update(visible=False), # cancel button | |
| gr.update(visible=False), # edit area | |
| gr.update(visible=True, value=edited_content) # markdown display | |
| ) | |
| def cancel_batch_edit(): | |
| """๋ฐฐ์น ๊ฒฐ๊ณผ ํธ์ง ์ทจ์""" | |
| return ( | |
| gr.update(visible=True), # edit button | |
| gr.update(visible=False), # save button | |
| gr.update(visible=False), # cancel button | |
| gr.update(visible=False), # edit area | |
| gr.update(visible=True) # markdown display | |
| ) | |
| def start_result_edit(result_content): | |
| """์ต์ข ๊ฒฐ๊ณผ ํธ์ง ์์""" | |
| return ( | |
| gr.update(visible=False), # edit button | |
| gr.update(visible=True), # save button | |
| gr.update(visible=True), # cancel button | |
| gr.update(visible=True, value=result_content), # edit area | |
| gr.update(visible=False) # markdown display | |
| ) | |
| def save_result_edit(edited_content): | |
| """์ต์ข ๊ฒฐ๊ณผ ํธ์ง ์ ์ฅ""" | |
| return ( | |
| gr.update(visible=True), # edit button | |
| gr.update(visible=False), # save button | |
| gr.update(visible=False), # cancel button | |
| gr.update(visible=False), # edit area | |
| gr.update(visible=True, value=edited_content) # markdown display | |
| ) | |
| def cancel_result_edit(): | |
| """์ต์ข ๊ฒฐ๊ณผ ํธ์ง ์ทจ์""" | |
| return ( | |
| gr.update(visible=True), # edit button | |
| gr.update(visible=False), # save button | |
| gr.update(visible=False), # cancel button | |
| gr.update(visible=False), # edit area | |
| gr.update(visible=True) # markdown display | |
| ) | |
| def process_preprocessing_wrapper(*args): | |
| """์ ์ฒ๋ฆฌ๋ง ์ํํ๋ ๋ํผ ํจ์""" | |
| try: | |
| # ์ ์ฒ๋ฆฌ ํจ์์์ Generator ๊ฒฐ๊ณผ ์ถ์ถ | |
| generator = process_request_preprocessing_only(*args) | |
| final_result = None | |
| # Generator์ ๋ชจ๋ ์ค๊ฐ ๊ฒฐ๊ณผ๋ฅผ ์ฒ๋ฆฌํ๋ฉฐ ๋ง์ง๋ง ๊ฒฐ๊ณผ๋ฅผ ์ป์ | |
| for result in generator: | |
| if result and len(result) >= 5: | |
| batch_content, result_content, docling_output, status_log, api_request = result | |
| # ์ ์ฒ๋ฆฌ์์๋ ๋ฐฐ์น ๊ฒฐ๊ณผ๋ง ํ์, ์ต์ข ๊ฒฐ๊ณผ๋ ์๋ด ๋ฉ์์ง ์ ์ง | |
| yield batch_content, "*์ ์ฒ๋ฆฌ ์๋ฃ ํ ํ์ฒ๋ฆฌ ๋ฒํผ์ ๋๋ฌ์ฃผ์ธ์...*", docling_output, status_log, api_request | |
| final_result = result | |
| if final_result and len(final_result) >= 5: | |
| batch_content, result_content, docling_output, status_log, api_request = final_result | |
| # ์ ์ฒ๋ฆฌ ์๋ฃ ์ ์ต์ข ๊ฒฐ๊ณผ๋ ์๋ด ๋ฉ์์ง๋ก ์ ์ง | |
| yield batch_content, "*โ ์ ์ฒ๋ฆฌ ์๋ฃ! ํ์ฒ๋ฆฌ ๋ถ์ ๋ฒํผ์ ๋๋ฌ ์ต์ข ๊ฒฐ๊ณผ๋ฅผ ํ์ธํ์ธ์.*", docling_output, status_log, api_request | |
| else: | |
| yield "๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", "์ ์ฒ๋ฆฌ๋ฅผ ๋จผ์ ์ํํด์ฃผ์ธ์.", "", "์ ์ฒ๋ฆฌ ์๋ฃ", "" | |
| except Exception as e: | |
| error_msg = f"์ ์ฒ๋ฆฌ ์ค๋ฅ: {str(e)}" | |
| print(f"์ ์ฒ๋ฆฌ ๋ํผ ํจ์ ์ค๋ฅ: {e}") | |
| yield "โ **์ ์ฒ๋ฆฌ ์ค๋ฅ ๋ฐ์**", "์ ์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค.", "", f"์ ์ฒ๋ฆฌ ์ค๋ฅ ๋ฐ์: {str(e)}", "" | |
| def process_postprocessing_wrapper(batch_result, system_prompt, postprocess_prompt): | |
| """ํ์ฒ๋ฆฌ๋ง ์ํํ๋ ๋ํผ ํจ์""" | |
| try: | |
| # ๋ฐฐ์น ๊ฒฐ๊ณผ๊ฐ ๋น์ด์๊ฑฐ๋ ์ด๊ธฐ ๋ฉ์์ง์ธ ๊ฒฝ์ฐ ํ์ธ | |
| if not batch_result or batch_result.strip() == "*๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค...*": | |
| yield batch_result, "โ **๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค**\n\n์ ์ฒ๋ฆฌ๋ฅผ ๋จผ์ ์ํํด์ฃผ์ธ์.", "", "ํ์ฒ๋ฆฌ ์คํ ๋ถ๊ฐ: ๋ฐฐ์น ๊ฒฐ๊ณผ ์์", "" | |
| return | |
| # ํ์ฒ๋ฆฌ ํจ์์์ Generator ๊ฒฐ๊ณผ ์ถ์ถ | |
| generator = process_request_postprocessing_only(batch_result, system_prompt, postprocess_prompt) | |
| final_result = None | |
| # Generator์ ๋ชจ๋ ์ค๊ฐ ๊ฒฐ๊ณผ๋ฅผ ์ฒ๋ฆฌํ๋ฉฐ ๋ง์ง๋ง ๊ฒฐ๊ณผ๋ฅผ ์ป์ | |
| for result in generator: | |
| if result and len(result) >= 5: | |
| batch_content, result_content, docling_output, status_log, api_request = result | |
| # ์ค์๊ฐ์ผ๋ก ๊ฒฐ๊ณผ ์ ๋ฐ์ดํธ (๋ฐฐ์น ๊ฒฐ๊ณผ๋ ์ ์ง) | |
| yield batch_result, result_content, docling_output, status_log, api_request | |
| final_result = result | |
| if final_result and len(final_result) >= 5: | |
| batch_content, result_content, docling_output, status_log, api_request = final_result | |
| # ํ์ฒ๋ฆฌ ์๋ฃ ์ ๋ฐฐ์น ๊ฒฐ๊ณผ๋ ์ ์งํ๊ณ ์ต์ข ๊ฒฐ๊ณผ๋ง ์ ๋ฐ์ดํธ | |
| yield batch_result, result_content, docling_output, status_log, api_request | |
| else: | |
| yield batch_result, "ํ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", "", "ํ์ฒ๋ฆฌ ์๋ฃ", "" | |
| except Exception as e: | |
| error_msg = f"ํ์ฒ๋ฆฌ ์ค๋ฅ: {str(e)}" | |
| print(f"ํ์ฒ๋ฆฌ ๋ํผ ํจ์ ์ค๋ฅ: {e}") | |
| yield batch_result, f"โ **ํ์ฒ๋ฆฌ ์ค๋ฅ ๋ฐ์**\n\n{error_msg}", "", f"ํ์ฒ๋ฆฌ ์ค๋ฅ ๋ฐ์: {str(e)}", "" | |
| def on_save_button_click(result_content, filename): | |
| """๊ฒฐ๊ณผ ์ ์ฅ ์ฒ๋ฆฌ""" | |
| result = save_result_to_file(result_content, filename) | |
| return gr.update(value=result, visible=True) | |
| def extract_with_text_method(pdf_path, uploaded_file, method): | |
| """ํ ์คํธ ๊ธฐ๋ฐ ์ถ์ถ""" | |
| final_pdf_path = uploaded_file or pdf_path | |
| if not final_pdf_path: | |
| return "PDF ํ์ผ์ ์ ํํด์ฃผ์ธ์." | |
| return extract_text_with_fitz(final_pdf_path) | |
| def extract_with_ocr(pdf_path, uploaded_file): | |
| """OCR ๊ธฐ๋ฐ ์ถ์ถ""" | |
| final_pdf_path = uploaded_file or pdf_path | |
| if not final_pdf_path: | |
| return "PDF ํ์ผ์ ์ ํํด์ฃผ์ธ์." | |
| return extract_text_with_docling(final_pdf_path) | |
| def extract_both_methods(pdf_path, uploaded_file, text_method): | |
| """์์ชฝ ๋ชจ๋ ์ถ์ถ""" | |
| final_pdf_path = uploaded_file or pdf_path | |
| if not final_pdf_path: | |
| return "PDF ํ์ผ์ ์ ํํด์ฃผ์ธ์.", "PDF ํ์ผ์ ์ ํํด์ฃผ์ธ์." | |
| text_result = extract_with_text_method(pdf_path, uploaded_file, text_method) | |
| ocr_result = extract_with_ocr(pdf_path, uploaded_file) | |
| return text_result, ocr_result | |
| def preview_images(pdf_path, uploaded_file, processing_mode, use_images, overlap_option): | |
| """์ด๋ฏธ์ง ๋ฏธ๋ฆฌ๋ณด๊ธฐ""" | |
| if not use_images: | |
| return [] | |
| final_pdf_path = uploaded_file or pdf_path | |
| if not final_pdf_path: | |
| return [] | |
| try: | |
| return preview_image_processing(final_pdf_path, processing_mode, overlap_option) | |
| except Exception as e: | |
| print(f"๋ฏธ๋ฆฌ๋ณด๊ธฐ ์ค๋ฅ: {e}") | |
| return [] | |
| def update_overlap_visibility(processing_mode): | |
| """๋ณํฉ ์ต์ ํ์/์จ๊น ์ ์ด""" | |
| return gr.update(visible="๋ณํฉ" in processing_mode) | |
| # === ์ด๋ฒคํธ ์ฐ๊ฒฐ === | |
| # ์ ์ฒ๋ฆฌ ๋ถ์ ๋ฒํผ | |
| preprocessing_button.click( | |
| fn=process_preprocessing_wrapper, | |
| inputs=[ | |
| prompt_input, system_prompt_input, use_images, use_docling, | |
| pdf_dropdown, file_upload, output_filename, | |
| image_processing_mode, overlap_merge_option, batch_size_slider | |
| ], | |
| outputs=[batch_result_output, result_output, docling_output, status_log_output, api_request_output], | |
| show_progress=True | |
| ) | |
| # ํ์ฒ๋ฆฌ ๋ถ์ ๋ฒํผ | |
| postprocessing_button.click( | |
| fn=process_postprocessing_wrapper, | |
| inputs=[batch_result_output, system_prompt_input, postprocess_prompt_input], | |
| outputs=[batch_result_output, result_output, docling_output, status_log_output, api_request_output], | |
| show_progress=True | |
| ) | |
| # ์ ์ฅ ๋ฒํผ | |
| save_button.click( | |
| fn=on_save_button_click, | |
| inputs=[result_output, output_filename], | |
| outputs=[save_message] | |
| ) | |
| # ์ด๋ฏธ์ง ๋ฏธ๋ฆฌ๋ณด๊ธฐ (๋ฏธ๋ฆฌ๋ณด๊ธฐ ํญ) | |
| preview_button_tab.click( | |
| fn=preview_images, | |
| inputs=[pdf_dropdown, file_upload, image_processing_mode, use_images, overlap_merge_option], | |
| outputs=[image_preview_gallery_tab] | |
| ) | |
| # ์ด๋ฏธ์ง ์ฒ๋ฆฌ ๋ชจ๋ ๋ณ๊ฒฝ ์ ์ค๋ณต ์ต์ ํ์/์จ๊น | |
| image_processing_mode.change( | |
| fn=update_overlap_visibility, | |
| inputs=[image_processing_mode], | |
| outputs=[overlap_merge_option] | |
| ) | |
| # ํ ์คํธ ์ถ์ถ ์ด๋ฒคํธ๋ค | |
| text_extract_btn.click( | |
| fn=extract_with_text_method, | |
| inputs=[pdf_dropdown, file_upload, text_extract_method], | |
| outputs=[text_extract_result] | |
| ) | |
| ocr_extract_btn.click( | |
| fn=extract_with_ocr, | |
| inputs=[pdf_dropdown, file_upload], | |
| outputs=[ocr_extract_result] | |
| ) | |
| compare_both_btn.click( | |
| fn=extract_both_methods, | |
| inputs=[pdf_dropdown, file_upload, text_extract_method], | |
| outputs=[text_extract_result, ocr_extract_result] | |
| ) | |
| # === ํธ์ง ๊ด๋ จ ์ด๋ฒคํธ ์ฐ๊ฒฐ === | |
| # ๋ฐฐ์น ๊ฒฐ๊ณผ ํธ์ง ์ด๋ฒคํธ | |
| batch_edit_button.click( | |
| fn=start_batch_edit, | |
| inputs=[batch_result_output], | |
| outputs=[batch_edit_button, batch_save_button, batch_cancel_button, batch_edit_area, batch_result_output] | |
| ) | |
| batch_save_button.click( | |
| fn=save_batch_edit, | |
| inputs=[batch_edit_area], | |
| outputs=[batch_edit_button, batch_save_button, batch_cancel_button, batch_edit_area, batch_result_output] | |
| ) | |
| batch_cancel_button.click( | |
| fn=cancel_batch_edit, | |
| outputs=[batch_edit_button, batch_save_button, batch_cancel_button, batch_edit_area, batch_result_output] | |
| ) | |
| # ์ต์ข ๊ฒฐ๊ณผ ํธ์ง ์ด๋ฒคํธ | |
| result_edit_button.click( | |
| fn=start_result_edit, | |
| inputs=[result_output], | |
| outputs=[result_edit_button, result_save_button, result_cancel_button, result_edit_area, result_output] | |
| ) | |
| result_save_button.click( | |
| fn=save_result_edit, | |
| inputs=[result_edit_area], | |
| outputs=[result_edit_button, result_save_button, result_cancel_button, result_edit_area, result_output] | |
| ) | |
| result_cancel_button.click( | |
| fn=cancel_result_edit, | |
| outputs=[result_edit_button, result_save_button, result_cancel_button, result_edit_area, result_output] | |
| ) | |
| return app | |