# app.py
# FINAL CPU VERSION using a quantized model for maximum reliability on free hardware.

# 1. Import necessary libraries
import json

import gradio as gr
# **FIXED:** Import AutoModelForCausalLM from the main transformers library
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# 2. Load the Quantized Language Model
# This model is optimized to use less memory, making it stable on free CPUs.
try:
    model_name_or_path = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

    # Load the quantized model using the standard transformers class.
    # The installed 'optimum' and 'auto-gptq' libraries handle the GPTQ format automatically.
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        use_safetensors=True,
        trust_remote_code=False,
        device_map="auto"  # Automatically falls back to CPU when no GPU is present
    )

    # Create the text-generation pipeline
    generator = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer
    )
    print("Quantized model loaded successfully on CPU.")
    MODEL_LOADED = True
except Exception as e:
    print(f"Error loading quantized model: {e}")
    generator = None
    MODEL_LOADED = False
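
# NOTE: GPTQ support in transformers is provided through the 'optimum' and
# 'auto-gptq' packages mentioned above. A minimal requirements.txt for this
# Space would look roughly like the following (an assumed sketch, not taken
# from the original source):
#
#   gradio
#   torch
#   transformers
#   optimum
#   auto-gptq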

# 3. Define the core analysis function
def analyze_document(document_text, query_text):
    """
    Analyzes the document based on the query using the loaded LLM.
    """
    if not MODEL_LOADED or generator is None:
        return {"error": "Model is not available. Please check the Space logs for errors."}

    # The chat-based prompt format for TinyLlama
    messages = [
        {
            "role": "system",
            "content": """You are an expert AI assistant for a claims processing department. Your task is to analyze an insurance policy document and a user's query to make a decision. Based ONLY on the information in the Policy Document, determine if the request should be approved or rejected. Provide your final answer in a strict JSON format. The JSON object must contain three keys: "decision" (string, "Approved" or "Rejected"), "amount" (number, 0 if not applicable), and "justification" (string, explaining your reasoning and citing the policy). Do not use any information outside of the provided Policy Document."""
        },
        {
            "role": "user",
            "content": f"""
**Policy Document (Source of Truth):**
---
{document_text}
---
**User Query:**
---
{query_text}
---
**JSON Response:**
"""
        }
    ]
    prompt = generator.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
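
    # With TinyLlama's Zephyr-style chat template, the rendered prompt looks
    # roughly like this (illustrative sketch, not output captured from the model):
    #
    #   <|system|>
    #   You are an expert AI assistant for a claims processing department. ...</s>
    #   <|user|>
    #   **Policy Document (Source of Truth):** ...</s>
    #   <|assistant|>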

    try:
        # Generate the response from the LLM. return_full_text=False makes the
        # pipeline return only the completion, so the JSON extraction below
        # cannot pick up braces from the interpolated document text.
        outputs = generator(
            prompt,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            return_full_text=False
        )
        generated_text = outputs[0]["generated_text"]

        # Extract the JSON part from the model's output
        json_start = generated_text.find('{')
        json_end = generated_text.rfind('}') + 1
        if json_start != -1 and json_end > json_start:
            cleaned_json_str = generated_text[json_start:json_end]
            return json.loads(cleaned_json_str)
        else:
            return {"error": "Failed to generate valid JSON.", "raw_output": generated_text}
    except Exception as e:
        print(f"Error during analysis: {e}")
        return {"error": f"An error occurred during analysis: {str(e)}"}

# 4. Create and launch the Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Policy Analysis API (CPU Version)")
    gr.Markdown("This Gradio app serves the backend for the RAG policy analysis system, optimized for CPU.")
    with gr.Row():
        doc_input = gr.Textbox(lines=5, label="Document Text", placeholder="Paste the document text here...")
        query_input = gr.Textbox(label="Query Text", placeholder="Enter your query here...")
    output_json = gr.JSON(label="Analysis Result")
    analyze_btn = gr.Button("Analyze")
    analyze_btn.click(
        fn=analyze_document,
        inputs=[doc_input, query_input],
        outputs=output_json,
        api_name="analyze"
    )

demo.launch()
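
# Because the click handler registers api_name="analyze", the Space also
# exposes a programmatic endpoint at /analyze. A minimal client-side sketch,
# assuming a hypothetical Space id "your-username/policy-analyzer":
#
#   from gradio_client import Client
#
#   client = Client("your-username/policy-analyzer")
#   result = client.predict(
#       "Paste the policy document text here...",    # document_text
#       "Is knee surgery covered for this member?",  # query_text
#       api_name="/analyze",
#   )
#   print(result)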