Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -7,39 +7,35 @@ from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import pipeline
import torch
-import uvicorn
+import uvicorn

# 2. Initialize the FastAPI application
app = FastAPI()

# 3. Add CORS middleware
-# This allows our frontend (running on a different domain) to communicate with this backend.
app.add_middleware(
    CORSMiddleware,
-    allow_origins=["*"],
+    allow_origins=["*"],
    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
)

# 4. Load the Language Model
-# The pipeline will be created only once when the application starts.
+# **UPDATED:** Using a smaller, more efficient model to ensure it loads on free hardware.
try:
-        torch_dtype=torch.bfloat16 # Use a memory-efficient data type
+    generator = pipeline(
+        "text-generation",
+        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", # Switched to TinyLlama
+        torch_dtype=torch.bfloat16,
+        device_map="auto" # Automatically select device (CPU in this case)
    )
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
+    generator = None

# 5. Define the data model for the incoming request
-# This ensures the data we receive is in the correct format.
class QueryRequest(BaseModel):
    document_text: str
    query_text: str

@@ -47,55 +43,66 @@ class QueryRequest(BaseModel):
# 6. Define the API endpoint
@app.post("/analyze")
async def analyze_document(request: QueryRequest):
-    and uses the LLM to generate a structured JSON response.
-    """
-    if summarizer is None:
-        return {"error": "Model is not available."}
+    if generator is None:
+        return {"error": "Model is not available. It may have failed to load due to resource constraints."}

+    # **UPDATED:** Using a chat-based prompt format suitable for TinyLlama.
+    # This structure helps the model understand its role and the task better.
+    messages = [
+        {
+            "role": "system",
+            "content": """You are an expert AI assistant for a claims processing department. Your task is to analyze an insurance policy document and a user's query to make a decision. Based ONLY on the information in the Policy Document, determine if the request should be approved or rejected. Provide your final answer in a strict JSON format. The JSON object must contain three keys: "decision" (string, "Approved" or "Rejected"), "amount" (number, 0 if not applicable), and "justification" (string, explaining your reasoning and citing the policy). Do not use any information outside of the provided Policy Document."""
+        },
+        {
+            "role": "user",
+            "content": f"""
+**Policy Document (Source of Truth):**
+---
+{request.document_text}
+---

+**User Query:**
+---
+{request.query_text}
+---

+**JSON Response:**
+"""
+        }
+    ]

+    # The prompt template for the model
+    prompt = generator.tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )

    try:
        # Generate the response from the LLM
+        outputs = generator(
+            prompt,
+            max_new_tokens=256, # Max tokens for the generated response
+            do_sample=True,
+            temperature=0.7,
+            top_k=50,
+            top_p=0.95
+        )
+        generated_text = outputs[0]["generated_text"]

-        # Find the start and end of the JSON object.
+        # The model's output will include our prompt, so we find the JSON part.
        json_start = generated_text.find('{')
        json_end = generated_text.rfind('}') + 1

-        if json_start != -1 and json_end
+        if json_start != -1 and json_end > json_start:
            cleaned_json = generated_text[json_start:json_end]
-            # The backend should return the JSON string directly, not a Python dict
-            # The frontend will parse it.
            return cleaned_json
        else:
-            # If no JSON is found, return the raw text with an error flag.
            return {"error": "Failed to generate valid JSON.", "raw_output": generated_text}

    except Exception as e:
        print(f"Error during analysis: {e}")
-        return {"error": f"An error occurred: {str(e)}"}
+        return {"error": f"An error occurred during analysis: {str(e)}"}

# A simple root endpoint to confirm the server is running.
@app.get("/")