policy123 committed
Commit 95418e2 · verified · 1 parent: c1d21d7

Update app.py

Files changed (1):
  1. app.py +28 -19
app.py CHANGED
@@ -1,29 +1,42 @@
 # app.py
-# FINAL VERSION using Gradio to be compatible with free ZeroGPU hardware.
+# FINAL CPU VERSION using a quantized model for maximum reliability on free hardware.

 # 1. Import necessary libraries
 import gradio as gr
-from transformers import pipeline
+from transformers import AutoTokenizer, pipeline
+from auto_gptq import AutoGPTQForCausalLM
 import torch

-# 2. Load the Language Model
-# This logic is the same, but it will now run reliably on the free GPU.
+# 2. Load the Quantized Language Model
+# This model is optimized to use less memory, making it stable on free CPUs.
 try:
+    model_name_or_path = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"
+
+    # Load the tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+
+    # Load the quantized model
+    model = AutoGPTQForCausalLM.from_quantized(
+        model_name_or_path,
+        use_safetensors=True,
+        trust_remote_code=False,
+        device_map="auto"  # Will automatically use CPU
+    )
+
+    # Create the text generation pipeline
     generator = pipeline(
-        "text-generation",
-        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-        torch_dtype=torch.bfloat16,
-        device_map="auto"
+        task="text-generation",
+        model=model,
+        tokenizer=tokenizer
     )
-    print("Model loaded successfully on GPU.")
+    print("Quantized model loaded successfully on CPU.")
     MODEL_LOADED = True
 except Exception as e:
-    print(f"Error loading model: {e}")
+    print(f"Error loading quantized model: {e}")
     generator = None
     MODEL_LOADED = False

 # 3. Define the core analysis function
-# This function contains the prompt engineering and model inference logic.
 def analyze_document(document_text, query_text):
     """
     Analyzes the document based on the query using the loaded LLM.
@@ -79,7 +92,6 @@ def analyze_document(document_text, query_text):

     if json_start != -1 and json_end > json_start:
         cleaned_json_str = generated_text[json_start:json_end]
-        # Gradio's JSON component expects a Python dictionary, not a string
         import json
         return json.loads(cleaned_json_str)
     else:
@@ -90,10 +102,9 @@ def analyze_document(document_text, query_text):
         return {"error": f"An error occurred during analysis: {str(e)}"}

 # 4. Create and launch the Gradio Interface
-# This creates the web UI and API endpoint automatically.
 with gr.Blocks() as demo:
-    gr.Markdown("# Policy Analysis API")
-    gr.Markdown("This Gradio app serves the backend for the RAG policy analysis system.")
+    gr.Markdown("# Policy Analysis API (CPU Version)")
+    gr.Markdown("This Gradio app serves the backend for the RAG policy analysis system, optimized for CPU.")

     with gr.Row():
         doc_input = gr.Textbox(lines=5, label="Document Text", placeholder="Paste the document text here...")
@@ -106,9 +117,7 @@ with gr.Blocks() as demo:
         fn=analyze_document,
         inputs=[doc_input, query_input],
         outputs=output_json,
-        api_name="analyze" # This creates the /api/analyze endpoint
+        api_name="analyze"
     )

-    # This will launch the Gradio app and make it accessible.
-    # The `share=True` is not needed when running on Spaces.
-    demo.launch()
+demo.launch()
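
Note: the new imports assume their packages are installed in the Space. For this revision to build, requirements.txt would need auto-gptq (plus optimum and a recent transformers) alongside gradio and torch; the exact pins are not part of this commit.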
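
The second hunk shows only the tail of the JSON-extraction step, so how json_start and json_end are computed is not visible here. A minimal sketch of the apparent pattern (the find/rfind strategy is an assumption, not taken from this commit):

import json

def extract_first_json(generated_text):
    # Assumed computation of json_start/json_end: take the span from the
    # first '{' to the last '}' in the model output and parse it.
    json_start = generated_text.find("{")
    json_end = generated_text.rfind("}") + 1
    if json_start != -1 and json_end > json_start:
        cleaned_json_str = generated_text[json_start:json_end]
        return json.loads(cleaned_json_str)
    return {"error": "No JSON object found in the model output."}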
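
Since api_name="analyze" exposes analyze_document as an API endpoint, the Space can be called programmatically with gradio_client. A sketch of such a call follows; the Space id is a placeholder, not taken from this commit:

from gradio_client import Client

client = Client("policy123/policy-analysis")  # placeholder Space id
result = client.predict(
    "Employees may carry over up to five unused vacation days.",  # document_text
    "How many vacation days carry over?",                         # query_text
    api_name="/analyze",
)
print(result)  # the dict (or error dict) returned by analyze_document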