policy123 committed
Commit 95418e2 · verified · 1 parent: c1d21d7

Update app.py

Files changed (1):
  1. app.py +28 -19
app.py CHANGED
@@ -1,29 +1,42 @@
 # app.py
-# FINAL VERSION using Gradio to be compatible with free ZeroGPU hardware.
+# FINAL CPU VERSION using a quantized model for maximum reliability on free hardware.

 # 1. Import necessary libraries
 import gradio as gr
-from transformers import pipeline
+from transformers import AutoTokenizer, pipeline
+from auto_gptq import AutoGPTQForCausalLM
 import torch

-# 2. Load the Language Model
-# This logic is the same, but it will now run reliably on the free GPU.
+# 2. Load the Quantized Language Model
+# This model is optimized to use less memory, making it stable on free CPUs.
 try:
+    model_name_or_path = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"
+
+    # Load the tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+
+    # Load the quantized model
+    model = AutoGPTQForCausalLM.from_quantized(
+        model_name_or_path,
+        use_safetensors=True,
+        trust_remote_code=False,
+        device_map="auto"  # Will automatically use CPU
+    )
+
+    # Create the text generation pipeline
     generator = pipeline(
-        "text-generation",
-        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-        torch_dtype=torch.bfloat16,
-        device_map="auto"
+        task="text-generation",
+        model=model,
+        tokenizer=tokenizer
     )
-    print("Model loaded successfully on GPU.")
+    print("Quantized model loaded successfully on CPU.")
     MODEL_LOADED = True
 except Exception as e:
-    print(f"Error loading model: {e}")
+    print(f"Error loading quantized model: {e}")
     generator = None
     MODEL_LOADED = False

 # 3. Define the core analysis function
-# This function contains the prompt engineering and model inference logic.
 def analyze_document(document_text, query_text):
     """
     Analyzes the document based on the query using the loaded LLM.
@@ -79,7 +92,6 @@ def analyze_document(document_text, query_text):

     if json_start != -1 and json_end > json_start:
         cleaned_json_str = generated_text[json_start:json_end]
-        # Gradio's JSON component expects a Python dictionary, not a string
         import json
         return json.loads(cleaned_json_str)
     else:
@@ -90,10 +102,9 @@ def analyze_document(document_text, query_text):
         return {"error": f"An error occurred during analysis: {str(e)}"}

 # 4. Create and launch the Gradio Interface
-# This creates the web UI and API endpoint automatically.
 with gr.Blocks() as demo:
-    gr.Markdown("# Policy Analysis API")
-    gr.Markdown("This Gradio app serves the backend for the RAG policy analysis system.")
+    gr.Markdown("# Policy Analysis API (CPU Version)")
+    gr.Markdown("This Gradio app serves the backend for the RAG policy analysis system, optimized for CPU.")

     with gr.Row():
         doc_input = gr.Textbox(lines=5, label="Document Text", placeholder="Paste the document text here...")
@@ -106,9 +117,7 @@ with gr.Blocks() as demo:
         fn=analyze_document,
         inputs=[doc_input, query_input],
         outputs=output_json,
-        api_name="analyze" # This creates the /api/analyze endpoint
+        api_name="analyze"
     )

-    # This will launch the Gradio app and make it accessible.
-    # The `share=True` is not needed when running on Spaces.
-    demo.launch()
+demo.launch()
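
Note: the new imports assume their packages are installed in the Space. For this revision to build, requirements.txt would need auto-gptq (plus optimum and a recent transformers) alongside gradio and torch; the exact pins are not part of this commit.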
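
The second hunk shows only the tail of the JSON-extraction step, so how json_start and json_end are computed is not visible here. A minimal sketch of the apparent pattern (the find/rfind strategy is an assumption, not taken from this commit):

import json

def extract_first_json(generated_text):
    # Assumed computation of json_start/json_end: take the span from the
    # first '{' to the last '}' in the model output and parse it.
    json_start = generated_text.find("{")
    json_end = generated_text.rfind("}") + 1
    if json_start != -1 and json_end > json_start:
        cleaned_json_str = generated_text[json_start:json_end]
        return json.loads(cleaned_json_str)
    return {"error": "No JSON object found in the model output."}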
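
Since api_name="analyze" exposes analyze_document as an API endpoint, the Space can be called programmatically with gradio_client. A sketch of such a call follows; the Space id is a placeholder, not taken from this commit:

from gradio_client import Client

client = Client("policy123/policy-analysis")  # placeholder Space id
result = client.predict(
    "Employees may carry over up to five unused vacation days.",  # document_text
    "How many vacation days carry over?",                         # query_text
    api_name="/analyze",
)
print(result)  # the dict (or error dict) returned by analyze_document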