gworley3 committed · verified
Commit a64e29c · 1 Parent(s): e4ccc65

Upload 2 files

Files changed (2):
  1. app.py +172 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,172 @@
+ """
+ Zen Oracle - Gradio App for HuggingFace Spaces
+
+ Provides both a web UI and a JSON API endpoint.
+ API (Gradio 4): POST /call/consult with {"data": ["question"]}
+ """
+
+ import torch
+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
+ import os
+
+ # Interpretation prompt
+ INTERPRETATION_PROMPT = "You are an old zen master reading a koan. A student asked: \"{question}\" The master replied: \"{answer}\". Write a short capping verse for this koan, 4-8 lines, and nothing else."
+
+ # Global models (loaded once)
+ oracle_model = None
+ oracle_tokenizer = None
+ interpreter_model = None
+ interpreter_tokenizer = None
+
+
+ def load_models():
+     """Load models on startup."""
+     global oracle_model, oracle_tokenizer, interpreter_model, interpreter_tokenizer
+
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     print(f"Using device: {device}")
+
+     # Load oracle model (fine-tuned Flan-T5-small)
+     print("Loading oracle model...")
+     checkpoint_path = os.environ.get("ORACLE_CHECKPOINT", "checkpoints/best_model.pt")
+
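+     # Assumed checkpoint layout, inferred from the keys read below:
+     #   {"model_state_dict": <state dict>,
+     #    "config": {"model": {"name": "<HF model id>"}}}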
+     if os.path.exists(checkpoint_path):
+         checkpoint = torch.load(checkpoint_path, map_location='cpu')
+         config = checkpoint.get('config', {})
+         model_name = config.get('model', {}).get('name', 'google/flan-t5-small')
+
+         oracle_tokenizer = AutoTokenizer.from_pretrained(model_name)
+         oracle_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+         oracle_model.load_state_dict(checkpoint['model_state_dict'])
+     else:
+         # Fallback to base model if no checkpoint
+         print("No checkpoint found, using base Flan-T5-small")
+         oracle_tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-small')
+         oracle_model = AutoModelForSeq2SeqLM.from_pretrained('google/flan-t5-small')
+
+     oracle_model.to(device)
+     oracle_model.eval()
+
+     # Load interpreter model (Qwen2.5-1.5B)
+     print("Loading interpreter model...")
+     interpreter_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
+     interpreter_model = AutoModelForCausalLM.from_pretrained(
+         "Qwen/Qwen2.5-1.5B-Instruct",
+         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+     )
+     interpreter_model.to(device)
+     interpreter_model.eval()
+
+     print("Models loaded!")
+
+
+ def generate_answer(question: str) -> str:
+     """Generate zen answer from oracle."""
+     device = next(oracle_model.parameters()).device
+     inputs = oracle_tokenizer(question, return_tensors="pt").to(device)
+
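+     # Sampled decoding: moderate temperature plus top-p/top-k filtering and a
+     # repetition penalty keep the short replies varied without looping.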
+     with torch.no_grad():
+         outputs = oracle_model.generate(
+             **inputs,
+             max_new_tokens=150,
+             temperature=0.8,
+             top_p=0.85,
+             top_k=40,
+             repetition_penalty=1.3,
+             do_sample=True,
+             pad_token_id=oracle_tokenizer.pad_token_id,
+             eos_token_id=oracle_tokenizer.eos_token_id
+         )
+
+     return oracle_tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+
+ def generate_interpretation(question: str, answer: str) -> str:
+     """Generate interpretation using Qwen2.5."""
+     device = next(interpreter_model.parameters()).device
+
+     prompt = INTERPRETATION_PROMPT.format(question=question, answer=answer)
+
+     messages = [{"role": "user", "content": prompt}]
+     formatted_prompt = interpreter_tokenizer.apply_chat_template(
+         messages, tokenize=False, add_generation_prompt=True
+     )
+
+     inputs = interpreter_tokenizer(
+         formatted_prompt, return_tensors="pt", max_length=512, truncation=True
+     ).to(device)
+
+     with torch.no_grad():
+         outputs = interpreter_model.generate(
+             **inputs,
+             max_new_tokens=256,
+             temperature=0.7,
+             top_p=0.9,
+             do_sample=True,
+             pad_token_id=interpreter_tokenizer.pad_token_id,
+             eos_token_id=interpreter_tokenizer.eos_token_id
+         )
+
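+     # Slice off the prompt tokens so only newly generated text is decoded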
+     response = interpreter_tokenizer.decode(
+         outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True
+     )
+     return response.strip()
+
+
+ def consult_oracle(question: str) -> dict:
+     """
+     Consult the zen oracle.
+
+     Returns a dict with question, answer, and interpretation.
+     """
+     if not question.strip():
+         return {"error": "Please enter a question"}
+
+     answer = generate_answer(question)
+     interpretation = generate_interpretation(question, answer)
+
+     return {
+         "question": question,
+         "answer": answer,
+         "interpretation": interpretation
+     }
+
+
+ def gradio_consult(question: str) -> tuple:
+     """Gradio interface function."""
+     result = consult_oracle(question)
+     if "error" in result:
+         return result["error"], ""
+     return result["answer"], result["interpretation"]
+
+
+ # Load models on import (runs once at startup, before serving)
+ load_models()
+
+ # Create Gradio interface
+ demo = gr.Interface(
+     fn=gradio_consult,
+     inputs=[
+         gr.Textbox(
+             label="Your Question",
+             placeholder="What is the mind of no mind?",
+             lines=2
+         )
+     ],
+     outputs=[
+         gr.Textbox(label="Kaku-ora's Answer"),
+         gr.Textbox(label="Sage Interpretation", lines=6)
+     ],
+     title="Kaku-ora",
+     description="Ask the oracle. Receive sage advice.",
+     examples=[
+         ["What is the meaning of life?"],
+         ["What is Buddha?"],
+         ["How do I find peace?"],
+     ],
+     api_name="consult"  # exposed to API clients as api_name="/consult" (REST: /call/consult)
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
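
For reference, a minimal client-side call against the /consult endpoint could look like the sketch below. This is an illustration, not part of the commit: it assumes the gradio_client package (installed separately, not in requirements.txt) and uses a placeholder URL for the running Space.

    from gradio_client import Client

    # Placeholder: use the local dev server URL or the "user/space-name" id
    client = Client("http://127.0.0.1:7860")

    # The Interface has two outputs, so predict() returns a 2-tuple
    answer, interpretation = client.predict(
        "What is Buddha?",
        api_name="/consult",
    )
    print(answer)
    print(interpretation)
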
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch>=2.0.0
+ transformers>=4.45.0
+ gradio>=4.0.0
+ tqdm>=4.65.0
+ PyYAML>=6.0