Spaces:

rishabhsetiya
/

CAIAssignmentGradio

Sleeping

App Files Files Community

rishabhsetiya committed on Aug 24

Commit

78fe0f4

verified ·

1 Parent(s): c4e9be0

Update app.py

Browse files

Files changed (1) hide show

app.py +167 -4

app.py CHANGED Viewed

@@ -1,7 +1,170 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+import os
+import torch
+import math
+import pandas as pd
+import torch.nn as nn
+import torch.nn.functional as F
+from datasets import Dataset
+import transformers
+from transformers import AutoModelForCausalLM, DataCollatorForLanguageModeling
+from peft import LoraConfig, get_peft_model
 import gradio as gr
+# -----------------------------
+# ENVIRONMENT / CACHE
+# -----------------------------
+# Point all Hugging Face cache variables at one directory under /tmp and
+# disable Weights & Biases.
+# NOTE(review): these are assigned after transformers/datasets were imported
+# above -- confirm the libraries still honour them at this point.
+os.environ.update(
+    {
+        "TRANSFORMERS_CACHE": "/tmp/huggingface_cache",
+        "HF_HOME": "/tmp/huggingface_cache",
+        "HF_DATASETS_CACHE": "/tmp/huggingface_cache",
+        "HF_METRICS_CACHE": "/tmp/huggingface_cache",
+        "WANDB_MODE": "disabled",
+    }
+)
+# -----------------------------
+# SETTINGS
+# -----------------------------
+MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+# Optional access token from the environment (None when unset); it is not
+# passed to any call in the code shown here.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+# Run on the GPU when torch reports one, otherwise on the CPU.
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+# -----------------------------
+# LOAD TOKENIZER
+# -----------------------------
+# Shared by preprocess() and generate_answer() below.
+tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_ID)
+# -----------------------------
+# LoRA / MoE Modules
+# -----------------------------
+class LoraLinear(nn.Module):
+    """Linear layer with a frozen weight and an optional trainable low-rank update.
+
+    ``forward`` computes ``F.linear(x, weight, bias)`` and, when ``r > 0``, adds
+    ``scaling * (dropout(x) @ lora_A.T @ lora_B.T)`` where
+    ``scaling = lora_alpha / r``.
+
+    Args:
+        in_features: size of the last dimension of the input.
+        out_features: size of the last dimension of the output.
+        r: rank of the low-rank update; ``0`` disables it.
+        lora_alpha: numerator of the scaling factor.
+        lora_dropout: dropout probability applied to the input of the update.
+        bias: when true, a frozen zero bias is created; otherwise no bias.
+    """
+    def __init__(self, in_features, out_features, r=8, lora_alpha=16, lora_dropout=0.05, bias=False):
+        super().__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.r = r
+        self.scaling = lora_alpha / r if r > 0 else 1.0
+        # The frozen weight used to be allocated with torch.empty and never
+        # filled, so forward() mixed arbitrary memory contents into the output.
+        # Start from zeros: until real weights are loaded the layer contributes
+        # only its low-rank update.
+        self.weight = nn.Parameter(torch.zeros(out_features, in_features), requires_grad=False)
+        self.bias = nn.Parameter(torch.zeros(out_features), requires_grad=False) if bias else None
+        if r > 0:
+            self.lora_A = nn.Parameter(torch.zeros((r, in_features)))
+            self.lora_B = nn.Parameter(torch.zeros((out_features, r)))
+            # A gets a random init, B stays zero, so the update is zero at start.
+            nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
+            nn.init.zeros_(self.lora_B)
+            self.lora_dropout = nn.Dropout(p=lora_dropout)
+        else:
+            self.lora_A, self.lora_B, self.lora_dropout = None, None, None
+    def forward(self, x):
+        """Return the frozen linear output plus the scaled low-rank update (if enabled)."""
+        result = F.linear(x, self.weight, self.bias)
+        if self.r > 0:
+            lora_out = self.lora_dropout(x) @ self.lora_A.T @ self.lora_B.T
+            result = result + self.scaling * lora_out
+        return result
+class MoELoRALinear(nn.Module):
+    """Wrap a linear layer and add a gated mixture of ``LoraLinear`` experts to its output.
+
+    ``forward`` returns ``base_linear(x) + sum_i gate_i(x) * expert_i(x)`` where
+    the gate is a softmax over a learned linear projection of ``x``.
+
+    Args:
+        base_linear: the wrapped layer; must expose ``in_features`` and ``out_features``.
+        r: LoRA rank passed to every expert.
+        num_experts: number of experts to create.
+        k: stored on the module but not read by ``forward``, which mixes all experts.
+        lora_alpha: LoRA scaling numerator passed to every expert.
+        lora_dropout: LoRA dropout probability passed to every expert.
+    """
+    def __init__(self, base_linear, r, num_experts=2, k=1, lora_alpha=16, lora_dropout=0.05):
+        super().__init__()
+        self.base_linear = base_linear
+        self.num_experts = num_experts
+        self.k = k
+        self.experts = nn.ModuleList([
+            LoraLinear(base_linear.in_features, base_linear.out_features, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout)
+            for _ in range(num_experts)
+        ])
+        # base_linear already supplies the dense projection. Zero each expert's
+        # frozen weight so an expert adds only its low-rank update rather than
+        # whatever its weight buffer happened to contain when it was allocated.
+        for expert in self.experts:
+            nn.init.zeros_(expert.weight)
+        self.gate = nn.Linear(base_linear.in_features, num_experts)
+    def forward(self, x):
+        """Return the base projection plus the softmax-weighted sum of all expert outputs."""
+        base_out = self.base_linear(x)
+        gate_scores = torch.softmax(self.gate(x), dim=-1)
+        # gate_scores[..., i:i+1] keeps a trailing axis so it broadcasts over features.
+        expert_out = sum(
+            gate_scores[..., i:i+1] * expert(x)
+            for i, expert in enumerate(self.experts)
+        )
+        return base_out + expert_out
+def replace_proj_with_moe_lora(model, r=8, num_experts=2, k=1, lora_alpha=16, lora_dropout=0.05, proj_names=("up_proj", "down_proj")):
+    """Swap MLP projections of every decoder layer for ``MoELoRALinear`` wrappers, in place.
+
+    Args:
+        model: a model exposing ``model.model.layers``, each with an ``mlp`` submodule.
+        r, num_experts, k, lora_alpha, lora_dropout: forwarded to ``MoELoRALinear``.
+        proj_names: attribute names on ``layer.mlp`` to wrap; defaults to the
+            up and down projections.
+
+    Returns:
+        The same ``model`` object, modified in place.
+    """
+    for layer in model.model.layers:
+        mlp = layer.mlp
+        for proj_name in proj_names:
+            old = getattr(mlp, proj_name)
+            # Put the new expert/gate parameters on the device of the layer they wrap.
+            target_device = next(old.parameters()).device
+            moe = MoELoRALinear(
+                base_linear=old,
+                r=r,
+                num_experts=num_experts,
+                k=k,
+                lora_alpha=lora_alpha,
+                lora_dropout=lora_dropout,
+            ).to(target_device)
+            setattr(mlp, proj_name, moe)
+    return model
+# -----------------------------
+# Load / Prepare Model & Dataset
+# -----------------------------
+def preprocess(example):
+    """Tokenize one chat-formatted example and mask the prompt part of its labels.
+
+    Args:
+        example: mapping with a ``'text'`` entry holding the full conversation.
+
+    Returns:
+        The tokenizer output with an added ``'labels'`` list: a copy of
+        ``input_ids`` whose leading positions (the text before
+        ``"<|assistant|>"``) are set to ``-100``. If the marker is absent,
+        nothing is masked.
+    """
+    text = example['text']
+    tokens = tokenizer(text, truncation=True, padding=False)
+    labels = tokens['input_ids'].copy()
+    assistant_index = text.find("<|assistant|>")
+    # find() returns -1 when the marker is missing; slicing with -1 would
+    # silently treat almost the whole text as prompt, so skip masking instead.
+    if assistant_index >= 0:
+        prefix_ids = tokenizer(text[:assistant_index], add_special_tokens=False)['input_ids']
+        # Clamp to the (possibly truncated) sequence length: a slice assignment
+        # past the end would make labels longer than input_ids.
+        # NOTE(review): the full text is tokenized with default special tokens
+        # while the prefix is not, so this boundary may be offset if the
+        # tokenizer prepends special tokens -- confirm.
+        prefix_len = min(len(prefix_ids), len(labels))
+        labels[:prefix_len] = [-100] * prefix_len
+    tokens['labels'] = labels
+    return tokens
+def _build_tokenized_dataset():
+    """Build the hardcoded MakeMyTrip Q&A examples in chat format and tokenize them with ``preprocess``."""
+    data = [
+        {"question": "What were MakeMyTrip's total assets as of March 31, 2024?",
+         "answer": "MakeMyTrip's total assets as of March 31, 2024 were USD 1,660,077 thousand."},
+        {"question": "What was MakeMyTrip's total revenue for the year ended March 31, 2025?",
+         "answer": "MakeMyTrip's total revenue for the year ended March 31, 2025 was USD 978,336 thousand."},
+    ]
+    system_prompt = "You are a helpful assistant that provides financial data from MakeMyTrip reports."
+    training_data = [
+        {"text": f"<|system|>\n{system_prompt}</s>\n<|user|>\n{row['question']}</s>\n<|assistant|>\n{row['answer']}</s>"}
+        for row in data
+    ]
+    dataset = Dataset.from_list(training_data)
+    return dataset.map(preprocess, remove_columns=["text"])
+def load_model(model_id):
+    """Load the causal LM, wrap its MLP projections with MoE-LoRA, add PEFT LoRA, and return it in eval mode.
+
+    The tokenized example dataset used to be rebuilt here on every start and
+    then discarded; that work now lives in ``_build_tokenized_dataset`` and is
+    no longer run on the load path.
+
+    Args:
+        model_id: model identifier passed to ``AutoModelForCausalLM.from_pretrained``.
+
+    Returns:
+        The PEFT-wrapped model on ``device`` with ``eval()`` applied. No adapter
+        checkpoint is loaded in this function.
+    """
+    # NOTE(review): trust_remote_code=True lets the model repo run its own
+    # code at load time -- confirm it is needed for the models used here.
+    base_model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to(device)
+    model = replace_proj_with_moe_lora(base_model)
+    peft_config = LoraConfig(r=8, lora_alpha=16, lora_dropout=0.05, target_modules=["o_proj"], bias="none", task_type="CAUSAL_LM")
+    model = get_peft_model(model, peft_config)
+    model.eval()
+    return model
+model = load_model(MODEL_ID)
+# -----------------------------
+# Gradio Interface
+# -----------------------------
+def generate_answer(prompt, max_tokens):
+    """Generate a sampled answer to ``prompt`` with the module-level model.
+
+    Args:
+        prompt: the user's question; empty, whitespace-only, or ``None`` input
+            returns a reminder message instead of calling the model.
+        max_tokens: upper bound on newly generated tokens; cast to ``int``
+            because a UI slider may deliver a float.
+
+    Returns:
+        The decoded text of the newly generated tokens only (the prompt is not
+        echoed back), with special tokens removed.
+    """
+    if not prompt or not prompt.strip():
+        return "Please enter a prompt!"
+    system_prompt = "You are a helpful assistant that provides financial data from MakeMyTrip reports."
+    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
+    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer(input_text, return_tensors="pt").to(device)
+    prompt_len = inputs["input_ids"].shape[1]
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=int(max_tokens),
+            do_sample=True,
+            top_p=0.9,
+            temperature=0.7,
+        )
+    # The returned sequence starts with the prompt tokens; decoding all of it
+    # repeated the system prompt and the question in the answer box.
+    answer = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
+    return answer.strip()
+# Input widgets: a multi-line question box and a slider for the generation length.
+question_box = gr.Textbox(
+    label="Enter your question:",
+    lines=5,
+    placeholder="Type your question here...",
+)
+token_slider = gr.Slider(
+    minimum=50,
+    maximum=500,
+    step=10,
+    value=200,
+    label="Max tokens to generate",
+)
+# Output widget showing the text returned by generate_answer.
+answer_box = gr.Textbox(label="Generated Answer")
+iface = gr.Interface(
+    fn=generate_answer,
+    inputs=[question_box, token_slider],
+    outputs=answer_box,
+    title="Chat with My Fine-Tuned Model 🤖",
+    description="This app allows you to ask questions about MakeMyTrip's financial data.",
+)
+# Start the web app.
+iface.launch()