rishabhsetiya committed
Commit 78fe0f4 · verified · 1 Parent(s): c4e9be0

Update app.py

Files changed (1)
  1. app.py +167 -4
app.py CHANGED
@@ -1,7 +1,170 @@
  import gradio as gr
 
- def greet(name):
-     return "Hello " + name + "!!"
 
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ import os
+ import torch
+ import math
+ import pandas as pd
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from datasets import Dataset
+ import transformers
+ from transformers import AutoModelForCausalLM, DataCollatorForLanguageModeling
+ from peft import LoraConfig, get_peft_model
  import gradio as gr
 
+ # -----------------------------
+ # ENVIRONMENT / CACHE
+ # -----------------------------
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
+ os.environ["HF_HOME"] = "/tmp/huggingface_cache"
+ os.environ["HF_DATASETS_CACHE"] = "/tmp/huggingface_cache"
+ os.environ["HF_METRICS_CACHE"] = "/tmp/huggingface_cache"
+ os.environ["WANDB_MODE"] = "disabled"
+
+ # -----------------------------
+ # SETTINGS
+ # -----------------------------
+ MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # -----------------------------
+ # LOAD TOKENIZER
+ # -----------------------------
+ tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_ID)
+
+ # -----------------------------
+ # LoRA / MoE Modules
+ # -----------------------------
+ class LoraLinear(nn.Module):
+     def __init__(self, in_features, out_features, r=8, lora_alpha=16, lora_dropout=0.05, bias=False):
+         super().__init__()
+         self.in_features = in_features
+         self.out_features = out_features
+         self.r = r
+         self.scaling = lora_alpha / r if r > 0 else 1.0
+         # Frozen placeholder weight, zero-initialized so this module contributes only the
+         # LoRA delta; the original frozen projection is applied separately in MoELoRALinear.
+         self.weight = nn.Parameter(torch.zeros(out_features, in_features), requires_grad=False)
+         self.bias = nn.Parameter(torch.zeros(out_features), requires_grad=False) if bias else None
+
+         if r > 0:
+             self.lora_A = nn.Parameter(torch.zeros((r, in_features)))
+             self.lora_B = nn.Parameter(torch.zeros((out_features, r)))
+             nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
+             nn.init.zeros_(self.lora_B)
+             self.lora_dropout = nn.Dropout(p=lora_dropout)
+         else:
+             self.lora_A, self.lora_B, self.lora_dropout = None, None, None
+
+     def forward(self, x):
+         result = F.linear(x, self.weight, self.bias)
+         if self.r > 0:
+             lora_out = self.lora_dropout(x) @ self.lora_A.T @ self.lora_B.T
+             result = result + self.scaling * lora_out
+         return result
+
+ class MoELoRALinear(nn.Module):
+     def __init__(self, base_linear, r, num_experts=2, k=1, lora_alpha=16, lora_dropout=0.05):
+         super().__init__()
+         self.base_linear = base_linear
+         self.num_experts = num_experts
+         self.k = k  # kept for API symmetry; gating below is soft over all experts, not top-k
+         self.experts = nn.ModuleList([
+             LoraLinear(base_linear.in_features, base_linear.out_features, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout)
+             for _ in range(num_experts)
+         ])
+         self.gate = nn.Linear(base_linear.in_features, num_experts)
+
+     def forward(self, x):
+         base_out = self.base_linear(x)
+         gate_scores = torch.softmax(self.gate(x), dim=-1)
+         expert_out = 0
+         for i, expert in enumerate(self.experts):
+             expert_out += gate_scores[..., i:i+1] * expert(x)
+         return base_out + expert_out
+
+ def replace_proj_with_moe_lora(model, r=8, num_experts=2, k=1, lora_alpha=16, lora_dropout=0.05):
+     for layer in model.model.layers:
+         for proj_name in ["up_proj", "down_proj"]:
+             old = getattr(layer.mlp, proj_name)
+             moe = MoELoRALinear(
+                 base_linear=old,
+                 r=r,
+                 num_experts=num_experts,
+                 k=k,
+                 lora_alpha=lora_alpha,
+                 lora_dropout=lora_dropout,
+             ).to(next(old.parameters()).device)
+             setattr(layer.mlp, proj_name, moe)
+     return model
+
+ # -----------------------------
+ # Load / Prepare Model & Dataset
+ # -----------------------------
+ def preprocess(example):
+     tokens = tokenizer(example['text'], truncation=True, padding=False)
+     text = example['text']
+     assistant_index = text.find("<|assistant|>")
+     prefix_ids = tokenizer(text[:assistant_index], add_special_tokens=False)['input_ids']
+     prefix_len = len(prefix_ids)
+     # Mask the prompt portion so loss would be computed only on the assistant's answer
+     labels = tokens['input_ids'].copy()
+     labels[:prefix_len] = [-100] * prefix_len
+     tokens['labels'] = labels
+     return tokens
+
+ def load_model(model_id):
+     # Hardcoded dataset if file not present
+     data = [
+         {"question": "What were MakeMyTrip's total assets as of March 31, 2024?",
+          "answer": "MakeMyTrip's total assets as of March 31, 2024 were USD 1,660,077 thousand."},
+         {"question": "What was MakeMyTrip's total revenue for the year ended March 31, 2025?",
+          "answer": "MakeMyTrip's total revenue for the year ended March 31, 2025 was USD 978,336 thousand."},
+     ]
+     df = pd.DataFrame(data)
+     training_data = []
+     system_prompt = "You are a helpful assistant that provides financial data from MakeMyTrip reports."
+     for index, row in df.iterrows():
+         training_data.append({"text": f"<|system|>\n{system_prompt}</s>\n<|user|>\n{row['question']}</s>\n<|assistant|>\n{row['answer']}</s>"})
+     dataset = Dataset.from_list(training_data)
+     # NOTE: tokenized_dataset is built here but no training step is run in this script
+     tokenized_dataset = dataset.map(preprocess, remove_columns=["text"])
+
+     base_model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to(device)
+     model = replace_proj_with_moe_lora(base_model)
+     peft_config = LoraConfig(r=8, lora_alpha=16, lora_dropout=0.05, target_modules=["o_proj"], bias="none", task_type="CAUSAL_LM")
+     model = get_peft_model(model, peft_config)
+     model.eval()
+     return model
+
+ model = load_model(MODEL_ID)
+
+ # -----------------------------
+ # Gradio Interface
+ # -----------------------------
+ def generate_answer(prompt, max_tokens):
+     if prompt.strip() == "":
+         return "Please enter a prompt!"
+     system_prompt = "You are a helpful assistant that provides financial data from MakeMyTrip reports."
+     messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
+     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     inputs = tokenizer(input_text, return_tensors="pt").to(device)
+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=int(max_tokens),  # slider value may arrive as a float
+             do_sample=True,
+             top_p=0.9,
+             temperature=0.7,
+         )
+     # Decode only the newly generated tokens, not the echoed prompt
+     answer = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
+     return answer
+
+ iface = gr.Interface(
+     fn=generate_answer,
+     inputs=[
+         gr.Textbox(label="Enter your question:", lines=5, placeholder="Type your question here..."),
+         gr.Slider(minimum=50, maximum=500, step=10, value=200, label="Max tokens to generate")
+     ],
+     outputs=gr.Textbox(label="Generated Answer"),
+     title="Chat with My Fine-Tuned Model 🤖",
+     description="This app allows you to ask questions about MakeMyTrip's financial data."
+ )
+
+ iface.launch()
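
Note: the updated app.py builds tokenized_dataset and imports DataCollatorForLanguageModeling, but never runs a training step, so the LoRA/MoE adapters serve at their initial values. Below is a minimal, hypothetical sketch of how they could be fine-tuned with transformers.Trainer. It assumes model, tokenizer, and tokenized_dataset are available at module level (in the commit, tokenized_dataset is local to load_model), uses DataCollatorForSeq2Seq so the prompt-masked labels from preprocess survive padding, and all hyperparameter values are illustrative assumptions, not part of the commit.

# Hypothetical training sketch (not part of this commit).
# Assumes `model`, `tokenizer`, and `tokenized_dataset` exist at module level.

tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token  # ensure a pad token exists

# peft's get_peft_model freezes parameters outside its own adapters (e.g. the MoE routing
# gate here), so re-enable gradients on the custom MoE/LoRA parameters just in case.
for name, param in model.named_parameters():
    if "lora_A" in name or "lora_B" in name or ".gate." in name:
        param.requires_grad = True

# DataCollatorForSeq2Seq pads input_ids and pads labels with -100, preserving the prompt
# masking from preprocess (DataCollatorForLanguageModeling would rebuild labels instead).
collator = transformers.DataCollatorForSeq2Seq(tokenizer, padding=True, label_pad_token_id=-100)

training_args = transformers.TrainingArguments(
    output_dir="/tmp/moe_lora_out",      # illustrative path
    per_device_train_batch_size=1,       # illustrative values throughout
    num_train_epochs=3,
    learning_rate=2e-4,
    logging_steps=1,
    report_to="none",
)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=collator,
)
trainer.train()
model.eval()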