import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer
from evo_model import EvoTransformer
import torch.nn as nn
import torch.optim as optim


class FeedbackDataset(Dataset):
    """Wraps the feedback CSV (expected columns: prompt, context, label) for fine-tuning."""

    def __init__(self, csv_file):
        # Drop incomplete feedback rows so tokenization and label casting don't fail.
        self.data = pd.read_csv(csv_file).dropna()
        self.tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        prompt = row['prompt']
        context = row['context']
        label = int(row['label'])
        # Concatenate prompt and context into one sequence, padded/truncated to 128 tokens.
        text = f"{prompt} {context}"
        encoded = self.tokenizer(text, truncation=True, padding='max_length', max_length=128, return_tensors="pt")
        return encoded['input_ids'].squeeze(0), torch.tensor(label)
def fine_tune_on_feedback():
    csv_file = "feedback_log.csv"
    dataset = FeedbackDataset(csv_file)
    dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Start from the existing HellaSwag checkpoint and continue training on user feedback.
    model = EvoTransformer().to(device)
    model.load_state_dict(torch.load("evo_hellaswag.pt", map_location=device))
    model.train()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=2e-5)

    # Two short passes over the feedback data keep the update lightweight.
    for epoch in range(2):
        for input_ids, labels in dataloader:
            input_ids = input_ids.to(device)
            labels = labels.to(device)
            outputs = model(input_ids)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Overwrite the checkpoint so the Space serves the retrained weights.
    torch.save(model.state_dict(), "evo_hellaswag.pt")
    print("✅ Evo retrained and saved.")