Spaces:

lucabadiali
/

ML_OPS_Project

Running

App Files Files Community

lucabadiali committed about 1 month ago

Commit

57fbf67

1 Parent(s): c57b942

Tried first dashboard

Browse files

Files changed (7) hide show

prometheus.yml +2 -1
src/app/__pycache__/config.cpython-311.pyc +0 -0
src/app/app.py +133 -2
src/app/config.py +20 -11
src/app/utils.py +29 -1
src/nb.ipynb +208 -0
src/train_model.py +4 -10

prometheus.yml CHANGED Viewed

@@ -3,10 +3,11 @@ global:
 scrape_configs:
   - job_name: "fastapi_hf"
-    scheme: https
     metrics_path: /metrics
     static_configs:
       - targets:
           - "lucabadiali-ml-ops-project.hf.space:443"
     # If the Space is private, add:
     # authorization:

 scrape_configs:
   - job_name: "fastapi_hf"
+    scheme: http
     metrics_path: /metrics
     static_configs:
       - targets:
+          #- "host.docker.internal:8000"
           - "lucabadiali-ml-ops-project.hf.space:443"
     # If the Space is private, add:
     # authorization:

src/app/__pycache__/config.cpython-311.pyc CHANGED Viewed

Binary files a/src/app/__pycache__/config.cpython-311.pyc and b/src/app/__pycache__/config.cpython-311.pyc differ

src/app/app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from fastapi import FastAPI, HTTPException
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-from .utils import preprocess
 from scipy.special import softmax
 import numpy as np
 from pydantic import BaseModel
@@ -9,13 +9,57 @@ import csv
 import requests
 from typing import Union, List
 import torch
-from .config import MODEL_SOURCE, ModelSource, load_model_and_tokenizer
 from prometheus_fastapi_instrumentator import Instrumentator
 app = FastAPI()
 Instrumentator().instrument(app).expose(app, endpoint="/metrics", include_in_schema=False)
 class SentimentQuery(BaseModel):
     input_texts: Union[str, List[str]]
@@ -27,6 +71,7 @@ with urllib.request.urlopen(mapping_link) as f:
 labels = [row[1] for row in csvreader if len(row) > 1]
 tokenizer, model = load_model_and_tokenizer(MODEL_SOURCE)
 @app.get("/")
 def read_root():
@@ -55,6 +100,11 @@ async def analyze_text(query:SentimentQuery):
     response_body = []
     for i,text in enumerate(input_texts):
         response_body.append(
             {
                 "input_text":text,
@@ -73,6 +123,87 @@ async def analyze_text(query:SentimentQuery):
     }
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

 from fastapi import FastAPI, HTTPException
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from .utils import preprocess, load_model_and_tokenizer
 from scipy.special import softmax
 import numpy as np
 from pydantic import BaseModel
 import requests
 from typing import Union, List
 import torch
+from .config import MODEL_SOURCE, ModelSource
 from prometheus_fastapi_instrumentator import Instrumentator
+##################
+from prometheus_client import Counter, Gauge
+from apscheduler.schedulers.background import BackgroundScheduler
+from datetime import datetime
+import os
+import random
+import pandas as pd
+#################
+#############
+from .config import EVAL_BATCH_SIZE, N_SAMPLES, DATASET_PATH, EVAL_PERIOD_MIN
+from .utils import load_dataset
+###########
 app = FastAPI()
 Instrumentator().instrument(app).expose(app, endpoint="/metrics", include_in_schema=False)
+###################
+# ---------- Metrics (custom) ----------
+# Production predictions distribution (unlabeled)
+# PRED_COUNTER = Counter(
+#     "sentiment_requests_total",
+#     "Total predictions served by label",
+#     ["label"]
+# )
+# EVAL_SAMPLE_SIZE = Gauge(
+#     "model_evaluation_sample_size",
+#     "Number of samples used in the latest periodic evaluation"
+# )
+# EVAL_COUNTER_DIST = Counter(
+#     "sentiment_test_distribution_total",
+#     "Cumulative predicted label counts on evaluation samples",
+#     ["label"]
+# )
+# EVAL_RUNS = Counter(
+#     "model_evaluations_total",
+#     "Total number of evaluation runs completed"
+# )
+##################
 class SentimentQuery(BaseModel):
     input_texts: Union[str, List[str]]
 labels = [row[1] for row in csvreader if len(row) > 1]
 tokenizer, model = load_model_and_tokenizer(MODEL_SOURCE)
+model.eval()
 @app.get("/")
 def read_root():
     response_body = []
     for i,text in enumerate(input_texts):
+        predicted = labels[pred_labels[i]]
+        #PRED_COUNTER.labels(label=predicted).inc()
         response_body.append(
             {
                 "input_text":text,
     }
+def evaluate_accuracy():
+    dataset = load_dataset(DATASET_PATH).shuffle()["test"][:N_SAMPLES]
+    N_BATCHES = len(dataset["text"])//EVAL_BATCH_SIZE
+    accuracy = 0
+    for i in range(N_BATCHES+1):
+        if i == N_BATCHES :
+            samples, labels = dataset["text"][i*EVAL_BATCH_SIZE:], dataset["label"][i*EVAL_BATCH_SIZE:]
+        else:
+            samples, labels = dataset["text"][i*EVAL_BATCH_SIZE:(i+1)*EVAL_BATCH_SIZE], dataset["label"][i*EVAL_BATCH_SIZE:(i+1)*EVAL_BATCH_SIZE]
+        model.eval()
+        encoded_batch = tokenizer(
+            [preprocess(t) for t in samples],
+            padding=True,          # pad to same length
+            truncation=True,       # truncate long texts
+            return_tensors="pt",
+        )
+        with torch.no_grad():
+            output = model(**encoded_batch)
+        logits = output[0].detach().cpu().numpy()
+        scores = softmax(logits, axis=-1)
+        pred_labels = scores.argmax(axis=-1)
+        accuracy += sum(pred_labels==labels)
+    accuracy/=N_SAMPLES
+    return accuracy
+# Evaluation metrics (labeled test set)
+EVAL_ACCURACY = Gauge(
+    "model_evaluation_accuracy",
+    "Accuracy on latest periodic evaluation of labeled test subset"
+)
+from apscheduler.schedulers.background import BackgroundScheduler
+from datetime import datetime, timedelta
+import threading
+_model_lock = threading.Lock()
+def _run_eval_and_set_gauge():
+    # If you expect concurrent requests to /predict, the lock prevents GPU/torch contention
+    with _model_lock:
+        acc = evaluate_accuracy()
+    EVAL_ACCURACY.set(acc)
+scheduler = BackgroundScheduler(daemon=True)
+@app.on_event("startup")
+def _start_scheduler():
+    # run once soon after startup
+    scheduler.add_job(_run_eval_and_set_gauge, next_run_time=datetime.now() + timedelta(seconds=2))
+    # then every EVAL_PERIOD_MIN minutes
+    scheduler.add_job(_run_eval_and_set_gauge, "interval", minutes=EVAL_PERIOD_MIN)
+    scheduler.start()
+@app.on_event("shutdown")
+def _stop_scheduler():
+    scheduler.shutdown(wait=False)
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

src/app/config.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import os
 from enum import Enum
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from pathlib import Path
@@ -10,15 +9,25 @@ class ModelSource(str, Enum):
 MODEL_SOURCE = ModelSource(os.getenv("MODEL_SOURCE", "hf"))
 HF_MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
-def load_model_and_tokenizer(MODEL_SOURCE):
-    if MODEL_SOURCE == ModelSource.HF:   # use the latest model available in the HF hub
-        tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
-        model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL)
-    else: # use a locally fine tuned model
-        local_model_path = Path("models/saved_model")
-        assert local_model_path.exists(), """No local model was found. Run 'python3 src/train_model.py'"""
-        tokenizer = AutoTokenizer.from_pretrained("models/saved_tokenizer")
-        model = AutoModelForSequenceClassification.from_pretrained("models/saved_model")
-    return tokenizer, model

 import os
 from enum import Enum
 from pathlib import Path
 MODEL_SOURCE = ModelSource(os.getenv("MODEL_SOURCE", "hf"))
 HF_MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
+DATASET_PATH = Path("data/dataset")
+EVAL_SAMPLE_SIZE = int(os.getenv("EVAL_SAMPLE_SIZE", "80"))
+EVAL_INTERVAL_HOURS = float(os.getenv("EVAL_INTERVAL_HOURS", "1"))
+RANDOM_SEED = int(os.getenv("RANDOM_SEED", "42"))
+EVAL_BATCH_SIZE = 64
+N_SAMPLES = 500
+EVAL_PERIOD_MIN = 1
+# def load_model_and_tokenizer(MODEL_SOURCE):
+#     if MODEL_SOURCE == ModelSource.HF:   # use the latest model available in the HF hub
+#         tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
+#         model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL)
+#     else: # use a locally fine tuned model
+#         local_model_path = Path("models/saved_model")
+#         assert local_model_path.exists(), """No local model was found. Run 'python3 src/train_model.py' first"""
+#         tokenizer = AutoTokenizer.from_pretrained("models/saved_tokenizer")
+#         model = AutoModelForSequenceClassification.from_pretrained("models/saved_model")
+#     return tokenizer, model

src/app/utils.py CHANGED Viewed

@@ -1,7 +1,35 @@
 def preprocess(text):
     new_text = []
     for t in text.split(" "):
         t = '@user' if t.startswith('@') and len(t) > 1 else t
         t = 'http' if t.startswith('http') else t
         new_text.append(t)
-    return " ".join(new_text)

+from pathlib import Path
+from .config import ModelSource, HF_MODEL
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from datasets import load_dataset as hf_load_dataset
+from datasets import load_from_disk
 def preprocess(text):
     new_text = []
     for t in text.split(" "):
         t = '@user' if t.startswith('@') and len(t) > 1 else t
         t = 'http' if t.startswith('http') else t
         new_text.append(t)
+    return " ".join(new_text)
+def load_model_and_tokenizer(MODEL_SOURCE):
+    if MODEL_SOURCE == ModelSource.HF:   # use the latest model available in the HF hub
+        tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
+        model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL)
+    else: # use a locally fine tuned model
+        local_model_path = Path("models/saved_model")
+        assert local_model_path.exists(), """No local model was found. Run 'python3 src/train_model.py' first"""
+        tokenizer = AutoTokenizer.from_pretrained("models/saved_tokenizer")
+        model = AutoModelForSequenceClassification.from_pretrained("models/saved_model")
+    return tokenizer, model
+def load_dataset(dataset_path):
+    if dataset_path.exists():
+        dataset = load_from_disk(dataset_path)
+    else:
+        dataset = hf_load_dataset('tweet_eval', 'sentiment')
+    return dataset

src/nb.ipynb ADDED Viewed

	@@ -0,0 +1,208 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "id": "7aaceacb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "from app.config import DATASET_PATH, MODEL_SOURCE\n",
+    "from app.utils import load_dataset, load_model_and_tokenizer, preprocess\n",
+    "from scipy.special import softmax\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "id": "7defab3e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+      "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
+     ]
+    }
+   ],
+   "source": [
+    "tokenizer, model = load_model_and_tokenizer(MODEL_SOURCE)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "0a1dcfdd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = load_dataset(DATASET_PATH).shuffle()[\"test\"][:N_SAMPLES]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "501e6728",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "id": "82b25de1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2"
+      ]
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "N_BEVAL_BATCH_SIZE = 64\n",
+    "N_SAMPLES = 500\n",
+    "N_BATCHES = len(dataset[\"text\"])//EVAL_BATCH_SIZE\n",
+    "N_BATCHES"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "id": "7dd5371b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0 64\n",
+      "64 128\n",
+      "128 192\n",
+      "192 256\n",
+      "256 320\n",
+      "320 384\n",
+      "384 448\n",
+      "448 500\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "np.float64(0.71)"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "EVAL_BATCH_SIZE = 64\n",
+    "N_SAMPLES = 500\n",
+    "def evaluate_accuracy():\n",
+    "\n",
+    "    dataset = load_dataset(DATASET_PATH).shuffle()[\"test\"][:N_SAMPLES]\n",
+    "    N_BATCHES = len(dataset[\"text\"])//EVAL_BATCH_SIZE\n",
+    "\n",
+    "    accuracy = 0\n",
+    "    for i in range(N_BATCHES+1):\n",
+    "\n",
+    "        start = i*EVAL_BATCH_SIZE\n",
+    "        end = min(N_SAMPLES, (i+1)*EVAL_BATCH_SIZE)\n",
+    "        print(start, end)\n",
+    "        samples, labels = dataset[\"text\"][start:end], dataset[\"label\"][start:end]\n",
+    "        \n",
+    "        model.eval()\n",
+    "        encoded_batch = tokenizer(\n",
+    "            [preprocess(t) for t in samples],\n",
+    "            padding=True,          # pad to same length\n",
+    "            truncation=True,       # truncate long texts\n",
+    "            return_tensors=\"pt\",\n",
+    "        )\n",
+    "\n",
+    "        with torch.no_grad():\n",
+    "            output = model(**encoded_batch)\n",
+    "    \n",
+    "        logits = output[0].detach().cpu().numpy()\n",
+    "        scores = softmax(logits, axis=-1)\n",
+    "        pred_labels = scores.argmax(axis=-1)\n",
+    "        accuracy += sum(pred_labels==labels)\n",
+    "    accuracy/=N_SAMPLES\n",
+    "    return accuracy\n",
+    "evaluate_accuracy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "dbd3bb8c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def _load_test_data():\n",
+    "    \"\"\"\n",
+    "    Expects CSV with columns: text,label\n",
+    "    label values must be one of labels (negative, neutral, positive) or their indices (0,1,2).\n",
+    "    \"\"\"\n",
+    "    df = pd.read_csv(TEST_DATA_PATH)\n",
+    "    # normalize label column to strings matching our 'labels' list\n",
+    "    if np.issubdtype(df[\"label\"].dtype, np.number):\n",
+    "        df[\"label\"] = df[\"label\"].astype(int).map(lambda i: labels[i])\n",
+    "    else:\n",
+    "        df[\"label\"] = df[\"label\"].str.lower().str.strip()\n",
+    "    # keep only supported labels\n",
+    "    df = df[df[\"label\"].isin(labels)].dropna(subset=[\"text\", \"label\"])\n",
+    "    return df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ec0b086e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "800c8018",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ProjectEnv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

src/train_model.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from app.utils import preprocess
 import urllib
 import csv
 import os
@@ -8,8 +8,8 @@ from transformers import (
     TrainingArguments, Trainer, EarlyStoppingCallback,
     DataCollatorWithPadding
 )
-from datasets import load_dataset,load_from_disk
 from pathlib import Path
 # --- Device detection ---
@@ -73,14 +73,8 @@ model.gradient_checkpointing_enable()
 model.config.use_cache = False
 #### DATASET LOADING
-dataset_path = Path("data/dataset")
-if dataset_path.exists():
-    dataset = load_from_disk(dataset_path)
-else:
-    dataset = load_dataset('tweet_eval', 'sentiment')
 # ---- COPY-PASTE FROM HERE ----

+from app.utils import preprocess, load_dataset
 import urllib
 import csv
 import os
     TrainingArguments, Trainer, EarlyStoppingCallback,
     DataCollatorWithPadding
 )
 from pathlib import Path
+from app.config import DATASET_PATH
 # --- Device detection ---
 model.config.use_cache = False
 #### DATASET LOADING
+dataset = load_dataset(DATASET_PATH)
 # ---- COPY-PASTE FROM HERE ----