lucabadiali committed on
Commit
57fbf67
·
1 Parent(s): c57b942

Tried first dashboard

Browse files
prometheus.yml CHANGED
@@ -3,10 +3,11 @@ global:
3
 
4
  scrape_configs:
5
  - job_name: "fastapi_hf"
6
- scheme: https
7
  metrics_path: /metrics
8
  static_configs:
9
  - targets:
 
10
  - "lucabadiali-ml-ops-project.hf.space:443"
11
  # Se la Space è privata, aggiungi:
12
  # authorization:
 
3
 
4
  scrape_configs:
5
  - job_name: "fastapi_hf"
6
+ scheme: http
7
  metrics_path: /metrics
8
  static_configs:
9
  - targets:
10
+ #- "host.docker.internal:8000"
11
  - "lucabadiali-ml-ops-project.hf.space:443"
12
  # Se la Space è privata, aggiungi:
13
  # authorization:
src/app/__pycache__/config.cpython-311.pyc CHANGED
Binary files a/src/app/__pycache__/config.cpython-311.pyc and b/src/app/__pycache__/config.cpython-311.pyc differ
 
src/app/app.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import FastAPI, HTTPException
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
- from .utils import preprocess
4
  from scipy.special import softmax
5
  import numpy as np
6
  from pydantic import BaseModel
@@ -9,13 +9,57 @@ import csv
9
  import requests
10
  from typing import Union, List
11
  import torch
12
- from .config import MODEL_SOURCE, ModelSource, load_model_and_tokenizer
13
  from prometheus_fastapi_instrumentator import Instrumentator
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  app = FastAPI()
17
  Instrumentator().instrument(app).expose(app, endpoint="/metrics", include_in_schema=False)
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  class SentimentQuery(BaseModel):
21
  input_texts: Union[str, List[str]]
@@ -27,6 +71,7 @@ with urllib.request.urlopen(mapping_link) as f:
27
  labels = [row[1] for row in csvreader if len(row) > 1]
28
 
29
  tokenizer, model = load_model_and_tokenizer(MODEL_SOURCE)
 
30
 
31
  @app.get("/")
32
  def read_root():
@@ -55,6 +100,11 @@ async def analyze_text(query:SentimentQuery):
55
 
56
  response_body = []
57
  for i,text in enumerate(input_texts):
 
 
 
 
 
58
  response_body.append(
59
  {
60
  "input_text":text,
@@ -73,6 +123,87 @@ async def analyze_text(query:SentimentQuery):
73
  }
74
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  if __name__ == "__main__":
77
  import uvicorn
78
  uvicorn.run(app, host="0.0.0.0", port=8000)
 
1
  from fastapi import FastAPI, HTTPException
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+ from .utils import preprocess, load_model_and_tokenizer
4
  from scipy.special import softmax
5
  import numpy as np
6
  from pydantic import BaseModel
 
9
  import requests
10
  from typing import Union, List
11
  import torch
12
+ from .config import MODEL_SOURCE, ModelSource
13
  from prometheus_fastapi_instrumentator import Instrumentator
14
 
15
+ ##################
16
+ from prometheus_client import Counter, Gauge
17
+ from apscheduler.schedulers.background import BackgroundScheduler
18
+ from datetime import datetime
19
+ import os
20
+ import random
21
+ import pandas as pd
22
+ #################
23
+
24
+
25
+ #############
26
+ from .config import EVAL_BATCH_SIZE, N_SAMPLES, DATASET_PATH, EVAL_PERIOD_MIN
27
+ from .utils import load_dataset
28
+ ###########
29
 
30
  app = FastAPI()
31
  Instrumentator().instrument(app).expose(app, endpoint="/metrics", include_in_schema=False)
32
 
33
+ ###################
34
+ # ---------- Metrics (custom) ----------
35
+ # Production predictions distribution (unlabeled)
36
+ # PRED_COUNTER = Counter(
37
+ # "sentiment_requests_total",
38
+ # "Total predictions served by label",
39
+ # ["label"]
40
+ # )
41
+
42
+
43
+
44
+ # EVAL_SAMPLE_SIZE = Gauge(
45
+ # "model_evaluation_sample_size",
46
+ # "Number of samples used in the latest periodic evaluation"
47
+ # )
48
+ # EVAL_COUNTER_DIST = Counter(
49
+ # "sentiment_test_distribution_total",
50
+ # "Cumulative predicted label counts on evaluation samples",
51
+ # ["label"]
52
+ # )
53
+ # EVAL_RUNS = Counter(
54
+ # "model_evaluations_total",
55
+ # "Total number of evaluation runs completed"
56
+ # )
57
+ ##################
58
+
59
+
60
+
61
+
62
+
63
 
64
  class SentimentQuery(BaseModel):
65
  input_texts: Union[str, List[str]]
 
71
  labels = [row[1] for row in csvreader if len(row) > 1]
72
 
73
  tokenizer, model = load_model_and_tokenizer(MODEL_SOURCE)
74
+ model.eval()
75
 
76
  @app.get("/")
77
  def read_root():
 
100
 
101
  response_body = []
102
  for i,text in enumerate(input_texts):
103
+
104
+ predicted = labels[pred_labels[i]]
105
+ #PRED_COUNTER.labels(label=predicted).inc()
106
+
107
+
108
  response_body.append(
109
  {
110
  "input_text":text,
 
123
  }
124
 
125
 
126
+
127
def evaluate_accuracy():
    """Estimate the model's accuracy on a sample of the labeled test split.

    The dataset is loaded via ``load_dataset(DATASET_PATH)``, shuffled, and at
    most ``N_SAMPLES`` rows of its ``"test"`` split are scored in batches of
    ``EVAL_BATCH_SIZE``.

    Returns:
        float: fraction of evaluated samples whose predicted class index equals
        the gold label, or ``0.0`` when the sample is empty.
    """
    # NOTE(review): shuffle() is called without a seed, so each run presumably
    # scores a different subset -- confirm this is intended.
    dataset = load_dataset(DATASET_PATH).shuffle()["test"][:N_SAMPLES]
    texts, gold_labels = dataset["text"], dataset["label"]

    # Normalize by the number of rows actually scored: the split may hold fewer
    # than N_SAMPLES rows, and dividing by N_SAMPLES would then under-report.
    n_evaluated = len(texts)
    if n_evaluated == 0:
        return 0.0

    model.eval()  # inference mode; only needs to be set once per run
    n_correct = 0
    # Stepping by the batch size never yields an empty trailing batch, even
    # when n_evaluated is an exact multiple of EVAL_BATCH_SIZE.
    for start in range(0, n_evaluated, EVAL_BATCH_SIZE):
        stop = start + EVAL_BATCH_SIZE
        batch_texts = texts[start:stop]
        batch_labels = np.asarray(gold_labels[start:stop])

        encoded_batch = tokenizer(
            [preprocess(t) for t in batch_texts],
            padding=True,       # pad to same length
            truncation=True,    # truncate long texts
            return_tensors="pt",
        )

        with torch.no_grad():
            output = model(**encoded_batch)

        logits = output[0].detach().cpu().numpy()
        # softmax is monotonic, so the argmax of the logits is the predicted class.
        pred_labels = logits.argmax(axis=-1)
        n_correct += int((pred_labels == batch_labels).sum())

    return n_correct / n_evaluated
155
+
156
+
157
+ # Evaluation metrics (labeled test set)
158
+ EVAL_ACCURACY = Gauge(
159
+ "model_evaluation_accuracy",
160
+ "Accuracy on latest periodic evaluation of labeled test subset"
161
+ )
162
+
163
+ from apscheduler.schedulers.background import BackgroundScheduler
164
+ from datetime import datetime, timedelta
165
+ import threading
166
+
167
+ _model_lock = threading.Lock()
168
+
169
def _run_eval_and_set_gauge():
    """Run one evaluation pass and publish its accuracy to ``EVAL_ACCURACY``.

    The pass runs while holding ``_model_lock`` so that evaluation runs are
    serialized with anything else that acquires the same lock.
    """
    # NOTE(review): this only avoids contention with request handlers if they
    # also acquire _model_lock -- confirm against the prediction endpoint.
    with _model_lock:
        EVAL_ACCURACY.set(evaluate_accuracy())
174
+
175
+
176
+ scheduler = BackgroundScheduler(daemon=True)
177
+
178
@app.on_event("startup")
def _start_scheduler():
    """Register the evaluation jobs and start the background scheduler."""
    first_run_at = datetime.now() + timedelta(seconds=2)

    # One-off job: first evaluation shortly after the application starts.
    scheduler.add_job(_run_eval_and_set_gauge, next_run_time=first_run_at)

    # Recurring job: re-evaluate every EVAL_PERIOD_MIN minutes.
    scheduler.add_job(
        _run_eval_and_set_gauge,
        trigger="interval",
        minutes=EVAL_PERIOD_MIN,
    )

    scheduler.start()
185
+
186
@app.on_event("shutdown")
def _stop_scheduler():
    """Shut the background scheduler down when the application stops.

    ``wait=False`` is passed so that shutdown presumably does not block on an
    evaluation job that is still running -- see the APScheduler docs.
    """
    scheduler.shutdown(wait=False)
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+
198
+
199
+
200
+
201
+
202
+
203
+
204
+
205
+
206
+
207
  if __name__ == "__main__":
208
  import uvicorn
209
  uvicorn.run(app, host="0.0.0.0", port=8000)
src/app/config.py CHANGED
@@ -1,6 +1,5 @@
1
  import os
2
  from enum import Enum
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  from pathlib import Path
5
 
6
 
@@ -10,15 +9,25 @@ class ModelSource(str, Enum):
10
 
11
  MODEL_SOURCE = ModelSource(os.getenv("MODEL_SOURCE", "hf"))
12
  HF_MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
 
13
 
14
 
15
- def load_model_and_tokenizer(MODEL_SOURCE):
16
- if MODEL_SOURCE == ModelSource.HF: # use the latest model available in the HF hub
17
- tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
18
- model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL)
19
- else: # use a locally fine tuned model
20
- local_model_path = Path("models/saved_model")
21
- assert local_model_path.exists(), """No local model was found. Run 'python3 src/train_model.py'"""
22
- tokenizer = AutoTokenizer.from_pretrained("models/saved_tokenizer")
23
- model = AutoModelForSequenceClassification.from_pretrained("models/saved_model")
24
- return tokenizer, model
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  from enum import Enum
 
3
  from pathlib import Path
4
 
5
 
 
9
 
10
  MODEL_SOURCE = ModelSource(os.getenv("MODEL_SOURCE", "hf"))
11
  HF_MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
12
# On-disk location of the dataset copy.
DATASET_PATH = Path("data/dataset")


# Environment-overridable settings.
# NOTE(review): these three are not referenced by the evaluation code visible
# in this change -- confirm whether they are still needed.
EVAL_SAMPLE_SIZE = int(os.getenv("EVAL_SAMPLE_SIZE", "80"))
EVAL_INTERVAL_HOURS = float(os.getenv("EVAL_INTERVAL_HOURS", "1"))
RANDOM_SEED = int(os.getenv("RANDOM_SEED", "42"))

# Periodic-evaluation knobs. Each keeps its previous value as the default but
# can now be overridden through an environment variable of the same name,
# like the settings above.
EVAL_BATCH_SIZE = int(os.getenv("EVAL_BATCH_SIZE", "64"))   # samples per forward pass
N_SAMPLES = int(os.getenv("N_SAMPLES", "500"))              # max test rows scored per run
EVAL_PERIOD_MIN = int(os.getenv("EVAL_PERIOD_MIN", "1"))    # minutes between evaluation runs
22
+
23
+
24
+ # def load_model_and_tokenizer(MODEL_SOURCE):
25
+ # if MODEL_SOURCE == ModelSource.HF: # use the latest model available in the HF hub
26
+ # tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
27
+ # model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL)
28
+ # else: # use a locally fine tuned model
29
+ # local_model_path = Path("models/saved_model")
30
+ # assert local_model_path.exists(), """No local model was found. Run 'python3 src/train_model.py' first"""
31
+ # tokenizer = AutoTokenizer.from_pretrained("models/saved_tokenizer")
32
+ # model = AutoModelForSequenceClassification.from_pretrained("models/saved_model")
33
+ # return tokenizer, model
src/app/utils.py CHANGED
@@ -1,7 +1,35 @@
 
 
 
 
 
 
 
1
  def preprocess(text):
2
  new_text = []
3
  for t in text.split(" "):
4
  t = '@user' if t.startswith('@') and len(t) > 1 else t
5
  t = 'http' if t.startswith('http') else t
6
  new_text.append(t)
7
- return " ".join(new_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from .config import ModelSource, HF_MODEL
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+ from datasets import load_dataset as hf_load_dataset
5
+ from datasets import load_from_disk
6
+
7
+
8
def preprocess(text):
    """Mask user mentions and links in *text*.

    The text is split on single spaces. A token that starts with ``@`` and is
    longer than one character becomes ``@user``; a token that starts with
    ``http`` becomes ``http``. All other tokens are kept unchanged, and the
    tokens are re-joined with single spaces.
    """
    def _mask(token):
        if token.startswith('@') and len(token) > 1:
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))
15
+
16
+
17
+
18
def load_model_and_tokenizer(MODEL_SOURCE):
    """Load the tokenizer and sequence-classification model for a model source.

    Args:
        MODEL_SOURCE: ``ModelSource.HF`` loads the ``HF_MODEL`` checkpoint;
            any other value loads the locally fine-tuned artifacts from
            ``models/saved_tokenizer`` and ``models/saved_model``.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        AssertionError: if a local source is requested but the saved model or
            tokenizer directory does not exist.
    """
    if MODEL_SOURCE == ModelSource.HF:
        # Use the checkpoint published on the Hugging Face hub.
        tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
        model = AutoModelForSequenceClassification.from_pretrained(HF_MODEL)
        return tokenizer, model

    # Otherwise use a locally fine-tuned model.
    local_model_path = Path("models/saved_model")
    local_tokenizer_path = Path("models/saved_tokenizer")

    # Raise explicitly instead of using `assert`, which is stripped when Python
    # runs with -O; AssertionError is kept so existing callers see the same type.
    if not local_model_path.exists():
        raise AssertionError(
            """No local model was found. Run 'python3 src/train_model.py' first"""
        )
    if not local_tokenizer_path.exists():
        raise AssertionError(
            "No local tokenizer was found. Run 'python3 src/train_model.py' first"
        )

    tokenizer = AutoTokenizer.from_pretrained("models/saved_tokenizer")
    model = AutoModelForSequenceClassification.from_pretrained("models/saved_model")
    return tokenizer, model
28
+
29
+
30
def load_dataset(dataset_path):
    """Return the sentiment dataset, preferring a copy saved on disk.

    Args:
        dataset_path: location of a dataset previously saved to disk; may be a
            ``pathlib.Path`` or a plain string path.

    Returns:
        The dataset loaded from ``dataset_path`` when that path exists,
        otherwise the ``tweet_eval`` / ``sentiment`` dataset fetched through
        ``hf_load_dataset``.
    """
    # Coerce so that string paths work too (a str has no .exists()).
    dataset_path = Path(dataset_path)
    if dataset_path.exists():
        return load_from_disk(dataset_path)
    return hf_load_dataset('tweet_eval', 'sentiment')
src/nb.ipynb ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 48,
6
+ "id": "7aaceacb",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from pathlib import Path\n",
11
+ "from app.config import DATASET_PATH, MODEL_SOURCE\n",
12
+ "from app.utils import load_dataset, load_model_and_tokenizer, preprocess\n",
13
+ "from scipy.special import softmax\n"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 49,
19
+ "id": "7defab3e",
20
+ "metadata": {},
21
+ "outputs": [
22
+ {
23
+ "name": "stderr",
24
+ "output_type": "stream",
25
+ "text": [
26
+ "Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
27
+ "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
28
+ "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
29
+ ]
30
+ }
31
+ ],
32
+ "source": [
33
+ "tokenizer, model = load_model_and_tokenizer(MODEL_SOURCE)\n"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 24,
39
+ "id": "0a1dcfdd",
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "dataset = load_dataset(DATASET_PATH).shuffle()[\"test\"][:N_SAMPLES]\n"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 33,
49
+ "id": "501e6728",
50
+ "metadata": {},
51
+ "outputs": [],
52
+ "source": [
53
+ "import torch"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 47,
59
+ "id": "82b25de1",
60
+ "metadata": {},
61
+ "outputs": [
62
+ {
63
+ "data": {
64
+ "text/plain": [
65
+ "2"
66
+ ]
67
+ },
68
+ "execution_count": 47,
69
+ "metadata": {},
70
+ "output_type": "execute_result"
71
+ }
72
+ ],
73
+ "source": [
74
+ "N_BEVAL_BATCH_SIZE = 64\n",
75
+ "N_SAMPLES = 500\n",
76
+ "N_BATCHES = len(dataset[\"text\"])//EVAL_BATCH_SIZE\n",
77
+ "N_BATCHES"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": 54,
83
+ "id": "7dd5371b",
84
+ "metadata": {},
85
+ "outputs": [
86
+ {
87
+ "name": "stdout",
88
+ "output_type": "stream",
89
+ "text": [
90
+ "0 64\n",
91
+ "64 128\n",
92
+ "128 192\n",
93
+ "192 256\n",
94
+ "256 320\n",
95
+ "320 384\n",
96
+ "384 448\n",
97
+ "448 500\n"
98
+ ]
99
+ },
100
+ {
101
+ "data": {
102
+ "text/plain": [
103
+ "np.float64(0.71)"
104
+ ]
105
+ },
106
+ "execution_count": 54,
107
+ "metadata": {},
108
+ "output_type": "execute_result"
109
+ }
110
+ ],
111
+ "source": [
112
+ "EVAL_BATCH_SIZE = 64\n",
113
+ "N_SAMPLES = 500\n",
114
+ "def evaluate_accuracy():\n",
115
+ "\n",
116
+ " dataset = load_dataset(DATASET_PATH).shuffle()[\"test\"][:N_SAMPLES]\n",
117
+ " N_BATCHES = len(dataset[\"text\"])//EVAL_BATCH_SIZE\n",
118
+ "\n",
119
+ " accuracy = 0\n",
120
+ " for i in range(N_BATCHES+1):\n",
121
+ "\n",
122
+ " start = i*EVAL_BATCH_SIZE\n",
123
+ " end = min(N_SAMPLES, (i+1)*EVAL_BATCH_SIZE)\n",
124
+ " print(start, end)\n",
125
+ " samples, labels = dataset[\"text\"][start:end], dataset[\"label\"][start:end]\n",
126
+ " \n",
127
+ " model.eval()\n",
128
+ " encoded_batch = tokenizer(\n",
129
+ " [preprocess(t) for t in samples],\n",
130
+ " padding=True, # pad to same length\n",
131
+ " truncation=True, # truncate long texts\n",
132
+ " return_tensors=\"pt\",\n",
133
+ " )\n",
134
+ "\n",
135
+ " with torch.no_grad():\n",
136
+ " output = model(**encoded_batch)\n",
137
+ " \n",
138
+ " logits = output[0].detach().cpu().numpy()\n",
139
+ " scores = softmax(logits, axis=-1)\n",
140
+ " pred_labels = scores.argmax(axis=-1)\n",
141
+ " accuracy += sum(pred_labels==labels)\n",
142
+ " accuracy/=N_SAMPLES\n",
143
+ " return accuracy\n",
144
+ "evaluate_accuracy()"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "code",
149
+ "execution_count": 1,
150
+ "id": "dbd3bb8c",
151
+ "metadata": {},
152
+ "outputs": [],
153
+ "source": [
154
+ "def _load_test_data():\n",
155
+ " \"\"\"\n",
156
+ " Expects CSV with columns: text,label\n",
157
+ " label values must be one of labels (negative, neutral, positive) or their indices (0,1,2).\n",
158
+ " \"\"\"\n",
159
+ " df = pd.read_csv(TEST_DATA_PATH)\n",
160
+ " # normalize label column to strings matching our 'labels' list\n",
161
+ " if np.issubdtype(df[\"label\"].dtype, np.number):\n",
162
+ " df[\"label\"] = df[\"label\"].astype(int).map(lambda i: labels[i])\n",
163
+ " else:\n",
164
+ " df[\"label\"] = df[\"label\"].str.lower().str.strip()\n",
165
+ " # keep only supported labels\n",
166
+ " df = df[df[\"label\"].isin(labels)].dropna(subset=[\"text\", \"label\"])\n",
167
+ " return df"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": null,
173
+ "id": "ec0b086e",
174
+ "metadata": {},
175
+ "outputs": [],
176
+ "source": []
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": null,
181
+ "id": "800c8018",
182
+ "metadata": {},
183
+ "outputs": [],
184
+ "source": []
185
+ }
186
+ ],
187
+ "metadata": {
188
+ "kernelspec": {
189
+ "display_name": "ProjectEnv",
190
+ "language": "python",
191
+ "name": "python3"
192
+ },
193
+ "language_info": {
194
+ "codemirror_mode": {
195
+ "name": "ipython",
196
+ "version": 3
197
+ },
198
+ "file_extension": ".py",
199
+ "mimetype": "text/x-python",
200
+ "name": "python",
201
+ "nbconvert_exporter": "python",
202
+ "pygments_lexer": "ipython3",
203
+ "version": "3.11.10"
204
+ }
205
+ },
206
+ "nbformat": 4,
207
+ "nbformat_minor": 5
208
+ }
src/train_model.py CHANGED
@@ -1,4 +1,4 @@
1
- from app.utils import preprocess
2
  import urllib
3
  import csv
4
  import os
@@ -8,8 +8,8 @@ from transformers import (
8
  TrainingArguments, Trainer, EarlyStoppingCallback,
9
  DataCollatorWithPadding
10
  )
11
- from datasets import load_dataset,load_from_disk
12
  from pathlib import Path
 
13
 
14
 
15
  # --- Device detection ---
@@ -73,14 +73,8 @@ model.gradient_checkpointing_enable()
73
  model.config.use_cache = False
74
 
75
  #### DATASET LOADING
76
-
77
-
78
- dataset_path = Path("data/dataset")
79
- if dataset_path.exists():
80
- dataset = load_from_disk(dataset_path)
81
- else:
82
- dataset = load_dataset('tweet_eval', 'sentiment')
83
-
84
 
85
 
86
  # ---- COPY-PASTE FROM HERE ----
 
1
+ from app.utils import preprocess, load_dataset
2
  import urllib
3
  import csv
4
  import os
 
8
  TrainingArguments, Trainer, EarlyStoppingCallback,
9
  DataCollatorWithPadding
10
  )
 
11
  from pathlib import Path
12
+ from app.config import DATASET_PATH
13
 
14
 
15
  # --- Device detection ---
 
73
  model.config.use_cache = False
74
 
75
  #### DATASET LOADING
76
+
77
+ dataset = load_dataset(DATASET_PATH)
 
 
 
 
 
 
78
 
79
 
80
  # ---- COPY-PASTE FROM HERE ----