lucabadiali committed
Commit b603fd0 · 1 Parent(s): f97ec54

Added app testing
README.md CHANGED
@@ -1 +1,57 @@
- # MLOPS_Project
+ # MLOPS_Project
+
+ PHASE 1)
+ - Manage to train a model
+
+ PHASE 2)
+
+
+ Public Colab notebook (single link)
+
+ Loads a ready model: cardiffnlp/twitter-roberta-base-sentiment-latest (or -sep2022).
+
+ Loads a public dataset (e.g., tweet_eval/sentiment).
+
+ Runs inference + evaluation (accuracy, F1 macro, recall macro).
+
+ (Optional but easy) light fine-tuning on a fraction of the data (small batch, few epochs).
+
+ Shows a tiny monitoring demo: aggregate % positive/neutral/negative over a sample and plot a time series (synthetic timestamps are fine).
+
+ Links to your GitHub repo at the top.
+
+ Public GitHub repo
+
+ src/ with:
+
+ train.py — fine-tuning script (works on CPU/MPS/CUDA; small batch + gradient accumulation).
+
+ eval.py — evaluate a model checkpoint on validation/test.
+
+ infer.py — batch inference from CSV/JSONL.
+
+ app.py — (optional) Gradio mini UI.
+
+ data_utils.py — your subset functions + tokenization helpers.
+
+ requirements.txt
+
+ README.md — how to run locally + what the project does.
+
+ .github/workflows/ci.yml — CI runs lint + tests + a tiny dry-run of training (e.g., 500 samples, 1 epoch).
+
+ MODEL_CARD.md — brief model card (data, metrics, limits/bias).
+
+ tests/test_smoke.py — imports + 10-sample training/eval smoke test (a sketch follows this diff).
+
+ Minimal documentation (in README)
+
+ Goal: monitor social sentiment for MachineInnovators Inc.
+
+ Model choice: use pre-trained RoBERTa; FastText kept as optional baseline.
+
+ Pipeline overview: data → tokenize → (optional fine-tune) → evaluate → artifact save → (optional deploy).
+
+ How to reproduce: exact commands.
+
+ Monitoring idea: log predictions; compute daily sentiment mix; simple drift check (distribution shift of logits); a sketch follows this diff.
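A minimal sketch of what the planned tests/test_smoke.py could look like; the file name comes from the README plan, but the checkpoint, slice size, and arguments below are assumptions, not committed code:

```python
# Hypothetical tests/test_smoke.py -- a sketch of the planned 10-sample smoke test.
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

MODEL_NAME = "cardiffnlp/twitter-roberta-base-sep2022"  # assumed checkpoint


def test_tiny_train_and_eval(tmp_path):
    # 10 samples are enough to prove the pipeline wires together end to end.
    ds = load_dataset("tweet_eval", "sentiment", split="train[:10]")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    ds = ds.map(
        lambda batch: tokenizer(batch["text"], truncation=True,
                                padding="max_length", max_length=64),
        batched=True,
    )

    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)
    args = TrainingArguments(
        output_dir=str(tmp_path),
        per_device_train_batch_size=2,
        num_train_epochs=1,
        report_to="none",
    )
    trainer = Trainer(model=model, args=args, train_dataset=ds, eval_dataset=ds)
    trainer.train()
    assert "eval_loss" in trainer.evaluate()
```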
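The "Monitoring idea" item can stay tiny. A sketch of the daily sentiment mix plus a simple drift check over logged softmax outputs; the function names, column names, and histogram binning are illustrative assumptions, not part of this commit:

```python
# Hypothetical monitoring sketch: daily sentiment mix + simple logit-distribution drift check.
import numpy as np
import pandas as pd
from scipy.spatial.distance import jensenshannon


def daily_sentiment_mix(df: pd.DataFrame) -> pd.DataFrame:
    """df has columns: timestamp (datetime64), prediction in {negative, neutral, positive}."""
    return (
        df.set_index("timestamp")
          .groupby([pd.Grouper(freq="D"), "prediction"])
          .size()
          .unstack(fill_value=0)
          .pipe(lambda t: t.div(t.sum(axis=1), axis=0))  # each row sums to 1
    )


def drift_score(ref_probs: np.ndarray, new_probs: np.ndarray, bins: int = 20) -> float:
    """Jensen-Shannon distance between histograms of the max softmax probability.

    ref_probs/new_probs: (n, 3) softmax outputs from a reference window and a new window.
    """
    ref_hist, edges = np.histogram(ref_probs.max(axis=1), bins=bins, range=(0, 1), density=True)
    new_hist, _ = np.histogram(new_probs.max(axis=1), bins=edges, density=True)
    return float(jensenshannon(ref_hist + 1e-12, new_hist + 1e-12))
```

For the demo, a fixed threshold on drift_score between a reference week and the current day would be enough to flag a shift worth inspecting.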
data/load_data.py ADDED
@@ -0,0 +1,11 @@
+ from datasets import load_dataset, DatasetDict
+ from pathlib import Path
+
+
+ DATA_FOLDER_PATH = Path(__file__).resolve().parent
+ dataset_path = DATA_FOLDER_PATH / "dataset"
+
+ # def get_tweet_eval_sentiment() -> DatasetDict:
+ #     return load_dataset("tweet_eval", "sentiment")
+ dataset = load_dataset("tweet_eval", "sentiment")
+ dataset.save_to_disk(dataset_path)
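Since the script caches the dataset next to itself, downstream code would read it back with datasets.load_from_disk; a minimal sketch assuming the folder layout the script above creates:

```python
# Hypothetical consumer of the cache written by data/load_data.py.
from pathlib import Path
from datasets import load_from_disk

dataset_path = Path("data") / "dataset"      # folder created by load_data.py
dataset = load_from_disk(str(dataset_path))  # DatasetDict: train/test/validation
print(dataset["train"][0])                   # {'text': ..., 'label': ...}
```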
nb.ipynb CHANGED
@@ -2,17 +2,25 @@
   "cells": [
  {
   "cell_type": "code",
-   "execution_count": 3,
   "id": "3a03d7b9",
   "metadata": {},
   "outputs": [
   {
    "data": {
     "text/plain": [
-      "device(type='cpu')"
     ]
    },
-   "execution_count": 3,
    "metadata": {},
    "output_type": "execute_result"
   }
@@ -28,8 +36,10 @@
    "import torch.utils.data as data_utils\n",
    "import torch\n",
    "\n",
-    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-    "device\n"
   ]
  },
  {
@@ -50,7 +60,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
   "id": "0b451180",
   "metadata": {},
   "outputs": [
@@ -60,7 +70,7 @@
     "['negative', 'neutral', 'positive']"
    ]
   },
-   "execution_count": 4,
   "metadata": {},
   "output_type": "execute_result"
  }
@@ -79,103 +89,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 66,
-   "id": "ede5d09e",
   "metadata": {},
   "outputs": [
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-     "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n",
-     "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
    ]
   }
  ],
  "source": [
-   "MODEL = \"FacebookAI/roberta-base\"\n",
-   "model = RobertaForSequenceClassification.from_pretrained(\n",
-   "    MODEL, num_labels=3, problem_type=\"multi_label_classification\")\n",
-   "tokenizer = AutoTokenizer.from_pretrained(MODEL)"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": 92,
-  "id": "c4bafe30",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "train_text_file = \"train_text.txt\"\n",
-   "with open(train_text_file, \"r\") as f:\n",
-   "    texts = f.readlines()\n",
-   "\n",
-   "train_label_file = \"train_labels.txt\"\n",
-   "with open(train_label_file, \"r\") as f:\n",
-   "    labels = f.readlines()\n",
    "\n",
-   "len(texts), len(labels)\n",
-   "\n",
-   "texts, labels = texts[:100], labels[:100]\n",
-   "\n"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "87030ba1",
-  "metadata": {},
-  "outputs": [
-   {
-    "data": {
-     "text/plain": [
-      "(2, 100)"
-     ]
-    },
-    "execution_count": 93,
-    "metadata": {},
-    "output_type": "execute_result"
-   }
-  ],
-  "source": [
-   "encoded_inputs = tokenizer([preprocess(t.strip()) for t in texts], return_tensors='pt', padding=True,\n",
-   "                           truncation=True)\n",
-   "labels = [int(labels[i].strip()) for i in range(len(labels))]\n",
-   "labels = torch.tensor(labels, dtype=torch.int)\n",
-   "len(encoded_inputs), len(labels)"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": 94,
-  "id": "e9548356",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "dataset = data_utils.TensorDataset(encoded_inputs[\"input_ids\"], encoded_inputs[\"attention_mask\"], labels)\n",
-   "test_dataloader = data_utils.DataLoader(dataset, batch_size=10, shuffle=True)\n",
-   "\n"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "2f40f7fd",
-  "metadata": {},
-  "outputs": [],
-  "source": []
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "08435697",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "\n",
-   "model = AutoModelForSequenceClassification.from_pretrained(MODEL)\n",
    "tokenizer = AutoTokenizer.from_pretrained(MODEL)\n",
-   "text = \"Good night 😊\"\n",
    "text = preprocess(text)\n",
    "encoded_input = tokenizer(text, return_tensors='pt')\n",
    "output = model(**encoded_input)\n",
@@ -185,7 +117,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
   "id": "cf6dfc8f",
   "metadata": {},
   "outputs": [
@@ -193,9 +125,9 @@
    "name": "stdout",
    "output_type": "stream",
    "text": [
-     "1) positive 0.8466\n",
-     "2) neutral 0.1458\n",
-     "3) negative 0.0076\n"
    ]
   }
  ],
@@ -218,7 +150,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
   "id": "0a6382f4",
   "metadata": {},
   "outputs": [],
@@ -232,7 +164,7 @@
    "from datasets import load_dataset, concatenate_datasets\n",
    "\n",
    "def tokenize_function(examples):\n",
-    "    return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True)\n",
    "\n",
    "def compute_metrics(eval_pred):\n",
    "    logits, labels = eval_pred\n",
@@ -247,110 +179,412 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
-   "id": "dafaf26d",
   "metadata": {},
   "outputs": [
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-     "Map: 100%|██████████| 45615/45615 [00:10<00:00, 4346.61 examples/s]\n",
-     "Map: 100%|██████████| 12284/12284 [00:03<00:00, 3758.76 examples/s]\n",
-     "Map: 100%|██████████| 2000/2000 [00:00<00:00, 4820.04 examples/s]\n",
     "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sep2022 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n",
     "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
-     "/workspaces/MLOPS_Project/Env/lib/python3.12/site-packages/torch/utils/data/dataloader.py:668: UserWarning: 'pin_memory' argument is set as true but no accelerator is found, then device pinned memory won't be used.\n",
-     "  warnings.warn(warn_msg)\n"
    ]
   },
   {
-    "ename": "",
-    "evalue": "",
-    "output_type": "error",
-    "traceback": [
-     "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
-     "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
-     "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
-     "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
-    ]
   }
  ],
  "source": [
-   "MODEL_NAME = 'cardiffnlp/twitter-roberta-base-sep2022' # change to desired model from the hub\n",
-   "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n",
    "tokenized_datasets = dataset.map(tokenize_function, batched=True)\n",
    "\n",
-   "# augment train set with test set, for downstream apps only - DO NOT EVALUATE ON TEST\n",
-   "# tokenized_datasets['train+test'] = concatenate_datasets([tokenized_datasets['train'],\n",
-   "#                                                          tokenized_datasets['test']])\n",
    "\n",
-   "model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)\n",
    "\n",
    "\n",
    "training_args = TrainingArguments(\n",
-    "    output_dir=\"test_trainer\",\n",
    "    learning_rate=1e-5,\n",
-    "    per_device_train_batch_size=16,   # modern name\n",
-    "    per_device_eval_batch_size=16,   # modern name\n",
-    "    num_train_epochs=10,\n",
    "    weight_decay=0.01,\n",
    "    warmup_ratio=0.1,\n",
    "\n",
-    "    eval_strategy=\"epoch\",\n",
-    "    logging_strategy=\"epoch\",\n",
-    "    save_strategy=\"epoch\",\n",
    "\n",
    "    load_best_model_at_end=True,\n",
    "    metric_for_best_model=\"recall\",\n",
    "    greater_is_better=True,\n",
    "    report_to=\"none\",\n",
    ")\n",
    "\n",
-   "metric = evaluate.load('recall')  # default metric for sentiment dataset is recall (macro)\n",
    "\n",
    "trainer = Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
-    "    train_dataset=tokenized_datasets['train'],\n",
-    "    eval_dataset=tokenized_datasets['validation'],\n",
    "    compute_metrics=compute_metrics,\n",
    ")\n",
    "\n",
    "trainer.train()\n",
    "\n",
-   "trainer.create_model_card()\n",
-   "trainer.save_model('saved_model')\n",
-   "\n",
-   "\n"
  ]
 },
 {
  "cell_type": "code",
-  "execution_count": 1,
-  "id": "183032a5",
  "metadata": {},
  "outputs": [
   {
-    "ename": "NameError",
-    "evalue": "name 'torch' is not defined",
-    "output_type": "error",
-    "traceback": [
-     "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-     "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
-     "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mtorch\u001b[49m.cuda.is_available()\n",
-     "\u001b[31mNameError\u001b[39m: name 'torch' is not defined"
    ]
   }
  ],
  "source": [
-   "torch.cuda.is_available()\n"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
-  "id": "1246f9c6",
  "metadata": {},
  "outputs": [],
  "source": []
@@ -358,7 +592,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Env",
   "language": "python",
   "name": "python3"
  },
@@ -372,7 +606,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 
  "cells": [
 {
  "cell_type": "code",
+  "execution_count": 1,
  "id": "3a03d7b9",
  "metadata": {},
  "outputs": [
+   {
+    "name": "stderr",
+    "output_type": "stream",
+    "text": [
+     "/Users/lucabadiali/Desktop/professionAI/modulo9/Project/MLOPS_Project/ProjectEnv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+     "  from .autonotebook import tqdm as notebook_tqdm\n"
+    ]
+   },
  {
   "data": {
    "text/plain": [
+     "device(type='mps')"
    ]
   },
+   "execution_count": 1,
   "metadata": {},
   "output_type": "execute_result"
  }
 
   "import torch.utils.data as data_utils\n",
   "import torch\n",
   "\n",
+   "\n",
+   "device = torch.device(\"mps\" if torch.mps.is_available() else \n",
+   "                      \"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+   "device"
  ]
 },
 {
 
 },
 {
  "cell_type": "code",
+  "execution_count": 3,
  "id": "0b451180",
  "metadata": {},
  "outputs": [
 
    "['negative', 'neutral', 'positive']"
   ]
  },
+  "execution_count": 3,
  "metadata": {},
  "output_type": "execute_result"
 }
 
 },
 {
  "cell_type": "code",
+  "execution_count": 7,
+  "id": "08435697",
  "metadata": {},
  "outputs": [
  {
   "name": "stderr",
   "output_type": "stream",
   "text": [
+    "Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+    "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+    "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
   ]
  }
 ],
 "source": [
   "\n",
+   "model = AutoModelForSequenceClassification.from_pretrained(\"cardiffnlp/twitter-roberta-base-sentiment-latest\")\n",
   "tokenizer = AutoTokenizer.from_pretrained(MODEL)\n",
+   "text = \"today I ate some pasta\"\n",
   "text = preprocess(text)\n",
   "encoded_input = tokenizer(text, return_tensors='pt')\n",
   "output = model(**encoded_input)\n",
 
 },
 {
  "cell_type": "code",
+  "execution_count": 17,
  "id": "cf6dfc8f",
  "metadata": {},
  "outputs": [
 
   "name": "stdout",
   "output_type": "stream",
   "text": [
+    "1) neutral 0.6674\n",
+    "2) positive 0.3132\n",
+    "3) negative 0.0194\n"
   ]
  }
 ],
 
 },
 {
  "cell_type": "code",
+  "execution_count": 21,
  "id": "0a6382f4",
  "metadata": {},
  "outputs": [],
 
   "from datasets import load_dataset, concatenate_datasets\n",
   "\n",
   "def tokenize_function(examples):\n",
+   "    return tokenizer(examples[\"text\"], max_length=128, truncation=True)\n",
   "\n",
   "def compute_metrics(eval_pred):\n",
   "    logits, labels = eval_pred\n",
 
 },
 {
  "cell_type": "code",
+  "execution_count": 22,
+  "id": "fcb7fe6d",
+  "metadata": {},
+  "outputs": [
+   {
+    "data": {
+     "text/plain": [
+      "DatasetDict({\n",
+      "    train: Dataset({\n",
+      "        features: ['text', 'label'],\n",
+      "        num_rows: 45615\n",
+      "    })\n",
+      "    test: Dataset({\n",
+      "        features: ['text', 'label'],\n",
+      "        num_rows: 12284\n",
+      "    })\n",
+      "    validation: Dataset({\n",
+      "        features: ['text', 'label'],\n",
+      "        num_rows: 2000\n",
+      "    })\n",
+      "})"
+     ]
+    },
+    "execution_count": 22,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
+  "source": [
+   "dataset"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 12,
+  "id": "0fabaaea",
  "metadata": {},
  "outputs": [
  {
   "name": "stderr",
   "output_type": "stream",
   "text": [
+    "`torch_dtype` is deprecated! Use `dtype` instead!\n",
    "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sep2022 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n",
    "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+    "/var/folders/nc/1wpyndzx5ps8nbt0b5zm9jx80000gn/T/ipykernel_2067/3094520460.py:119: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
+    "  trainer = Trainer(\n"
   ]
  },
  {
+   "data": {
+    "text/html": [
+     "\n",
+     "    <div>\n",
+     "      \n",
+     "      <progress value='858' max='858' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+     "      [858/858 21:50, Epoch 3/3]\n",
+     "    </div>\n",
+     "    <table border=\"1\" class=\"dataframe\">\n",
+     "  <thead>\n",
+     "    <tr style=\"text-align: left;\">\n",
+     "      <th>Step</th>\n",
+     "      <th>Training Loss</th>\n",
+     "      <th>Validation Loss</th>\n",
+     "      <th>Accuracy</th>\n",
+     "      <th>F1 Macro</th>\n",
+     "      <th>Recall</th>\n",
+     "    </tr>\n",
+     "  </thead>\n",
+     "  <tbody>\n",
+     "    <tr>\n",
+     "      <td>500</td>\n",
+     "      <td>0.594100</td>\n",
+     "      <td>0.668015</td>\n",
+     "      <td>0.716000</td>\n",
+     "      <td>0.697675</td>\n",
+     "      <td>0.702171</td>\n",
+     "    </tr>\n",
+     "  </tbody>\n",
+     "</table><p>"
+    ],
+    "text/plain": [
+     "<IPython.core.display.HTML object>"
+    ]
+   },
+   "metadata": {},
+   "output_type": "display_data"
  }
 ],
 "source": [
+   "import os\n",
+   "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"   # avoid fork/parallelism warnings on macOS\n",
+   "\n",
+   "import torch\n",
+   "from transformers import (\n",
+   "    AutoTokenizer, AutoModelForSequenceClassification,\n",
+   "    TrainingArguments, Trainer, EarlyStoppingCallback,\n",
+   "    DataCollatorWithPadding\n",
+   ")\n",
+   "import evaluate\n",
+   "\n",
+   "# --- Device detection ---\n",
+   "if torch.cuda.is_available():\n",
+   "    device = \"cuda\"\n",
+   "    use_bf16 = torch.cuda.is_bf16_supported()\n",
+   "    use_fp16 = not use_bf16\n",
+   "elif torch.backends.mps.is_available():\n",
+   "    device = \"mps\"\n",
+   "    use_bf16 = False\n",
+   "    use_fp16 = False\n",
+   "else:\n",
+   "    device = \"cpu\"\n",
+   "    use_bf16 = False\n",
+   "    use_fp16 = False\n",
+   "\n",
+   "MODEL_NAME = \"cardiffnlp/twitter-roberta-base-sep2022\"\n",
+   "\n",
+   "# --- Tokenizer: keep short max_length to save memory ---\n",
+   "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, model_max_length=128)\n",
+   "\n",
+   "def tokenize_function(batch):\n",
+   "    return tokenizer(\n",
+   "        batch[\"text\"],\n",
+   "        truncation=True,\n",
+   "        max_length=128,\n",
+   "        padding=False  # we will pad per-batch via DataCollatorWithPadding\n",
+   "    )\n",
+   "\n",
+   "# If your dataset column is \"label\", keep it; Trainer can handle it.\n",
   "tokenized_datasets = dataset.map(tokenize_function, batched=True)\n",
   "\n",
+   "# --- Data collator that pads dynamically ---\n",
+   "data_collator = DataCollatorWithPadding(\n",
+   "    tokenizer=tokenizer,\n",
+   "    pad_to_multiple_of=8 if (device == \"cuda\" and (use_bf16 or use_fp16)) else None\n",
+   ")\n",
+   "\n",
+   "# --- Model dtype choice ---\n",
+   "if device == \"cuda\" and use_bf16:\n",
+   "    load_dtype = torch.bfloat16\n",
+   "elif device == \"cuda\" and use_fp16:\n",
+   "    load_dtype = torch.float16\n",
+   "else:\n",
+   "    load_dtype = torch.float32  # MPS/CPU -> fp32\n",
   "\n",
+   "model = AutoModelForSequenceClassification.from_pretrained(\n",
+   "    MODEL_NAME, num_labels=3, torch_dtype=load_dtype\n",
+   ")\n",
+   "model.gradient_checkpointing_enable()\n",
+   "model.config.use_cache = False\n",
   "\n",
+   "# --- Training args: stop forking on macOS, fix pin_memory ---\n",
+   "trainer_fp16 = bool(device == \"cuda\" and use_fp16)\n",
+   "trainer_bf16 = bool(device == \"cuda\" and use_bf16)\n",
   "\n",
   "training_args = TrainingArguments(\n",
+   "    output_dir=\"artifacts\",\n",
   "    learning_rate=1e-5,\n",
+   "    per_device_train_batch_size=4,\n",
+   "    per_device_eval_batch_size=8,\n",
+   "    gradient_accumulation_steps=8,\n",
+   "    num_train_epochs=3,\n",
   "    weight_decay=0.01,\n",
   "    warmup_ratio=0.1,\n",
+   "    lr_scheduler_type=\"linear\",\n",
   "\n",
+   "    eval_strategy=\"steps\",\n",
+   "    logging_strategy=\"steps\",\n",
+   "    save_strategy=\"steps\",\n",
+   "    eval_steps=500,\n",
+   "    logging_steps=100,\n",
+   "    save_steps=500,\n",
   "\n",
   "    load_best_model_at_end=True,\n",
   "    metric_for_best_model=\"recall\",\n",
   "    greater_is_better=True,\n",
+   "    save_total_limit=2,\n",
+   "\n",
+   "    # Precision\n",
+   "    fp16=trainer_fp16,\n",
+   "    bf16=trainer_bf16,\n",
+   "\n",
+   "    # DataLoader knobs (avoid fork/tokenizers warning on macOS)\n",
+   "    dataloader_num_workers=0,   # <- key for macOS/MPS\n",
+   "    dataloader_pin_memory=(device == \"cuda\"),  # False on MPS/CPU, True on CUDA\n",
+   "    group_by_length=True,\n",
   "    report_to=\"none\",\n",
   ")\n",
   "\n",
+   "# --- Metrics (macro recall, etc.) ---\n",
+   "recall_metric = evaluate.load(\"recall\")\n",
+   "acc_metric = evaluate.load(\"accuracy\")\n",
+   "f1_metric = evaluate.load(\"f1\")\n",
+   "\n",
+   "def compute_metrics(eval_pred):\n",
+   "    logits, labels = eval_pred\n",
+   "    preds = logits.argmax(axis=-1)\n",
+   "    return {\n",
+   "        \"accuracy\": acc_metric.compute(predictions=preds, references=labels)[\"accuracy\"],\n",
+   "        \"f1_macro\": f1_metric.compute(predictions=preds, references=labels, average=\"macro\")[\"f1\"],\n",
+   "        \"recall\": recall_metric.compute(predictions=preds, references=labels, average=\"macro\")[\"recall\"],\n",
+   "    }\n",
+   "\n",
+   "callbacks = [EarlyStoppingCallback(early_stopping_patience=2)]\n",
+   "\n",
+   "small_train = tokenized_datasets[\"train\"].select(range(100))\n",
+   "small_eval = tokenized_datasets[\"validation\"].select(range(1))\n",
   "\n",
   "trainer = Trainer(\n",
   "    model=model,\n",
   "    args=training_args,\n",
+   "    train_dataset=train_ds,\n",
+   "    eval_dataset=eval_ds,\n",
   "    compute_metrics=compute_metrics,\n",
+   "    data_collator=data_collator,  # <- important\n",
+   "    tokenizer=tokenizer,\n",
+   "    callbacks=callbacks,\n",
  ")\n",
  "\n",
+   "# Optional explicit device move (Trainer usually handles it)\n",
+   "model.to(device)\n",
   "trainer.train()\n",
   "\n",
+   "trainer.save_model(\"saved_model\")\n",
+   "tokenizer.save_pretrained(\"saved_model\")\n",
+   "try:\n",
+   "    trainer.create_model_card()\n",
+   "except Exception:\n",
+   "    pass\n"
  ]
 },
 {
  "cell_type": "code",
+  "execution_count": 20,
+  "id": "30d9a79b",
  "metadata": {},
  "outputs": [
  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "1) neutral 0.7894\n",
+    "2) positive 0.1149\n",
+    "3) negative 0.0957\n"
   ]
  }
 ],
 "source": [
+   "text = \"The second law of thermodynamics is about entropy\"\n",
+   "text = preprocess(text)\n",
+   "encoded_input = tokenizer(text, return_tensors='pt').to(device)\n",
+   "output = model(**encoded_input)\n",
+   "scores = output[0][0].detach().cpu().numpy()\n",
+   "scores = softmax(scores)\n",
+   "ranking = np.argsort(scores)\n",
+   "ranking = ranking[::-1]\n",
+   "for i in range(scores.shape[0]):\n",
+   "    l = labels[ranking[i]]\n",
+   "    s = scores[ranking[i]]\n",
+   "    print(f\"{i+1}) {l} {np.round(float(s), 4)}\")"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 9,
+  "id": "c4376c93",
+  "metadata": {},
+  "outputs": [
+   {
+    "name": "stderr",
+    "output_type": "stream",
+    "text": [
+     "Map: 100%|██████████| 1000/1000 [00:00<00:00, 10983.68 examples/s]\n"
+    ]
+   }
+  ],
+  "source": [
+   "# ---- COPY-PASTE FROM HERE ----\n",
+   "import os\n",
+   "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
+   "\n",
+   "from datasets import DatasetDict\n",
+   "from transformers import AutoTokenizer, DataCollatorWithPadding\n",
+   "\n",
+   "def make_trainer_ready(\n",
+   "    raw_ds: DatasetDict,\n",
+   "    model_name: str = \"cardiffnlp/twitter-roberta-base-sep2022\",\n",
+   "    train_frac: float = 0.2,\n",
+   "    val_frac: float = 0.2,\n",
+   "    seed: int = 42,\n",
+   "    label_col: str = \"label\",\n",
+   "    text_col: str = \"text\",\n",
+   "    max_length: int = 128,\n",
+   "    pad_to_multiple_of_8_on_cuda: bool = True,\n",
+   "):\n",
+   "    \"\"\"\n",
+   "    Returns (train_ds, eval_ds, data_collator, tokenizer) ready for HF Trainer.\n",
+   "    - Ensures there's a validation split (creates one from train if missing).\n",
+   "    - Takes fractional subsets, stratified by label when possible.\n",
+   "    - Tokenizes and keeps only the columns Trainer expects.\n",
+   "    \"\"\"\n",
+   "    assert 0 < train_frac <= 1.0, \"train_frac must be in (0,1].\"\n",
+   "    assert 0 < val_frac <= 1.0, \"val_frac must be in (0,1].\"\n",
+   "    assert text_col in raw_ds[\"train\"].column_names, f\"Missing text column: {text_col}\"\n",
+   "    assert label_col in raw_ds[\"train\"].column_names, f\"Missing label column: {label_col}\"\n",
+   "\n",
+   "    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, model_max_length=max_length)\n",
+   "\n",
+   "    # 1) Ensure we have a validation split\n",
+   "    if \"validation\" not in raw_ds:\n",
+   "        split = raw_ds[\"train\"].train_test_split(\n",
+   "            test_size=val_frac,\n",
+   "            stratify_by_column=label_col if label_col in raw_ds[\"train\"].column_names else None,\n",
+   "            seed=seed,\n",
+   "        )\n",
+   "        raw_ds = DatasetDict(train=split[\"train\"], validation=split[\"test\"])\n",
+   "    else:\n",
+   "        raw_ds = DatasetDict(train=raw_ds[\"train\"], validation=raw_ds[\"validation\"])\n",
+   "\n",
+   "    # 2) Take fractions (stratified when possible)\n",
+   "    def take_frac(ds, frac):\n",
+   "        if frac >= 1.0:  # keep full split\n",
+   "            return ds\n",
+   "        out = ds.train_test_split(\n",
+   "            test_size=1 - frac,\n",
+   "            stratify_by_column=label_col if label_col in ds.column_names else None,\n",
+   "            seed=seed,\n",
+   "        )\n",
+   "        return out[\"train\"]  # the kept fraction\n",
+   "\n",
+   "    small_train = take_frac(raw_ds[\"train\"], train_frac)\n",
+   "    small_eval = take_frac(raw_ds[\"validation\"], val_frac)\n",
+   "\n",
+   "    # 3) Tokenize (no padding here; we pad per-batch with the collator)\n",
+   "    def tok(batch):\n",
+   "        return tokenizer(batch[text_col], truncation=True, max_length=max_length, padding=False)\n",
+   "\n",
+   "    small_train_tok = small_train.map(tok, batched=True, remove_columns=[c for c in small_train.column_names if c not in (text_col, label_col)])\n",
+   "    small_eval_tok = small_eval.map(tok, batched=True, remove_columns=[c for c in small_eval.column_names if c not in (text_col, label_col)])\n",
+   "\n",
+   "    # 4) Keep only the columns Trainer needs\n",
+   "    keep_cols = [\"input_ids\", \"attention_mask\", label_col]\n",
+   "    small_train_tok = small_train_tok.remove_columns([c for c in small_train_tok.column_names if c not in keep_cols])\n",
+   "    small_eval_tok = small_eval_tok.remove_columns([c for c in small_eval_tok.column_names if c not in keep_cols])\n",
+   "\n",
+   "    # 5) Data collator with dynamic padding (CUDA gets pad_to_multiple_of=8)\n",
+   "    import torch\n",
+   "    pad_to_mult = 8 if (pad_to_multiple_of_8_on_cuda and torch.cuda.is_available()) else None\n",
+   "    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, pad_to_multiple_of=pad_to_mult)\n",
+   "\n",
+   "    return small_train_tok, small_eval_tok, data_collator, tokenizer\n",
+   "\n",
+   "# ---- USAGE EXAMPLE ----\n",
+   "# Assumes you already have `dataset` (a DatasetDict with 'train' (and maybe 'validation')).\n",
+   "# Example:\n",
+   "# from datasets import load_dataset\n",
+   "# dataset = load_dataset(\"tweet_eval\", \"sentiment\")\n",
+   "\n",
+   "train_ds, eval_ds, data_collator, tokenizer = make_trainer_ready(\n",
+   "    raw_ds=dataset,\n",
+   "    model_name=\"cardiffnlp/twitter-roberta-base-sep2022\",\n",
+   "    train_frac=0.2,  # take 20% of train\n",
+   "    val_frac=0.5,    # take 50% of validation\n",
+   "    seed=42,\n",
+   "    label_col=\"label\",\n",
+   "    text_col=\"text\",\n",
+   "    max_length=128,\n",
+   ")\n",
+   "\n",
+   "# Pass directly to Trainer:\n",
+   "# from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer\n",
+   "# model = AutoModelForSequenceClassification.from_pretrained(\"cardiffnlp/twitter-roberta-base-sep2022\", num_labels=3)\n",
+   "# args = TrainingArguments(output_dir=\"out\", per_device_train_batch_size=4, per_device_eval_batch_size=8, evaluation_strategy=\"epoch\", report_to=\"none\")\n",
+   "# trainer = Trainer(model=model, args=args, train_dataset=train_ds, eval_dataset=eval_ds, data_collator=data_collator, tokenizer=tokenizer)\n",
+   "# trainer.train()\n",
+   "# ---- COPY-PASTE UNTIL HERE ----\n"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 11,
+  "id": "12f775be",
+  "metadata": {},
+  "outputs": [
+   {
+    "data": {
+     "text/plain": [
+      "Dataset({\n",
+      "    features: ['label', 'input_ids', 'attention_mask'],\n",
+      "    num_rows: 1000\n",
+      "})"
+     ]
+    },
+    "execution_count": 11,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
+  "source": [
+   "eval_ds"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
+  "id": "f87c7153",
  "metadata": {},
  "outputs": [],
  "source": []
 
  ],
  "metadata": {
   "kernelspec": {
+   "display_name": "ProjectEnv",
   "language": "python",
   "name": "python3"
  },
 
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
  }
 },
 "nbformat": 4,
pytest.ini ADDED
@@ -0,0 +1,2 @@
+ [pytest]
+ pythonpath = src
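`pythonpath = src` makes pytest prepend `src/` to `sys.path`, which is what lets the tests below import the service with `from app.app import app` without installing the package.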
src/__pycache__/app.cpython-311.pyc ADDED
Binary file (5 kB)
src/__pycache__/utils.cpython-311.pyc ADDED
Binary file (785 Bytes)
src/app/__init__.py ADDED
File without changes
src/app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (198 Bytes)
src/app/__pycache__/app.cpython-311.pyc ADDED
Binary file (4.53 kB)
src/app/__pycache__/utils.cpython-311.pyc ADDED
Binary file (789 Bytes)
src/app/app.py ADDED
@@ -0,0 +1,77 @@
+ from fastapi import FastAPI, HTTPException
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ from .utils import preprocess
+ from scipy.special import softmax
+ import numpy as np
+ from pydantic import BaseModel
+ import urllib.request
+ import csv
+ import requests
+ from typing import Union, List
+ import torch
+
+
+
+ app = FastAPI()
+
+
+ class SentimentQuery(BaseModel):
+     input_texts: Union[str, List[str]]
+
+ task = 'sentiment'
+ mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
+ with urllib.request.urlopen(mapping_link) as f:
+     html = f.read().decode('utf-8').split("\n")
+     csvreader = csv.reader(html, delimiter='\t')
+ labels = [row[1] for row in csvreader if len(row) > 1]
+
+ MODEL = f"cardiffnlp/twitter-roberta-base-{task}-latest"
+ model = AutoModelForSequenceClassification.from_pretrained(MODEL)
+ tokenizer = AutoTokenizer.from_pretrained(MODEL)
+
+
+
+ @app.post("/predict")
+ async def analyze_text(query: SentimentQuery):
+
+     if isinstance(query.input_texts, str):
+         input_texts = [query.input_texts]
+     else:  # already a List[str]
+         input_texts = query.input_texts
+     encoded_batch = tokenizer(
+         [preprocess(t) for t in input_texts],
+         padding=True,     # pad to same length
+         truncation=True,  # truncate long texts
+         return_tensors="pt",
+     )
+
+     with torch.no_grad():
+         output = model(**encoded_batch)
+
+     logits = output[0].detach().cpu().numpy()
+     scores = softmax(logits, axis=-1)
+     pred_labels = scores.argmax(axis=-1)
+
+     response_body = []
+     for i, text in enumerate(input_texts):
+         response_body.append(
+             {
+                 "input_text": text,
+                 "prediction": labels[pred_labels[i]],
+                 "scores":
+                 {
+                     "negative": float(scores[i][0]),
+                     "neutral": float(scores[i][1]),
+                     "positive": float(scores[i][2])
+                 }
+             })
+
+     return {
+         "status": "successful",
+         "response_body": response_body
+     }
+
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
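Note that `from .utils import preprocess` is a relative import, so this module must be imported as part of the `app` package (as the tests do via `from app.app import app`); running the file directly as a script would fail on that import. Launching the server with `src/` on the path, e.g. `uvicorn app.app:app`, is one way to run it (an assumption about the intended launch command, not something this commit specifies).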
src/app/app_post.py ADDED
@@ -0,0 +1,22 @@
+ import requests
+
+ url = "http://127.0.0.1:8000/predict"
+
+ data = {
+     "input_texts": [
+         "Today I am feeling very happy!!",
+         "Today I am not feeling very happy at all!!",
+         "Today I am feeling no particular mood."]
+ }
+
+
+ response = requests.post(url, json=data)
+
+ if response.status_code == 200:
+     response_json = response.json()
+     print(response_json["status"])
+     for message in response_json["response_body"]:
+         print(message)
+
+ else:
+     print(f"error: {response.status_code} - {response.json()}")
src/app/utils.py ADDED
@@ -0,0 +1,7 @@
+ def preprocess(text):
+     new_text = []
+     for t in text.split(" "):
+         t = '@user' if t.startswith('@') and len(t) > 1 else t
+         t = 'http' if t.startswith('http') else t
+         new_text.append(t)
+     return " ".join(new_text)
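preprocess applies the same handle/URL masking as the CardiffNLP model-card examples; a quick illustration of the behavior:

```python
# Behavior check for preprocess: handles become @user, links become http.
from app.utils import preprocess  # importable with src/ on the path (see pytest.ini)

print(preprocess("@alice this model rocks, see https://example.com"))
# -> "@user this model rocks, see http"
```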
tests/__pycache__/test_app.cpython-311-pytest-9.0.0.pyc ADDED
Binary file (23 kB)
tests/__pycache__/test_data.cpython-311-pytest-9.0.0.pyc ADDED
Binary file (416 Bytes)
tests/test_app.py ADDED
@@ -0,0 +1,89 @@
+ from fastapi.testclient import TestClient
+ from app.app import app
+
+ client = TestClient(app)
+
+ def test_correct_response_structure():
+     data = {
+         "input_texts":
+             "Today I am feeling very happy!!"
+     }
+
+     response = client.post("/predict", json=data)
+     response_json = response.json()
+     assert response.status_code == 200
+     assert "status" in response_json.keys()
+     assert "response_body" in response_json.keys()
+
+     response_body = response_json["response_body"][0]
+
+     assert "input_text" in response_body.keys()
+     assert "prediction" in response_body.keys()
+     assert "scores" in response_body.keys()
+
+
+ def test_incorrect_response():
+     data = {
+         "input_texts":
+             5
+     }
+     response = client.post("/predict", json=data)
+     response_json = response.json()
+     assert response.status_code == 422  # validation error raised by pydantic
+
+
+ def test_single_prediction():
+     input_text = "Today I am feeling very happy!!"
+     data = {
+         "input_texts": input_text
+     }
+
+     response = client.post("/predict", json=data)
+     response_json = response.json()
+     assert response.status_code == 200
+     assert response_json["status"] == "successful"
+
+     response_body = response_json["response_body"]
+     assert len(response_body) == 1
+
+     response_body = response_body[0]
+     assert response_body["input_text"] == input_text
+     assert response_body["prediction"] in ["positive", "negative", "neutral"]
+     scores = response_body["scores"]
+     assert type(scores) == dict
+     assert len(scores) == 3
+     assert list(scores.keys()) == ["negative", "neutral", "positive"]
+     for sentiment in scores.keys():
+         assert type(scores[sentiment]) == float
+
+
+ def test_multiple_predictions():
+     input_texts = ["Today I am feeling very happy!!",
+                    "Today I am not feeling very happy at all!!",
+                    "Today I am feeling no particular mood."]
+     data = {
+         "input_texts": input_texts
+     }
+
+     response = client.post("/predict", json=data)
+     response_json = response.json()
+     assert response.status_code == 200
+     assert response_json["status"] == "successful"
+
+     response_body = response_json["response_body"]
+     assert len(response_body) == len(input_texts)
+
+     for i in range(len(response_body)):
+         single_response = response_body[i]
+         assert single_response["input_text"] == input_texts[i]
+         assert single_response["prediction"] in ["positive", "negative", "neutral"]
+         scores = single_response["scores"]
+         assert type(scores) == dict
+         assert len(scores) == 3
+         assert list(scores.keys()) == ["negative", "neutral", "positive"]
+         for sentiment in scores.keys():
+             assert type(scores[sentiment]) == float
tests/test_data.py ADDED
@@ -0,0 +1,4 @@
+ import pytest
+
+ from datasets import load_dataset
+
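tests/test_data.py is committed as an import-only stub; a minimal schema test that would fit here could look like the following (a sketch under assumed column and label conventions, not part of the commit):

```python
# Hypothetical content for tests/test_data.py -- not in this commit.
from datasets import load_dataset


def test_tweet_eval_sentiment_schema():
    # A small slice keeps the test fast while still exercising the loader.
    ds = load_dataset("tweet_eval", "sentiment", split="train[:20]")
    assert {"text", "label"} <= set(ds.column_names)
    assert all(isinstance(t, str) and t for t in ds["text"])
    assert all(label in (0, 1, 2) for label in ds["label"])  # negative/neutral/positive
```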