Hch Li committed on
Commit
7776235
·
1 Parent(s): 245fe03

try remote test

Browse files
__pycache__/about_content.cpython-310.pyc CHANGED
Binary files a/__pycache__/about_content.cpython-310.pyc and b/__pycache__/about_content.cpython-310.pyc differ
 
about_content.py CHANGED
@@ -8,7 +8,7 @@ This application is a demonstration of KV Cache Benchmarking. It allows users to
8
  #### Features:
9
  - Interactive filtering by model types and datasets
10
  - Real-time updates of benchmark results
11
- - Visualization of Quality and TTFT metrics
12
 
13
  #### Contact:
14
  For more information, please contact us at [email@example.com](mailto:email@example.com).
 
8
  #### Features:
9
  - Interactive filtering by model types and datasets
10
  - Real-time updates of benchmark results
11
+ - Visualization of Quality and TTFT (s) metrics
12
 
13
  #### Contact:
14
  For more information, please contact us at [email@example.com](mailto:email@example.com).
app.py CHANGED
@@ -36,34 +36,46 @@ def filter_and_display(selected_columns, model_types, datasets, stage):
36
  if not filtered.empty:
37
  # Adjust aggregation based on stage
38
  if stage == "decode":
39
- filtered = filtered.groupby(["Method", "Model"], as_index=False).agg({
40
  "Throughput (token/s)": "mean",
41
  "Quality": "mean",
42
  "Link": "first"
43
  })
44
  else:
45
- filtered = filtered.groupby(["Method", "Model"], as_index=False).agg({
46
  "Quality": "mean",
47
- "TTFT": "mean",
48
  "Link": "first"
49
  })
50
 
51
  # Select columns to display
52
- display_columns = ["Method", "Model"] + [col for col in selected_columns if col in filtered.columns]
53
  return filtered[display_columns] if not filtered.empty else pd.DataFrame(columns=display_columns)
54
 
55
  def create_prefill_visualization(filtered_data):
56
  if filtered_data.empty:
57
  return None
58
- fig = px.bar(filtered_data, x='Model', y='Quality', color='Method', barmode='group',
59
- title='Prefill Stage: Quality by Model and Method')
 
 
 
 
 
 
60
  return fig
61
 
62
  def create_decode_visualization(filtered_data):
63
  if filtered_data.empty:
64
  return None
65
- fig = px.bar(filtered_data, x='Model', y='Throughput (token/s)', color='Method', barmode='group',
66
- title='Decode Stage: Throughput by Model and Method')
 
 
 
 
 
 
67
  return fig
68
 
69
  # Load the data from the /data folder
@@ -77,21 +89,21 @@ def create_gradio_app():
77
  gr.Markdown(
78
  """# KV Cache Benchmark
79
  ### Demo leaderboard
80
- This demo leaderboard allows users to explore and compare different KV cache implementations across various models and datasets. It provides interactive filtering options and real-time updates of benchmark results, including visualization of Quality and TTFT metrics.
81
  """)
82
 
83
  with gr.Tabs():
84
  with gr.TabItem("KV Cache Benchmark"):
85
  # Prefill-stage selection
86
  with gr.Row():
87
- gr.Markdown("## Prefill-stage Selection")
88
  with gr.Row():
89
  with gr.Column():
90
  gr.Markdown("#### Select Columns to Display")
91
  prefill_columns_to_display = gr.CheckboxGroup(
92
- choices=["Quality", "TTFT", "Link"],
93
  label="Columns",
94
- value=["Quality", "TTFT"]
95
  )
96
 
97
  with gr.Column():
@@ -112,18 +124,31 @@ This demo leaderboard allows users to explore and compare different KV cache imp
112
 
113
  # Prefill-stage compression results
114
  with gr.Row():
115
- gr.Markdown("## Prefill-stage Compression Results")
116
 
117
- prefill_results = gr.Dataframe(value=filter_and_display(["Quality", "TTFT"], list(data["Model"].unique()), list(data["Dataset"].unique()), "prefill"), headers=["Method", "Model", "Quality", "TTFT", "Link"])
 
 
 
 
 
 
 
 
 
118
 
119
- # Prefill-stage visualization
 
120
  with gr.Row():
121
  gr.Markdown("### Prefill-stage Visualization")
122
- prefill_plot = gr.Plot(value=create_prefill_visualization(filter_and_display(["Quality"], list(data["Model"].unique()), list(data["Dataset"].unique()), "prefill")))
 
 
 
123
 
124
  # Decode-stage selection
125
  with gr.Row():
126
- gr.Markdown("## Decode-stage Selection")
127
  with gr.Row():
128
  with gr.Column():
129
  gr.Markdown("#### Select Columns to Display")
@@ -151,46 +176,88 @@ This demo leaderboard allows users to explore and compare different KV cache imp
151
 
152
  # Decode-stage compression results
153
  with gr.Row():
154
- gr.Markdown("## Decode-stage Compression Results")
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- decode_results = gr.Dataframe(value=filter_and_display(["Throughput (token/s)", "Quality"], list(data["Model"].unique()), list(data["Dataset"].unique()), "decode"), headers=["Method", "Model", "Throughput (token/s)", "Quality", "Link"])
157
 
158
- # Decode-stage visualization
159
  with gr.Row():
160
- gr.Markdown("### Decode-stage Visualization")
161
- decode_plot = gr.Plot(value=create_decode_visualization(filter_and_display(["Throughput (token/s)"], list(data["Model"].unique()), list(data["Dataset"].unique()), "decode")))
 
 
 
 
 
 
162
 
163
  def auto_update_prefill(selected_columns, model_types, datasets):
164
  if not model_types or not datasets:
165
- return pd.DataFrame(columns=["Method", "Model"] + selected_columns), None
 
166
  filtered_data = filter_and_display(selected_columns, model_types, datasets, "prefill")
167
- return filtered_data, create_prefill_visualization(filtered_data)
168
 
169
  def auto_update_decode(selected_columns, model_types, datasets):
170
  if not model_types or not datasets:
171
- return pd.DataFrame(columns=["Method", "Model"] + selected_columns), None
 
172
  filtered_data = filter_and_display(selected_columns, model_types, datasets, "decode")
173
- return filtered_data, create_decode_visualization(filtered_data)
174
 
175
- prefill_columns_to_display.change(auto_update_prefill, inputs=[prefill_columns_to_display, prefill_model_types, prefill_datasets], outputs=[prefill_results, prefill_plot])
176
- prefill_model_types.change(auto_update_prefill, inputs=[prefill_columns_to_display, prefill_model_types, prefill_datasets], outputs=[prefill_results, prefill_plot])
177
- prefill_datasets.change(auto_update_prefill, inputs=[prefill_columns_to_display, prefill_model_types, prefill_datasets], outputs=[prefill_results, prefill_plot])
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- decode_columns_to_display.change(auto_update_decode, inputs=[decode_columns_to_display, decode_model_types, decode_datasets], outputs=[decode_results, decode_plot])
180
- decode_model_types.change(auto_update_decode, inputs=[decode_columns_to_display, decode_model_types, decode_datasets], outputs=[decode_results, decode_plot])
181
- decode_datasets.change(auto_update_decode, inputs=[decode_columns_to_display, decode_model_types, decode_datasets], outputs=[decode_results, decode_plot])
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
- # Add a reload button
184
- def reload_data():
185
- global data
186
- data = load_data(data_dir)
187
- return (filter_and_display(prefill_columns_to_display.value, prefill_model_types.value, prefill_datasets.value, "prefill"),
188
- filter_and_display(decode_columns_to_display.value, decode_model_types.value, decode_datasets.value, "decode"),
189
- create_prefill_visualization(filter_and_display(prefill_columns_to_display.value, prefill_model_types.value, prefill_datasets.value, "prefill")),
190
- create_decode_visualization(filter_and_display(decode_columns_to_display.value, decode_model_types.value, decode_datasets.value, "decode")))
191
 
192
  reload_button = gr.Button("Reload Data")
193
- reload_button.click(reload_data, outputs=[prefill_results, decode_results, prefill_plot, decode_plot])
 
 
194
 
195
  with gr.TabItem("About"):
196
  gr.Markdown(about_markdown) # Use the imported about page content
@@ -202,4 +269,4 @@ This demo leaderboard allows users to explore and compare different KV cache imp
202
 
203
  if __name__ == "__main__":
204
  app = create_gradio_app()
205
- app.launch()
 
36
  if not filtered.empty:
37
  # Adjust aggregation based on stage
38
  if stage == "decode":
39
+ filtered = filtered.groupby(["Method", "Model", "Dataset"], as_index=False).agg({
40
  "Throughput (token/s)": "mean",
41
  "Quality": "mean",
42
  "Link": "first"
43
  })
44
  else:
45
+ filtered = filtered.groupby(["Method", "Model", "Dataset"], as_index=False).agg({
46
  "Quality": "mean",
47
+ "TTFT (s)": "mean",
48
  "Link": "first"
49
  })
50
 
51
  # Select columns to display
52
+ display_columns = ["Method", "Model", "Dataset"] + [col for col in selected_columns if col in filtered.columns]
53
  return filtered[display_columns] if not filtered.empty else pd.DataFrame(columns=display_columns)
54
 
55
  def create_prefill_visualization(filtered_data):
56
  if filtered_data.empty:
57
  return None
58
+ fig = px.scatter(
59
+ filtered_data,
60
+ x='TTFT (s)',
61
+ y='Quality',
62
+ color='Method',
63
+ hover_data=['Model', 'Dataset'],
64
+ title='Prefill Stage: Quality vs TTFT (s) by Method'
65
+ )
66
  return fig
67
 
68
  def create_decode_visualization(filtered_data):
69
  if filtered_data.empty:
70
  return None
71
+ fig = px.scatter(
72
+ filtered_data,
73
+ x='Throughput (token/s)',
74
+ y='Quality',
75
+ color='Method',
76
+ hover_data=['Model', 'Dataset'],
77
+ title='Decode Stage: Quality vs Throughput by Method'
78
+ )
79
  return fig
80
 
81
  # Load the data from the /data folder
 
89
  gr.Markdown(
90
  """# KV Cache Benchmark
91
  ### Demo leaderboard
92
+ This demo leaderboard allows users to explore and compare different KV cache implementations across various models and datasets. It provides interactive filtering options and real-time updates of benchmark results, including visualization of Quality and TTFT (s) metrics.
93
  """)
94
 
95
  with gr.Tabs():
96
  with gr.TabItem("KV Cache Benchmark"):
97
  # Prefill-stage selection
98
  with gr.Row():
99
+ gr.Markdown("## Prefill-Stage KV Cache Compression")
100
  with gr.Row():
101
  with gr.Column():
102
  gr.Markdown("#### Select Columns to Display")
103
  prefill_columns_to_display = gr.CheckboxGroup(
104
+ choices=["Quality", "TTFT (s)", "Link"],
105
  label="Columns",
106
+ value=["Quality", "TTFT (s)"]
107
  )
108
 
109
  with gr.Column():
 
124
 
125
  # Prefill-stage compression results
126
  with gr.Row():
127
+ gr.Markdown("## Results")
128
 
129
+ # Initialize the Prefill Dataframe with default data
130
+ prefill_default = filter_and_display(
131
+ ["Quality", "TTFT (s)"],
132
+ list(data["Model"].unique()),
133
+ list(data[data["Stage"] == "prefill"]["Dataset"].unique()),
134
+ "prefill"
135
+ )
136
+ prefill_results = gr.Dataframe(
137
+ value=prefill_default
138
+ )
139
 
140
+ # Prefill-stage visualization (Static initially)
141
+
142
  with gr.Row():
143
  gr.Markdown("### Prefill-stage Visualization")
144
+ with gr.Row():
145
+ prefill_plot = gr.Plot(
146
+ value=create_prefill_visualization(prefill_default)
147
+ )
148
 
149
  # Decode-stage selection
150
  with gr.Row():
151
+ gr.Markdown("## Decode-Stage KV Cache Compression")
152
  with gr.Row():
153
  with gr.Column():
154
  gr.Markdown("#### Select Columns to Display")
 
176
 
177
  # Decode-stage compression results
178
  with gr.Row():
179
+ gr.Markdown("## Results")
180
+
181
+ # Initialize the Decode Dataframe with default data
182
+ decode_default = filter_and_display(
183
+ ["Throughput (token/s)", "Quality"],
184
+ list(data["Model"].unique()),
185
+ list(data[data["Stage"] == "decode"]["Dataset"].unique()),
186
+ "decode"
187
+ )
188
+ decode_results = gr.Dataframe(
189
+ value=decode_default
190
+ )
191
+
192
+ # Decode-stage visualization (Static initially)
193
 
 
194
 
 
195
  with gr.Row():
196
+ gr.Markdown("### Decode-Stage Visualization")
197
+ with gr.Row():
198
+ decode_plot = gr.Plot(
199
+ value=create_decode_visualization(decode_default)
200
+ )
201
+
202
+ # AUTO-UPDATE FUNCTIONS:
203
+ # (We only update the DataFrame, NOT the Plot)
204
 
205
  def auto_update_prefill(selected_columns, model_types, datasets):
206
  if not model_types or not datasets:
207
+ # Return an empty DataFrame if no selection is made
208
+ return pd.DataFrame(columns=["Method", "Model"] + selected_columns)
209
  filtered_data = filter_and_display(selected_columns, model_types, datasets, "prefill")
210
+ return filtered_data
211
 
212
  def auto_update_decode(selected_columns, model_types, datasets):
213
  if not model_types or not datasets:
214
+ # Return an empty DataFrame if no selection is made
215
+ return pd.DataFrame(columns=["Method", "Model"] + selected_columns)
216
  filtered_data = filter_and_display(selected_columns, model_types, datasets, "decode")
217
+ return filtered_data
218
 
219
+ # Only update the tables when filters change
220
+ prefill_columns_to_display.change(
221
+ auto_update_prefill,
222
+ inputs=[prefill_columns_to_display, prefill_model_types, prefill_datasets],
223
+ outputs=[prefill_results]
224
+ )
225
+ prefill_model_types.change(
226
+ auto_update_prefill,
227
+ inputs=[prefill_columns_to_display, prefill_model_types, prefill_datasets],
228
+ outputs=[prefill_results]
229
+ )
230
+ prefill_datasets.change(
231
+ auto_update_prefill,
232
+ inputs=[prefill_columns_to_display, prefill_model_types, prefill_datasets],
233
+ outputs=[prefill_results]
234
+ )
235
 
236
+ decode_columns_to_display.change(
237
+ auto_update_decode,
238
+ inputs=[decode_columns_to_display, decode_model_types, decode_datasets],
239
+ outputs=[decode_results]
240
+ )
241
+ decode_model_types.change(
242
+ auto_update_decode,
243
+ inputs=[decode_columns_to_display, decode_model_types, decode_datasets],
244
+ outputs=[decode_results]
245
+ )
246
+ decode_datasets.change(
247
+ auto_update_decode,
248
+ inputs=[decode_columns_to_display, decode_model_types, decode_datasets],
249
+ outputs=[decode_results]
250
+ )
251
 
252
+ # Reload button to restart the whole website
253
+ def reload_website():
254
+ # This function will trigger a page reload using JavaScript
255
+ return gr.JS("window.location.reload();")
 
 
 
 
256
 
257
  reload_button = gr.Button("Reload Data")
258
+ reload_button.click(
259
+ reload_website
260
+ )
261
 
262
  with gr.TabItem("About"):
263
  gr.Markdown(about_markdown) # Use the imported about page content
 
269
 
270
  if __name__ == "__main__":
271
  app = create_gradio_app()
272
+ app.launch()
data/prefill_CacheGen_Mistral-7B-v0.3_NarrativeQA.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
  "Quality": 29.53,
3
- "TTFT": 2.5,
4
  "Link": "www.google.com"
5
  }
 
1
  {
2
  "Quality": 29.53,
3
+ "TTFT (s)": 2.5,
4
  "Link": "www.google.com"
5
  }
data/prefill_KIVI_Mistral-7B-v0.3_NarrativeQA.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
  "Quality": 27.27,
3
- "TTFT": 3.3,
4
  "Link": "www.google.com"
5
  }
 
1
  {
2
  "Quality": 27.27,
3
+ "TTFT (s)": 3.3,
4
  "Link": "www.google.com"
5
  }
data/prefill_vLLM_Mistral-7B-v0.3_NarrativeQA.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
  "Quality": 29.26,
3
- "TTFT": 4.8,
4
  "Link": "www.google.com"
5
  }
 
1
  {
2
  "Quality": 29.26,
3
+ "TTFT (s)": 4.8,
4
  "Link": "www.google.com"
5
  }