"""Plot a bar chart of model loading times for several configurations.

Draws one bar per configuration, annotates each bar with its time (or with
"OOM" where the time is NaN), writes the figure to ``loading_benchmark.png``
and then displays it.
"""

import matplotlib.pyplot as plt
import numpy as np

fig, ax = plt.subplots(figsize=(12, 7))

# One tick label per configuration; "\n" puts each part on its own line.
categories = [
    'v4.57.6\ndevice_map=auto\nThreadpool',
    'v4.57.6\ndevice_map=auto\nNormal',
    'v4.57.6\nTP',
    'v5\ndevice_map=auto\nAsync',
    'v5\ndevice_map=auto\nSync',
    'v5\nTP\nAsync',
    'v5\nTP\nSync',
]

# Loading times in seconds, in the same order as `categories`.
# NaN means there is no time to plot; that slot is annotated "OOM" below.
times = [66.24, 67.29, np.nan, 20.71, 45.3, 10.1, 19.28]
colors = [
    '#3498db',
    '#2980b9',
    '#e74c3c',
    '#2ecc71',
    '#27ae60',
    '#f39c12',
    '#e67e22',
]

x_pos = np.arange(len(categories))

bars = ax.bar(
    x_pos,
    times,
    color=colors,
    alpha=0.8,
    edgecolor='black',
    linewidth=1.2,
)

# Annotate every bar, horizontally centred on it.
for rect, seconds in zip(bars, times):
    centre_x = rect.get_x() + rect.get_width() / 2
    if np.isnan(seconds):
        # There is no bar height to anchor to, so the marker sits at a fixed
        # y position just above the x axis.
        ax.text(
            centre_x, 5, 'OOM',
            ha='center', va='bottom',
            fontsize=12, fontweight='bold', color='red',
        )
        continue
    # The 1.5 offset lifts the value label clear of the top of the bar.
    ax.text(
        centre_x, seconds + 1.5, f'{seconds}s',
        ha='center', va='bottom',
        fontsize=10, fontweight='bold',
    )

# Axis labels share one bold style; the title is larger and padded.
axis_label_style = {'fontsize': 12, 'fontweight': 'bold'}
ax.set_xlabel('Configuration', **axis_label_style)
ax.set_ylabel('Loading Time (seconds)', **axis_label_style)
ax.set_title(
    'Model Loading Benchmark: Qwen/Qwen1.5-110B-Chat\nGPU: 1x A100 (80 GB)',
    fontsize=14,
    fontweight='bold',
    pad=20,
)

ax.set_xticks(x_pos)
ax.set_xticklabels(categories, fontsize=9, ha='center')

# Leave 15% headroom above the tallest real bar so its label fits; NaN entries
# are skipped because they would otherwise poison the maximum.
tallest = max(t for t in times if not np.isnan(t))
ax.set_ylim(0, tallest * 1.15)

# Faint horizontal guide lines, drawn behind the bars.
ax.yaxis.grid(True, linestyle='--', alpha=0.3)
ax.set_axisbelow(True)

plt.tight_layout()
plt.savefig('loading_benchmark.png', dpi=300, bbox_inches='tight')
plt.show()

print("Plot saved as 'loading_benchmark.png'")