/**
 * Model Inspector Component
 *
 * Displays detailed architecture information about the loaded model,
 * including layers, parameters, attention heads, and accessible components.
 * Makes the "black box" transparent by showing what can be visualized.
 *
 * @component
 */
"use client";

import { useState, useEffect, lazy, Suspense } from "react";
import { getApiUrl } from "@/lib/config";
import {
  Brain,
  Layers,
  Eye,
  Cpu,
  Database,
  GitBranch,
  Zap,
  ChevronRight,
  ChevronDown,
  Activity,
  Info,
  Box,
  Network,
  Move3D,
} from "lucide-react";

// Lazy-load the 3D components to avoid SSR issues
const ModelArchitecture3D = lazy(() => import("./ModelArchitecture3D"));
const DecisionPath3D = lazy(() => import("./DecisionPath3DEnhanced"));

interface ModelInfo {
  name: string;
  type: string;
  totalParams: number;
  layers: number;
  heads: number;
  hiddenSize: number;
  vocabSize: number;
  maxPositions: number;
  architecture: string[];
  accessible: string[];
}

export default function ModelInspector({
  hideControlPanel = false,
}: {
  hideControlPanel?: boolean;
}) {
  const [expandedSections, setExpandedSections] = useState<Set<string>>(
    new Set(["overview"])
  );
  const [isConnected, setIsConnected] = useState(false);
  const [isLoading, setIsLoading] = useState(true);
  const [modelInfo, setModelInfo] = useState<ModelInfo>({
    name: "Loading...",
    type: "unknown",
    totalParams: 0,
    layers: 0,
    heads: 0,
    hiddenSize: 0,
    vocabSize: 0,
    maxPositions: 0,
    architecture: [],
    accessible: [],
  });
  const [deviceInfo, setDeviceInfo] = useState("");
  const [modelConfig, setModelConfig] = useState<Record<string, unknown> | null>(
    null
  );

  // Fetch model information from the backend
  useEffect(() => {
    const fetchModelInfo = async () => {
      try {
        const response = await fetch(`${getApiUrl()}/model/info`);
        if (response.ok) {
          const data = await response.json();
          setModelInfo({
            name: data.name,
            type: data.type,
            totalParams: data.totalParams,
            layers: data.layers,
            heads: data.heads,
            hiddenSize: data.hiddenSize,
            vocabSize: data.vocabSize,
            maxPositions: data.maxPositions,
            architecture: [data.architecture],
            accessible: data.accessible,
          });
          setDeviceInfo(data.device);
          setModelConfig(data.config);
          setIsConnected(true);
        }
      } catch (error) {
        console.error("Failed to fetch model info:", error);
        // Keep the default placeholder data if the fetch fails
        setIsConnected(false);
      } finally {
        setIsLoading(false);
      }
    };

    fetchModelInfo();
    // Refresh model info every 10 seconds
    const interval = setInterval(fetchModelInfo, 10000);
    return () => clearInterval(interval);
  }, []);

  const toggleSection = (section: string) => {
    const newExpanded = new Set(expandedSections);
    if (newExpanded.has(section)) {
      newExpanded.delete(section);
    } else {
      newExpanded.add(section);
    }
    setExpandedSections(newExpanded);
  };

  // Abbreviate large counts: 1234 -> "1.2K", 1.5e6 -> "1.5M", 2e9 -> "2.0B"
  const formatNumber = (num: number) => {
    if (num >= 1e9) return `${(num / 1e9).toFixed(1)}B`;
    if (num >= 1e6) return `${(num / 1e6).toFixed(1)}M`;
    if (num >= 1e3) return `${(num / 1e3).toFixed(1)}K`;
    return num.toString();
  };

  return (
    <div>
      {/* Header */}
      <header>
        <h1>
          <Brain /> Model Inspector
        </h1>
        <p>Explore the complete architecture of the loaded model</p>
        <span>
          {isLoading
            ? "Loading..."
            : isConnected
              ? "Model Connected"
              : "Disconnected"}
        </span>
      </header>
      {/* Model Overview Section */}
      <button onClick={() => toggleSection("overview")}>
        {expandedSections.has("overview") ? <ChevronDown /> : <ChevronRight />}
        <Box /> Model Overview
      </button>
      {expandedSections.has("overview") && (
        <section>
          <div>
            <h3>Total Parameters</h3>
            <p>{formatNumber(modelInfo.totalParams)}</p>
            <p>
              {modelInfo.totalParams > 1e9
                ? `${(modelInfo.totalParams / 1e9).toFixed(1)} Billion`
                : modelInfo.totalParams > 1e6
                  ? `${(modelInfo.totalParams / 1e6).toFixed(1)} Million`
                  : formatNumber(modelInfo.totalParams)}
            </p>
          </div>
          <div>
            <h3>
              <Database /> Vocabulary Size
            </h3>
            <p>{formatNumber(modelInfo.vocabSize)}</p>
            <p>Unique tokens</p>
          </div>
          <div>
            <h3>Context Length</h3>
            <p>{formatNumber(modelInfo.maxPositions)}</p>
            <p>Max tokens</p>
          </div>
          <div>
            <h3>Architecture</h3>
            <p>Transformer</p>
            <p>GPT-style</p>
          </div>

          {/* Device Information */}
          {deviceInfo && (
            <div>
              <h3>
                <Cpu /> Device Information
              </h3>
              <p>Running on: {deviceInfo}</p>
            </div>
          )}

          {/* Model Configuration */}
          {modelConfig && (
            <div>
              <h3>Configuration</h3>
              <p>Activation: {String(modelConfig.activation_function)}</p>
              <p>Cache: {modelConfig.use_cache ? "Enabled" : "Disabled"}</p>
            </div>
          )}
        </section>
      )}
      {/* Architecture Details Section */}
      <button onClick={() => toggleSection("architecture")}>
        {expandedSections.has("architecture") ? <ChevronDown /> : <ChevronRight />}
        <Network /> Architecture Details
      </button>
      {expandedSections.has("architecture") && (
        <section>
          {/* Layer Structure */}
          <h3>
            <Layers /> Layer Structure (×{modelInfo.layers})
          </h3>
          <ul>
            <li>
              {/* Guard against 0/0 = NaN before model info has loaded */}
              Multi-Head Attention ({modelInfo.heads} heads,{" "}
              {modelInfo.heads > 0 ? modelInfo.hiddenSize / modelInfo.heads : 0}{" "}
              dims/head)
              <ul>
                <li>
                  QKV Projection: {modelInfo.hiddenSize} → {modelInfo.hiddenSize * 3}
                </li>
                <li>
                  Output Projection: {modelInfo.hiddenSize} → {modelInfo.hiddenSize}
                </li>
              </ul>
            </li>
            <li>
              Feed-Forward Network (4× expansion)
              <ul>
                <li>
                  FC1: {modelInfo.hiddenSize} → {modelInfo.hiddenSize * 4}
                </li>
                <li>
                  FC2: {modelInfo.hiddenSize * 4} → {modelInfo.hiddenSize}
                </li>
              </ul>
            </li>
            <li>Layer Normalization</li>
            <li>Residual Connections</li>
          </ul>

          {/* Data Flow */}
          <h3>
            <Activity /> Data Flow Through Model
          </h3>
          <pre>
            {`Input Text
   ↓
[Token Embeddings] (${modelInfo.vocabSize.toLocaleString()} × ${modelInfo.hiddenSize.toLocaleString()})
   ↓
[+ Rotary Position Embeddings]
   ↓
╔═══════════════════════╗
║ Layer 0               ║
║  ├─ Attention (${modelInfo.heads}h)    ║
║  └─ FFN (${modelInfo.hiddenSize * 4}d)        ║
╚═══════════════════════╝
   ↓
... (${modelInfo.layers - 2} more layers)
   ↓
╔═══════════════════════╗
║ Layer ${modelInfo.layers - 1}               ║
║  ├─ Attention (${modelInfo.heads}h)    ║
║  └─ FFN (${modelInfo.hiddenSize * 4}d)        ║
╚═══════════════════════╝
   ↓
[Layer Norm]
   ↓
[Language Model Head]
   ↓
${modelInfo.vocabSize.toLocaleString()} Token Probabilities`}
          </pre>
        </section>
      )}
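      {/*
        Worked example of the arithmetic above, assuming a GPT-2 Small-sized
        config (hypothetical values, not read from the backend):
          hiddenSize = 768, heads = 12  ->  768 / 12 = 64 dims/head
          QKV projection:  768 -> 2304  (3 × 768, fused Q, K, V)
          FFN expansion:   768 -> 3072  (4 × 768), then FC2: 3072 -> 768
      */}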
      {/* Accessible Components Section */}
      <button onClick={() => toggleSection("accessible")}>
        {expandedSections.has("accessible") ? <ChevronDown /> : <ChevronRight />}
        <Eye /> Accessible Components
      </button>
      {expandedSections.has("accessible") && (
        <section>
          <ul>
            {modelInfo.accessible.map((item, idx) => (
              <li key={idx}>{item}</li>
            ))}
          </ul>
          <div>
            <h4>
              <Info /> Complete Transparency
            </h4>
            <p>
              Every computation, weight, and decision in the model's{" "}
              {formatNumber(modelInfo.totalParams)} parameters is accessible.
              The "black box" becomes a "glass box": we can visualize the
              entire thinking process as tokens flow through the network.
            </p>
          </div>
        </section>
      )}
      {/* Decision Path Visualization Section */}
      <button onClick={() => toggleSection("decision-path")}>
        {expandedSections.has("decision-path") ? <ChevronDown /> : <ChevronRight />}
        <GitBranch /> Decision Path
      </button>
      {expandedSections.has("decision-path") && (
        <Suspense fallback={<div>Loading decision path visualization...</div>}>
          <DecisionPath3D />
        </Suspense>
      )}
      {/* 3D Visualization Section */}
      <button onClick={() => toggleSection("3d")}>
        {expandedSections.has("3d") ? <ChevronDown /> : <ChevronRight />}
        <Move3D /> 3D Architecture
      </button>
      {expandedSections.has("3d") && (
        <Suspense fallback={<div>Loading 3D visualization...</div>}>
          <ModelArchitecture3D />
        </Suspense>
      )}

      {/* Computation Stats Section */}
      <button onClick={() => toggleSection("computation")}>
        {expandedSections.has("computation") ? <ChevronDown /> : <ChevronRight />}
        <Zap /> Computation Stats
      </button>
      {expandedSections.has("computation") && (
        <section>
          <ul>
            <li>Operations per token: ~{formatNumber(modelInfo.totalParams * 2)}</li>
            <li>
              Attention computations: {modelInfo.layers * modelInfo.heads} heads
            </li>
            <li>
              Probability calculations: {modelInfo.vocabSize.toLocaleString()} tokens
            </li>
            <li>
              Memory footprint (FP32):{" "}
              {((modelInfo.totalParams * 4) / (1024 * 1024 * 1024)).toFixed(2)} GB
            </li>
            <li>
              Memory footprint (FP16):{" "}
              {((modelInfo.totalParams * 2) / (1024 * 1024 * 1024)).toFixed(2)} GB
            </li>
          </ul>
          <p>
            Each token generation involves passing through all {modelInfo.layers}{" "}
            layers, computing attention across {modelInfo.layers * modelInfo.heads}{" "}
            heads, and producing probabilities for{" "}
            {modelInfo.vocabSize.toLocaleString()} possible next tokens.
          </p>
        </section>
      )}
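      {/*
        How the stats above are derived, with a hypothetical 124M-parameter
        model for illustration:
          Operations/token ≈ 2 × params (one multiply + one add per weight)
                           ≈ 2 × 124M = 248M ops
          FP32 memory = params × 4 bytes = 496 MB ≈ 0.46 GB
          FP16 memory = params × 2 bytes = 248 MB ≈ 0.23 GB
      */}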
    </div>
  );
}
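/*
 * Sketch of the GET /model/info payload this component assumes, based on the
 * fields read in fetchModelInfo above. The example values are illustrative
 * placeholders, not actual backend output:
 *
 *   {
 *     "name": "gpt2",
 *     "type": "causal-lm",
 *     "totalParams": 124000000,
 *     "layers": 12,
 *     "heads": 12,
 *     "hiddenSize": 768,
 *     "vocabSize": 50257,
 *     "maxPositions": 1024,
 *     "architecture": "GPT2LMHeadModel",   // wrapped in an array by setModelInfo
 *     "accessible": ["embeddings", "attention", "hidden_states", "logits"],
 *     "device": "cuda:0",
 *     "config": { "activation_function": "gelu_new", "use_cache": true }
 *   }
 */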