#!/usr/bin/env python """Validate Python TEP simulation by comparing statistical properties with Fortran. Since both simulations add random noise with different sequences, we can't expect sample-by-sample matching. Instead, we validate: 1. Same steady-state operating point (mean values) 2. Same noise characteristics (standard deviations) 3. Same dynamic behavior (autocorrelation structure) Usage: # First run Fortran simulation: make fortran-run # Then run this validation: python examples/validate_statistics.py """ import numpy as np from pathlib import Path import sys sys.path.insert(0, str(Path(__file__).parent.parent)) from tep import TEPSimulator def load_fortran_data(base_path: Path = Path(".")): """Load Fortran output files.""" time_sec = np.loadtxt(base_path / "TE_data_inc.dat") time_hr = time_sec / 3600.0 n_samples = len(time_sec) # Load measurements (XMEAS 1-41) xmeas = np.zeros((n_samples, 41)) for i in range(10): filename = base_path / f"TE_data_me{i+1:02d}.dat" data = np.loadtxt(filename) start_idx = i * 4 end_idx = min(start_idx + 4, 41) n_cols = end_idx - start_idx xmeas[:, start_idx:end_idx] = data[:, :n_cols] filename = base_path / "TE_data_me11.dat" data = np.loadtxt(filename) xmeas[:, 40] = data if data.ndim == 1 else data[:, 0] # Load manipulated variables (XMV 1-12) xmv = np.zeros((n_samples, 12)) for i in range(3): filename = base_path / f"TE_data_mv{i+1}.dat" data = np.loadtxt(filename) xmv[:, i*4:(i+1)*4] = data return time_hr, xmeas, xmv def compute_statistics(data, skip_initial=60): """Compute mean, std, and autocorrelation lag-1. Args: data: Time series array (n_samples,) or (n_samples, n_vars) skip_initial: Number of samples to skip for settling (default 60 = 3 hours) """ data = data[skip_initial:] # Skip initial transient if data.ndim == 1: data = data.reshape(-1, 1) n_samples, n_vars = data.shape stats = { 'mean': np.mean(data, axis=0), 'std': np.std(data, axis=0), 'autocorr': np.zeros(n_vars) } # Compute lag-1 autocorrelation for j in range(n_vars): x = data[:-1, j] - stats['mean'][j] y = data[1:, j] - stats['mean'][j] if np.std(x) > 1e-10 and np.std(y) > 1e-10: stats['autocorr'][j] = np.corrcoef(x, y)[0, 1] return stats def main(): print("Loading Fortran output data...") try: f_time, f_xmeas, f_xmv = load_fortran_data() except FileNotFoundError as e: print(f"Error: {e}") print("Run 'make fortran-run' first to generate Fortran output.") return 1 duration = f_time[-1] print(f"Fortran simulation: {len(f_time)} samples over {duration:.1f} hours") print(f"\nRunning Python simulation for {duration:.1f} hours...") sim = TEPSimulator() sim.initialize() py_result = sim.simulate(duration_hours=duration, record_interval=180) print(f"Python simulation: {len(py_result.time)} samples") # Compute statistics for both (skip first 3 hours for settling) n_compare = min(len(f_time), len(py_result.time)) f_xmeas_stats = compute_statistics(f_xmeas[:n_compare]) p_xmeas_stats = compute_statistics(py_result.measurements[:n_compare]) f_xmv_stats = compute_statistics(f_xmv[:n_compare]) p_xmv_stats = compute_statistics(py_result.manipulated_vars[:n_compare]) # Key process variables to focus on key_xmeas = [ (7, "Reactor Pressure (kPa)"), (8, "Reactor Level (%)"), (9, "Reactor Temperature (°C)"), (11, "Separator Level (%)"), (12, "Separator Pressure (kPa)"), (15, "Stripper Level (%)"), (17, "Stripper Underflow (m³/hr)"), (21, "CW Outlet Temp (°C)"), ] print("\n" + "="*80) print("STATISTICAL VALIDATION: Key Process Measurements") print("="*80) print(f"{'Variable':<30} {'Fortran':<20} {'Python':<20} {'Diff %':<10}") print("-"*80) xmeas_pass = 0 xmeas_fail = 0 for idx, name in key_xmeas: i = idx - 1 # Convert to 0-indexed f_mean = f_xmeas_stats['mean'][i] p_mean = p_xmeas_stats['mean'][i] f_std = f_xmeas_stats['std'][i] p_std = p_xmeas_stats['std'][i] # Mean difference if abs(f_mean) > 1e-6: mean_diff = abs(f_mean - p_mean) / abs(f_mean) * 100 else: mean_diff = abs(f_mean - p_mean) * 100 # Std ratio (should be close to 1.0) if f_std > 1e-6: std_ratio = p_std / f_std else: std_ratio = 1.0 if p_std < 1e-6 else float('inf') status = "✓" if mean_diff < 5.0 and 0.5 < std_ratio < 2.0 else "✗" if status == "✓": xmeas_pass += 1 else: xmeas_fail += 1 print(f"XMEAS{idx:<2} {name:<22} " f"{f_mean:>8.2f}±{f_std:<8.2f} " f"{p_mean:>8.2f}±{p_std:<8.2f} " f"{mean_diff:>6.2f}% {status}") print("\n" + "="*80) print("STATISTICAL VALIDATION: Manipulated Variables") print("="*80) print(f"{'Variable':<30} {'Fortran':<20} {'Python':<20} {'Diff %':<10}") print("-"*80) mv_names = [ "D Feed Flow", "E Feed Flow", "A Feed Flow", "A+C Feed Flow", "Compressor Recycle", "Purge Valve", "Separator Pot Liquid", "Stripper Pot Liquid", "Stripper Steam", "Reactor CW", "Condenser CW", "Agitator Speed" ] xmv_pass = 0 xmv_fail = 0 for i in range(12): f_mean = f_xmv_stats['mean'][i] p_mean = p_xmv_stats['mean'][i] f_std = f_xmv_stats['std'][i] p_std = p_xmv_stats['std'][i] if abs(f_mean) > 1e-6: mean_diff = abs(f_mean - p_mean) / abs(f_mean) * 100 else: mean_diff = abs(f_mean - p_mean) * 100 if f_std > 1e-6: std_ratio = p_std / f_std else: std_ratio = 1.0 if p_std < 1e-6 else float('inf') status = "✓" if mean_diff < 10.0 and 0.3 < std_ratio < 3.0 else "✗" if status == "✓": xmv_pass += 1 else: xmv_fail += 1 print(f"XMV{i+1:<3} {mv_names[i]:<22} " f"{f_mean:>8.2f}±{f_std:<8.2f} " f"{p_mean:>8.2f}±{p_std:<8.2f} " f"{mean_diff:>6.2f}% {status}") print("\n" + "="*80) print("SUMMARY") print("="*80) print(f"Key measurements passed: {xmeas_pass}/{xmeas_pass + xmeas_fail}") print(f"Manipulated variables passed: {xmv_pass}/{xmv_pass + xmv_fail}") print(f"\nFortran shutdown: No (ran to completion)") print(f"Python shutdown: {'Yes - ' + py_result.shutdown_reason if py_result.shutdown else 'No (ran to completion)'}") # Overall assessment total_pass = xmeas_pass + xmv_pass total = xmeas_pass + xmeas_fail + xmv_pass + xmv_fail print("\n" + "="*80) if total_pass >= total * 0.8 and not py_result.shutdown: print("VALIDATION PASSED: Python implementation matches Fortran statistically") print("\nNote: Exact sample-by-sample matching is not expected because both") print("simulations use random measurement noise with different sequences.") print("The statistical properties (mean, variance) should match.") else: print("VALIDATION FAILED: Significant differences detected") print("Please investigate the following:") print("- Check controller gains and setpoints") print("- Check process constants and parameters") print("- Check integration timestep") print("="*80) # Try to create comparison plot try: import matplotlib.pyplot as plt fig, axes = plt.subplots(4, 2, figsize=(14, 12)) # Plot key variables plot_vars = [ (6, 'XMEAS 7: Reactor P', 'xmeas'), (8, 'XMEAS 9: Reactor T', 'xmeas'), (7, 'XMEAS 8: Reactor Level', 'xmeas'), (14, 'XMEAS 15: Sep Pressure', 'xmeas'), (2, 'XMV 3: A Feed', 'xmv'), (9, 'XMV 10: Reactor CW', 'xmv'), (8, 'XMV 9: Stripper Steam', 'xmv'), (10, 'XMV 11: Condenser CW', 'xmv'), ] for ax, (idx, title, vtype) in zip(axes.flat, plot_vars): if vtype == 'xmeas': f_data = f_xmeas[:n_compare, idx] p_data = py_result.measurements[:n_compare, idx] else: f_data = f_xmv[:n_compare, idx] p_data = py_result.manipulated_vars[:n_compare, idx] ax.plot(f_time[:n_compare], f_data, 'b-', label='Fortran', alpha=0.6, lw=0.5) ax.plot(py_result.time[:n_compare], p_data, 'r-', label='Python', alpha=0.6, lw=0.5) ax.set_title(title) ax.legend(loc='upper right', fontsize=8) ax.grid(True, alpha=0.3) axes[-1, 0].set_xlabel('Time (hours)') axes[-1, 1].set_xlabel('Time (hours)') plt.suptitle('Fortran vs Python: Statistical Comparison\n(Different noise sequences expected)', y=1.02) plt.tight_layout() plt.savefig('validation_comparison.png', dpi=150, bbox_inches='tight') print(f"\nPlot saved to: validation_comparison.png") except ImportError: print("\nMatplotlib not available for plotting") except Exception as e: print(f"\nCould not create plot: {e}") return 0 if __name__ == "__main__": sys.exit(main())