jkitchin's picture
Upload folder using huggingface_hub
f5f42f3 verified
#!/usr/bin/env python
"""Validate Python TEP simulation by comparing statistical properties with Fortran.
Since both simulations add random noise with different sequences, we can't expect
sample-by-sample matching. Instead, we validate:
1. Same steady-state operating point (mean values)
2. Same noise characteristics (standard deviations)
3. Same dynamic behavior (autocorrelation structure)
Usage:
# First run Fortran simulation:
make fortran-run
# Then run this validation:
python examples/validate_statistics.py
"""
import numpy as np
from pathlib import Path
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
from tep import TEPSimulator
def load_fortran_data(base_path: Path = Path(".")):
"""Load Fortran output files."""
time_sec = np.loadtxt(base_path / "TE_data_inc.dat")
time_hr = time_sec / 3600.0
n_samples = len(time_sec)
# Load measurements (XMEAS 1-41)
xmeas = np.zeros((n_samples, 41))
for i in range(10):
filename = base_path / f"TE_data_me{i+1:02d}.dat"
data = np.loadtxt(filename)
start_idx = i * 4
end_idx = min(start_idx + 4, 41)
n_cols = end_idx - start_idx
xmeas[:, start_idx:end_idx] = data[:, :n_cols]
filename = base_path / "TE_data_me11.dat"
data = np.loadtxt(filename)
xmeas[:, 40] = data if data.ndim == 1 else data[:, 0]
# Load manipulated variables (XMV 1-12)
xmv = np.zeros((n_samples, 12))
for i in range(3):
filename = base_path / f"TE_data_mv{i+1}.dat"
data = np.loadtxt(filename)
xmv[:, i*4:(i+1)*4] = data
return time_hr, xmeas, xmv
def compute_statistics(data, skip_initial=60):
"""Compute mean, std, and autocorrelation lag-1.
Args:
data: Time series array (n_samples,) or (n_samples, n_vars)
skip_initial: Number of samples to skip for settling (default 60 = 3 hours)
"""
data = data[skip_initial:] # Skip initial transient
if data.ndim == 1:
data = data.reshape(-1, 1)
n_samples, n_vars = data.shape
stats = {
'mean': np.mean(data, axis=0),
'std': np.std(data, axis=0),
'autocorr': np.zeros(n_vars)
}
# Compute lag-1 autocorrelation
for j in range(n_vars):
x = data[:-1, j] - stats['mean'][j]
y = data[1:, j] - stats['mean'][j]
if np.std(x) > 1e-10 and np.std(y) > 1e-10:
stats['autocorr'][j] = np.corrcoef(x, y)[0, 1]
return stats
def main():
print("Loading Fortran output data...")
try:
f_time, f_xmeas, f_xmv = load_fortran_data()
except FileNotFoundError as e:
print(f"Error: {e}")
print("Run 'make fortran-run' first to generate Fortran output.")
return 1
duration = f_time[-1]
print(f"Fortran simulation: {len(f_time)} samples over {duration:.1f} hours")
print(f"\nRunning Python simulation for {duration:.1f} hours...")
sim = TEPSimulator()
sim.initialize()
py_result = sim.simulate(duration_hours=duration, record_interval=180)
print(f"Python simulation: {len(py_result.time)} samples")
# Compute statistics for both (skip first 3 hours for settling)
n_compare = min(len(f_time), len(py_result.time))
f_xmeas_stats = compute_statistics(f_xmeas[:n_compare])
p_xmeas_stats = compute_statistics(py_result.measurements[:n_compare])
f_xmv_stats = compute_statistics(f_xmv[:n_compare])
p_xmv_stats = compute_statistics(py_result.manipulated_vars[:n_compare])
# Key process variables to focus on
key_xmeas = [
(7, "Reactor Pressure (kPa)"),
(8, "Reactor Level (%)"),
(9, "Reactor Temperature (°C)"),
(11, "Separator Level (%)"),
(12, "Separator Pressure (kPa)"),
(15, "Stripper Level (%)"),
(17, "Stripper Underflow (m³/hr)"),
(21, "CW Outlet Temp (°C)"),
]
print("\n" + "="*80)
print("STATISTICAL VALIDATION: Key Process Measurements")
print("="*80)
print(f"{'Variable':<30} {'Fortran':<20} {'Python':<20} {'Diff %':<10}")
print("-"*80)
xmeas_pass = 0
xmeas_fail = 0
for idx, name in key_xmeas:
i = idx - 1 # Convert to 0-indexed
f_mean = f_xmeas_stats['mean'][i]
p_mean = p_xmeas_stats['mean'][i]
f_std = f_xmeas_stats['std'][i]
p_std = p_xmeas_stats['std'][i]
# Mean difference
if abs(f_mean) > 1e-6:
mean_diff = abs(f_mean - p_mean) / abs(f_mean) * 100
else:
mean_diff = abs(f_mean - p_mean) * 100
# Std ratio (should be close to 1.0)
if f_std > 1e-6:
std_ratio = p_std / f_std
else:
std_ratio = 1.0 if p_std < 1e-6 else float('inf')
status = "✓" if mean_diff < 5.0 and 0.5 < std_ratio < 2.0 else "✗"
if status == "✓":
xmeas_pass += 1
else:
xmeas_fail += 1
print(f"XMEAS{idx:<2} {name:<22} "
f"{f_mean:>8.2f}±{f_std:<8.2f} "
f"{p_mean:>8.2f}±{p_std:<8.2f} "
f"{mean_diff:>6.2f}% {status}")
print("\n" + "="*80)
print("STATISTICAL VALIDATION: Manipulated Variables")
print("="*80)
print(f"{'Variable':<30} {'Fortran':<20} {'Python':<20} {'Diff %':<10}")
print("-"*80)
mv_names = [
"D Feed Flow",
"E Feed Flow",
"A Feed Flow",
"A+C Feed Flow",
"Compressor Recycle",
"Purge Valve",
"Separator Pot Liquid",
"Stripper Pot Liquid",
"Stripper Steam",
"Reactor CW",
"Condenser CW",
"Agitator Speed"
]
xmv_pass = 0
xmv_fail = 0
for i in range(12):
f_mean = f_xmv_stats['mean'][i]
p_mean = p_xmv_stats['mean'][i]
f_std = f_xmv_stats['std'][i]
p_std = p_xmv_stats['std'][i]
if abs(f_mean) > 1e-6:
mean_diff = abs(f_mean - p_mean) / abs(f_mean) * 100
else:
mean_diff = abs(f_mean - p_mean) * 100
if f_std > 1e-6:
std_ratio = p_std / f_std
else:
std_ratio = 1.0 if p_std < 1e-6 else float('inf')
status = "✓" if mean_diff < 10.0 and 0.3 < std_ratio < 3.0 else "✗"
if status == "✓":
xmv_pass += 1
else:
xmv_fail += 1
print(f"XMV{i+1:<3} {mv_names[i]:<22} "
f"{f_mean:>8.2f}±{f_std:<8.2f} "
f"{p_mean:>8.2f}±{p_std:<8.2f} "
f"{mean_diff:>6.2f}% {status}")
print("\n" + "="*80)
print("SUMMARY")
print("="*80)
print(f"Key measurements passed: {xmeas_pass}/{xmeas_pass + xmeas_fail}")
print(f"Manipulated variables passed: {xmv_pass}/{xmv_pass + xmv_fail}")
print(f"\nFortran shutdown: No (ran to completion)")
print(f"Python shutdown: {'Yes - ' + py_result.shutdown_reason if py_result.shutdown else 'No (ran to completion)'}")
# Overall assessment
total_pass = xmeas_pass + xmv_pass
total = xmeas_pass + xmeas_fail + xmv_pass + xmv_fail
print("\n" + "="*80)
if total_pass >= total * 0.8 and not py_result.shutdown:
print("VALIDATION PASSED: Python implementation matches Fortran statistically")
print("\nNote: Exact sample-by-sample matching is not expected because both")
print("simulations use random measurement noise with different sequences.")
print("The statistical properties (mean, variance) should match.")
else:
print("VALIDATION FAILED: Significant differences detected")
print("Please investigate the following:")
print("- Check controller gains and setpoints")
print("- Check process constants and parameters")
print("- Check integration timestep")
print("="*80)
# Try to create comparison plot
try:
import matplotlib.pyplot as plt
fig, axes = plt.subplots(4, 2, figsize=(14, 12))
# Plot key variables
plot_vars = [
(6, 'XMEAS 7: Reactor P', 'xmeas'),
(8, 'XMEAS 9: Reactor T', 'xmeas'),
(7, 'XMEAS 8: Reactor Level', 'xmeas'),
(14, 'XMEAS 15: Sep Pressure', 'xmeas'),
(2, 'XMV 3: A Feed', 'xmv'),
(9, 'XMV 10: Reactor CW', 'xmv'),
(8, 'XMV 9: Stripper Steam', 'xmv'),
(10, 'XMV 11: Condenser CW', 'xmv'),
]
for ax, (idx, title, vtype) in zip(axes.flat, plot_vars):
if vtype == 'xmeas':
f_data = f_xmeas[:n_compare, idx]
p_data = py_result.measurements[:n_compare, idx]
else:
f_data = f_xmv[:n_compare, idx]
p_data = py_result.manipulated_vars[:n_compare, idx]
ax.plot(f_time[:n_compare], f_data, 'b-', label='Fortran', alpha=0.6, lw=0.5)
ax.plot(py_result.time[:n_compare], p_data, 'r-', label='Python', alpha=0.6, lw=0.5)
ax.set_title(title)
ax.legend(loc='upper right', fontsize=8)
ax.grid(True, alpha=0.3)
axes[-1, 0].set_xlabel('Time (hours)')
axes[-1, 1].set_xlabel('Time (hours)')
plt.suptitle('Fortran vs Python: Statistical Comparison\n(Different noise sequences expected)', y=1.02)
plt.tight_layout()
plt.savefig('validation_comparison.png', dpi=150, bbox_inches='tight')
print(f"\nPlot saved to: validation_comparison.png")
except ImportError:
print("\nMatplotlib not available for plotting")
except Exception as e:
print(f"\nCould not create plot: {e}")
return 0
if __name__ == "__main__":
sys.exit(main())