Spaces:

jkitchin
/

tennessee-eastman-process

Running

App Files Files Community

tennessee-eastman-process / Deploy from GitHub Actions /examples /validate_statistics.py

jkitchin

Upload folder using huggingface_hub

f5f42f3 verified 14 days ago

raw

history blame contribute delete

9.62 kB

	#!/usr/bin/env python
	"""Validate Python TEP simulation by comparing statistical properties with Fortran.

	Since both simulations add random noise with different sequences, we can't expect
	sample-by-sample matching. Instead, we validate:
	1. Same steady-state operating point (mean values)
	2. Same noise characteristics (standard deviations)
	3. Same dynamic behavior (autocorrelation structure)

	Usage:
	# First run Fortran simulation:
	make fortran-run

	# Then run this validation:
	python examples/validate_statistics.py
	"""

	import numpy as np
	from pathlib import Path
	import sys

	sys.path.insert(0, str(Path(__file__).parent.parent))

	from tep import TEPSimulator


	def load_fortran_data(base_path: Path = Path(".")):
	"""Load Fortran output files."""
	time_sec = np.loadtxt(base_path / "TE_data_inc.dat")
	time_hr = time_sec / 3600.0
	n_samples = len(time_sec)

	# Load measurements (XMEAS 1-41)
	xmeas = np.zeros((n_samples, 41))
	for i in range(10):
	filename = base_path / f"TE_data_me{i+1:02d}.dat"
	data = np.loadtxt(filename)
	start_idx = i * 4
	end_idx = min(start_idx + 4, 41)
	n_cols = end_idx - start_idx
	xmeas[:, start_idx:end_idx] = data[:, :n_cols]

	filename = base_path / "TE_data_me11.dat"
	data = np.loadtxt(filename)
	xmeas[:, 40] = data if data.ndim == 1 else data[:, 0]

	# Load manipulated variables (XMV 1-12)
	xmv = np.zeros((n_samples, 12))
	for i in range(3):
	filename = base_path / f"TE_data_mv{i+1}.dat"
	data = np.loadtxt(filename)
	xmv[:, i4:(i+1)4] = data

	return time_hr, xmeas, xmv


	def compute_statistics(data, skip_initial=60):
	"""Compute mean, std, and autocorrelation lag-1.

	Args:
	data: Time series array (n_samples,) or (n_samples, n_vars)
	skip_initial: Number of samples to skip for settling (default 60 = 3 hours)
	"""
	data = data[skip_initial:] # Skip initial transient

	if data.ndim == 1:
	data = data.reshape(-1, 1)

	n_samples, n_vars = data.shape

	stats = {
	'mean': np.mean(data, axis=0),
	'std': np.std(data, axis=0),
	'autocorr': np.zeros(n_vars)
	}

	# Compute lag-1 autocorrelation
	for j in range(n_vars):
	x = data[:-1, j] - stats['mean'][j]
	y = data[1:, j] - stats['mean'][j]
	if np.std(x) > 1e-10 and np.std(y) > 1e-10:
	stats['autocorr'][j] = np.corrcoef(x, y)[0, 1]

	return stats


	def main():
	print("Loading Fortran output data...")
	try:
	f_time, f_xmeas, f_xmv = load_fortran_data()
	except FileNotFoundError as e:
	print(f"Error: {e}")
	print("Run 'make fortran-run' first to generate Fortran output.")
	return 1

	duration = f_time[-1]
	print(f"Fortran simulation: {len(f_time)} samples over {duration:.1f} hours")

	print(f"\nRunning Python simulation for {duration:.1f} hours...")
	sim = TEPSimulator()
	sim.initialize()
	py_result = sim.simulate(duration_hours=duration, record_interval=180)
	print(f"Python simulation: {len(py_result.time)} samples")

	# Compute statistics for both (skip first 3 hours for settling)
	n_compare = min(len(f_time), len(py_result.time))

	f_xmeas_stats = compute_statistics(f_xmeas[:n_compare])
	p_xmeas_stats = compute_statistics(py_result.measurements[:n_compare])

	f_xmv_stats = compute_statistics(f_xmv[:n_compare])
	p_xmv_stats = compute_statistics(py_result.manipulated_vars[:n_compare])

	# Key process variables to focus on
	key_xmeas = [
	(7, "Reactor Pressure (kPa)"),
	(8, "Reactor Level (%)"),
	(9, "Reactor Temperature (°C)"),
	(11, "Separator Level (%)"),
	(12, "Separator Pressure (kPa)"),
	(15, "Stripper Level (%)"),
	(17, "Stripper Underflow (m³/hr)"),
	(21, "CW Outlet Temp (°C)"),
	]

	print("\n" + "="*80)
	print("STATISTICAL VALIDATION: Key Process Measurements")
	print("="*80)
	print(f"{'Variable':<30} {'Fortran':<20} {'Python':<20} {'Diff %':<10}")
	print("-"*80)

	xmeas_pass = 0
	xmeas_fail = 0

	for idx, name in key_xmeas:
	i = idx - 1 # Convert to 0-indexed
	f_mean = f_xmeas_stats['mean'][i]
	p_mean = p_xmeas_stats['mean'][i]
	f_std = f_xmeas_stats['std'][i]
	p_std = p_xmeas_stats['std'][i]

	# Mean difference
	if abs(f_mean) > 1e-6:
	mean_diff = abs(f_mean - p_mean) / abs(f_mean) * 100
	else:
	mean_diff = abs(f_mean - p_mean) * 100

	# Std ratio (should be close to 1.0)
	if f_std > 1e-6:
	std_ratio = p_std / f_std
	else:
	std_ratio = 1.0 if p_std < 1e-6 else float('inf')

	status = "✓" if mean_diff < 5.0 and 0.5 < std_ratio < 2.0 else "✗"
	if status == "✓":
	xmeas_pass += 1
	else:
	xmeas_fail += 1

	print(f"XMEAS{idx:<2} {name:<22} "
	f"{f_mean:>8.2f}±{f_std:<8.2f} "
	f"{p_mean:>8.2f}±{p_std:<8.2f} "
	f"{mean_diff:>6.2f}% {status}")

	print("\n" + "="*80)
	print("STATISTICAL VALIDATION: Manipulated Variables")
	print("="*80)
	print(f"{'Variable':<30} {'Fortran':<20} {'Python':<20} {'Diff %':<10}")
	print("-"*80)

	mv_names = [
	"D Feed Flow",
	"E Feed Flow",
	"A Feed Flow",
	"A+C Feed Flow",
	"Compressor Recycle",
	"Purge Valve",
	"Separator Pot Liquid",
	"Stripper Pot Liquid",
	"Stripper Steam",
	"Reactor CW",
	"Condenser CW",
	"Agitator Speed"
	]

	xmv_pass = 0
	xmv_fail = 0

	for i in range(12):
	f_mean = f_xmv_stats['mean'][i]
	p_mean = p_xmv_stats['mean'][i]
	f_std = f_xmv_stats['std'][i]
	p_std = p_xmv_stats['std'][i]

	if abs(f_mean) > 1e-6:
	mean_diff = abs(f_mean - p_mean) / abs(f_mean) * 100
	else:
	mean_diff = abs(f_mean - p_mean) * 100

	if f_std > 1e-6:
	std_ratio = p_std / f_std
	else:
	std_ratio = 1.0 if p_std < 1e-6 else float('inf')

	status = "✓" if mean_diff < 10.0 and 0.3 < std_ratio < 3.0 else "✗"
	if status == "✓":
	xmv_pass += 1
	else:
	xmv_fail += 1

	print(f"XMV{i+1:<3} {mv_names[i]:<22} "
	f"{f_mean:>8.2f}±{f_std:<8.2f} "
	f"{p_mean:>8.2f}±{p_std:<8.2f} "
	f"{mean_diff:>6.2f}% {status}")

	print("\n" + "="*80)
	print("SUMMARY")
	print("="*80)
	print(f"Key measurements passed: {xmeas_pass}/{xmeas_pass + xmeas_fail}")
	print(f"Manipulated variables passed: {xmv_pass}/{xmv_pass + xmv_fail}")
	print(f"\nFortran shutdown: No (ran to completion)")
	print(f"Python shutdown: {'Yes - ' + py_result.shutdown_reason if py_result.shutdown else 'No (ran to completion)'}")

	# Overall assessment
	total_pass = xmeas_pass + xmv_pass
	total = xmeas_pass + xmeas_fail + xmv_pass + xmv_fail

	print("\n" + "="*80)
	if total_pass >= total * 0.8 and not py_result.shutdown:
	print("VALIDATION PASSED: Python implementation matches Fortran statistically")
	print("\nNote: Exact sample-by-sample matching is not expected because both")
	print("simulations use random measurement noise with different sequences.")
	print("The statistical properties (mean, variance) should match.")
	else:
	print("VALIDATION FAILED: Significant differences detected")
	print("Please investigate the following:")
	print("- Check controller gains and setpoints")
	print("- Check process constants and parameters")
	print("- Check integration timestep")
	print("="*80)

	# Try to create comparison plot
	try:
	import matplotlib.pyplot as plt

	fig, axes = plt.subplots(4, 2, figsize=(14, 12))

	# Plot key variables
	plot_vars = [
	(6, 'XMEAS 7: Reactor P', 'xmeas'),
	(8, 'XMEAS 9: Reactor T', 'xmeas'),
	(7, 'XMEAS 8: Reactor Level', 'xmeas'),
	(14, 'XMEAS 15: Sep Pressure', 'xmeas'),
	(2, 'XMV 3: A Feed', 'xmv'),
	(9, 'XMV 10: Reactor CW', 'xmv'),
	(8, 'XMV 9: Stripper Steam', 'xmv'),
	(10, 'XMV 11: Condenser CW', 'xmv'),
	]

	for ax, (idx, title, vtype) in zip(axes.flat, plot_vars):
	if vtype == 'xmeas':
	f_data = f_xmeas[:n_compare, idx]
	p_data = py_result.measurements[:n_compare, idx]
	else:
	f_data = f_xmv[:n_compare, idx]
	p_data = py_result.manipulated_vars[:n_compare, idx]

	ax.plot(f_time[:n_compare], f_data, 'b-', label='Fortran', alpha=0.6, lw=0.5)
	ax.plot(py_result.time[:n_compare], p_data, 'r-', label='Python', alpha=0.6, lw=0.5)
	ax.set_title(title)
	ax.legend(loc='upper right', fontsize=8)
	ax.grid(True, alpha=0.3)

	axes[-1, 0].set_xlabel('Time (hours)')
	axes[-1, 1].set_xlabel('Time (hours)')

	plt.suptitle('Fortran vs Python: Statistical Comparison\n(Different noise sequences expected)', y=1.02)
	plt.tight_layout()
	plt.savefig('validation_comparison.png', dpi=150, bbox_inches='tight')
	print(f"\nPlot saved to: validation_comparison.png")

	except ImportError:
	print("\nMatplotlib not available for plotting")
	except Exception as e:
	print(f"\nCould not create plot: {e}")

	return 0


	if __name__ == "__main__":
	sys.exit(main())