Spaces:

InstaDeepAI
/

sentinel

Runtime error

App Files Files Community

sentinel / tests /test_risk_models /test_plcom2012_model.py

jeuko

Sync from GitHub (main)

8018595 verified about 2 months ago

raw

history blame contribute delete

25.7 kB

	"""Tests for the PLCOm2012 Lung Cancer Risk Model.

	Ground truth values are calculated from authors' reference implementation in
	https://brocku.ca/lung-cancer-screening-and-risk-prediction/risk-calculators/
	and the reference implementation in R: https://github.com/resplab/PLCOm2012.
	"""

	import pytest

	from sentinel.risk_models.plcom2012 import PLCOm2012RiskModel
	from sentinel.user_input import (
	Anthropometrics,
	CancerType,
	ChronicCondition,
	Demographics,
	Ethnicity,
	FamilyMemberCancer,
	FamilyRelation,
	FamilySide,
	Lifestyle,
	PersonalMedicalHistory,
	RelationshipDegree,
	Sex,
	SmokingHistory,
	SmokingStatus,
	UserInput,
	)

	# Reference cases with precomputed results (UserInput built inline, like the Gail tests).
	# Each entry holds a pytest id ("name"), the model input ("input") and the expected
	# score in percent ("expected").
	# Every case uses a 175 cm height and writes weight_kg as bmi * 1.75**2, so the leading
	# factor of each weight expression is the BMI being exercised.
	GROUND_TRUTH_CASES = [
	    dict(
	        name="low_risk_current_smoker",
	        input=UserInput(
	            demographics=Demographics(
	                age_years=55,
	                sex=Sex.MALE,
	                ethnicity=Ethnicity.WHITE,
	                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=25.0 * 1.75**2),
	                education_level=4,
	            ),
	            lifestyle=Lifestyle(
	                smoking=SmokingHistory(
	                    status=SmokingStatus.CURRENT,
	                    cigarettes_per_day=10,
	                    years_smoked=20,
	                    years_since_quit=None,
	                ),
	            ),
	            personal_medical_history=PersonalMedicalHistory(
	                chronic_conditions=[],
	                previous_cancers=[],
	            ),
	            family_history=[],
	        ),
	        expected=0.31,
	    ),
	    dict(
	        name="moderate_risk_former_smoker",
	        input=UserInput(
	            demographics=Demographics(
	                age_years=62,
	                sex=Sex.MALE,
	                ethnicity=Ethnicity.WHITE,
	                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=27.0 * 1.75**2),
	                education_level=3,
	            ),
	            lifestyle=Lifestyle(
	                smoking=SmokingHistory(
	                    status=SmokingStatus.FORMER,
	                    cigarettes_per_day=20,
	                    years_smoked=30,
	                    years_since_quit=5,
	                ),
	            ),
	            personal_medical_history=PersonalMedicalHistory(
	                chronic_conditions=[],
	                previous_cancers=[],
	            ),
	            family_history=[],
	        ),
	        expected=1.24,
	    ),
	    dict(
	        name="high_risk_multiple_factors",
	        input=UserInput(
	            demographics=Demographics(
	                age_years=70,
	                sex=Sex.MALE,
	                ethnicity=Ethnicity.WHITE,
	                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=22.0 * 1.75**2),
	                education_level=2,
	            ),
	            lifestyle=Lifestyle(
	                smoking=SmokingHistory(
	                    status=SmokingStatus.CURRENT,
	                    cigarettes_per_day=40,
	                    years_smoked=45,
	                    years_since_quit=None,
	                ),
	            ),
	            personal_medical_history=PersonalMedicalHistory(
	                chronic_conditions=[ChronicCondition.COPD],
	                previous_cancers=[CancerType.BREAST],
	            ),
	            family_history=[
	                FamilyMemberCancer(
	                    relation=FamilyRelation.MOTHER,
	                    side=FamilySide.MATERNAL,
	                    degree=RelationshipDegree.FIRST,
	                    cancer_type=CancerType.LUNG,
	                    age_at_diagnosis=65,
	                ),
	            ],
	        ),
	        expected=31.19,
	    ),
	    dict(
	        name="black_race_variant",
	        input=UserInput(
	            demographics=Demographics(
	                age_years=58,
	                sex=Sex.MALE,
	                ethnicity=Ethnicity.BLACK,
	                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=28.0 * 1.75**2),
	                education_level=4,
	            ),
	            lifestyle=Lifestyle(
	                smoking=SmokingHistory(
	                    status=SmokingStatus.FORMER,
	                    cigarettes_per_day=15,
	                    years_smoked=25,
	                    years_since_quit=8,
	                ),
	            ),
	            personal_medical_history=PersonalMedicalHistory(
	                chronic_conditions=[],
	                previous_cancers=[],
	            ),
	            family_history=[],
	        ),
	        expected=0.696,
	    ),
	    dict(
	        name="hispanic_low_education",
	        input=UserInput(
	            demographics=Demographics(
	                age_years=60,
	                sex=Sex.MALE,
	                ethnicity=Ethnicity.HISPANIC,
	                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=30.0 * 1.75**2),
	                education_level=1,
	            ),
	            lifestyle=Lifestyle(
	                smoking=SmokingHistory(
	                    status=SmokingStatus.CURRENT,
	                    cigarettes_per_day=25,
	                    years_smoked=35,
	                    years_since_quit=None,
	                ),
	            ),
	            personal_medical_history=PersonalMedicalHistory(
	                chronic_conditions=[],
	                previous_cancers=[],
	            ),
	            family_history=[],
	        ),
	        expected=1.161,
	    ),
	    dict(
	        name="asian_former_heavy_smoker",
	        input=UserInput(
	            demographics=Demographics(
	                age_years=65,
	                sex=Sex.MALE,
	                ethnicity=Ethnicity.ASIAN,
	                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=24.0 * 1.75**2),
	                education_level=5,
	            ),
	            lifestyle=Lifestyle(
	                smoking=SmokingHistory(
	                    status=SmokingStatus.FORMER,
	                    cigarettes_per_day=25,
	                    years_smoked=35,
	                    years_since_quit=3,
	                ),
	            ),
	            personal_medical_history=PersonalMedicalHistory(
	                chronic_conditions=[ChronicCondition.COPD],
	                previous_cancers=[],
	            ),
	            family_history=[
	                FamilyMemberCancer(
	                    relation=FamilyRelation.MOTHER,
	                    side=FamilySide.MATERNAL,
	                    degree=RelationshipDegree.FIRST,
	                    cancer_type=CancerType.LUNG,
	                    age_at_diagnosis=65,
	                ),
	            ],
	        ),
	        expected=3.40,
	    ),
	]


	class TestPLCOm2012Model:
	"""Test suite for PLCOm2012RiskModel."""

	def setup_method(self):
	    """Give each test its own PLCOm2012RiskModel, stored on ``self.model``."""
	    # Built with no arguments; the tests in this class only go through self.model.
	    self.model = PLCOm2012RiskModel()

	@pytest.mark.parametrize("case", GROUND_TRUTH_CASES, ids=lambda entry: entry["name"])
	def test_ground_truth_validation(self, case):
	    """Compare the computed score with a precomputed reference value.

	    Args:
	        case: One GROUND_TRUTH_CASES entry with "name", "input" and "expected" keys.
	    """
	    raw_score = self.model.compute_score(case["input"])
	    # The score is parsed as a number after stripping any trailing "%".
	    actual_percent = float(raw_score.rstrip("%"))
	    reference_percent = case["expected"]

	    # Reference values are calculated, so only a small absolute tolerance is allowed.
	    assert actual_percent == pytest.approx(reference_percent, abs=0.01)

	def test_user_input_integration_current_smoker(self):
	    """Score a fully specified current smoker built from the UserInput model.

	    The result must not be the never-smoker N/A message and must be a
	    positive percentage string.
	    """
	    demographics = Demographics(
	        age_years=60,
	        sex=Sex.MALE,
	        ethnicity=Ethnicity.WHITE,
	        anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	        education_level=4,
	    )
	    smoking = SmokingHistory(
	        status=SmokingStatus.CURRENT,
	        cigarettes_per_day=20,
	        years_smoked=25,
	        years_since_quit=None,
	    )
	    medical_history = PersonalMedicalHistory(chronic_conditions=[], previous_cancers=[])
	    user = UserInput(
	        demographics=demographics,
	        lifestyle=Lifestyle(smoking=smoking),
	        personal_medical_history=medical_history,
	        family_history=[],
	    )

	    score = self.model.compute_score(user)

	    assert score != "N/A: Model is for current or former smokers only."
	    assert "%" in score
	    # Parse only after the "%" check so a malformed score fails on that assertion first.
	    percent_value = float(score.replace("%", ""))
	    assert percent_value > 0

	def test_user_input_integration_former_smoker(self):
	    """Score a former smoker with COPD, a prior cancer and a father with lung cancer.

	    The result must not be the never-smoker N/A message and must be a
	    positive percentage string.
	    """
	    demographics = Demographics(
	        age_years=65,
	        sex=Sex.FEMALE,
	        ethnicity=Ethnicity.BLACK,
	        anthropometrics=Anthropometrics(height_cm=160.0, weight_kg=70.0),
	        education_level=3,
	    )
	    smoking = SmokingHistory(
	        status=SmokingStatus.FORMER,
	        cigarettes_per_day=15,
	        years_smoked=30,
	        years_since_quit=10,
	    )
	    medical_history = PersonalMedicalHistory(
	        chronic_conditions=[ChronicCondition.COPD],
	        previous_cancers=[CancerType.BREAST],
	    )
	    father = FamilyMemberCancer(
	        relation=FamilyRelation.FATHER,
	        side=FamilySide.PATERNAL,
	        degree=RelationshipDegree.FIRST,
	        cancer_type=CancerType.LUNG,
	        age_at_diagnosis=68,
	    )
	    user = UserInput(
	        demographics=demographics,
	        lifestyle=Lifestyle(smoking=smoking),
	        personal_medical_history=medical_history,
	        family_history=[father],
	    )

	    score = self.model.compute_score(user)

	    assert score != "N/A: Model is for current or former smokers only."
	    assert "%" in score
	    # Parse only after the "%" check so a malformed score fails on that assertion first.
	    percent_value = float(score.replace("%", ""))
	    assert percent_value > 0

	def test_never_smoker_handling(self):
	    """A never smoker must get the fixed N/A message instead of a percentage."""
	    no_smoking = SmokingHistory(
	        status=SmokingStatus.NEVER,
	        cigarettes_per_day=0,
	        years_smoked=0,
	        years_since_quit=None,
	    )
	    demographics = Demographics(
	        age_years=55,
	        sex=Sex.MALE,
	        ethnicity=Ethnicity.WHITE,
	        anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	        education_level=4,
	    )
	    never_smoker = UserInput(
	        demographics=demographics,
	        lifestyle=Lifestyle(smoking=no_smoking),
	        personal_medical_history=PersonalMedicalHistory(),
	        family_history=[],
	    )

	    result = self.model.compute_score(never_smoker)

	    # The message is compared verbatim, not by substring.
	    assert result == "N/A: Model is for current or former smokers only."

	def test_validation_errors(self):
	    """Test that a complete, in-range input passes validation.

	    Despite the method name, no error is expected here: every field the
	    other tests in this class treat as required (age, anthropometrics,
	    education level, smoking history) is present, so compute_score must
	    return a positive percentage string rather than raise.
	    """
	    user = UserInput(
	        demographics=Demographics(
	            age_years=60,
	            sex=Sex.MALE,
	            ethnicity=Ethnicity.WHITE,
	            anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	            education_level=4,
	        ),
	        lifestyle=Lifestyle(
	            smoking=SmokingHistory(
	                status=SmokingStatus.CURRENT,
	                cigarettes_per_day=20,
	                years_smoked=25,
	                years_since_quit=None,
	            ),
	        ),
	        personal_medical_history=PersonalMedicalHistory(),
	        family_history=[],
	    )

	    # This should pass validation since all required fields are present
	    score = self.model.compute_score(user)
	    assert "%" in score
	    # Same positivity check as the integration tests, so a "0%"-style placeholder
	    # cannot slip through as a valid result.
	    assert float(score.replace("%", "")) > 0

	def test_age_out_of_range(self):
	    """An age of 45 must make compute_score raise ValueError."""
	    too_young = Demographics(
	        age_years=45,  # Below minimum
	        sex=Sex.MALE,
	        ethnicity=Ethnicity.WHITE,
	        anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	        education_level=4,
	    )
	    smoking = SmokingHistory(
	        status=SmokingStatus.CURRENT,
	        cigarettes_per_day=20,
	        years_smoked=25,
	        years_since_quit=None,
	    )
	    user = UserInput(
	        demographics=too_young,
	        lifestyle=Lifestyle(smoking=smoking),
	        personal_medical_history=PersonalMedicalHistory(),
	        family_history=[],
	    )

	    # Only the message prefix is matched; the rest of the error text is not pinned.
	    with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
	        self.model.compute_score(user)

	def test_age_validation_legacy(self):
	    """Ages just outside the 50-80 range (49 and 81) must raise ValueError.

	    Legacy check: the age limits are now enforced by input validation, so
	    an exception is expected on both sides of the range.
	    """
	    # 49 is checked first, then 81; everything else about the user is identical.
	    for age in (49, 81):
	        user = UserInput(
	            demographics=Demographics(
	                age_years=age,
	                sex=Sex.MALE,
	                ethnicity=Ethnicity.WHITE,
	                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=75.0),
	                education_level=4,
	            ),
	            lifestyle=Lifestyle(
	                smoking=SmokingHistory(
	                    status=SmokingStatus.CURRENT,
	                    cigarettes_per_day=20,
	                    years_smoked=25,
	                    years_since_quit=None,
	                ),
	            ),
	            personal_medical_history=PersonalMedicalHistory(),
	            family_history=[],
	        )
	        with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
	            self.model.compute_score(user)

	def test_missing_bmi_data(self):
	    """Placeholder for the missing-BMI case; intentionally asserts nothing."""
	    # Missing BMI is now covered by input validation: anthropometrics is required,
	    # and Pydantic validation prevents building a UserInput without it, so there
	    # is no model-level behaviour left to exercise here.

	def test_missing_education_level(self):
	    """Leaving out education_level must make compute_score raise ValueError."""
	    # education_level is deliberately not passed to Demographics.
	    demographics = Demographics(
	        age_years=60,
	        sex=Sex.MALE,
	        ethnicity=Ethnicity.WHITE,
	        anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	    )
	    smoking = SmokingHistory(
	        status=SmokingStatus.CURRENT,
	        cigarettes_per_day=20,
	        years_smoked=25,
	        years_since_quit=None,
	    )
	    user = UserInput(
	        demographics=demographics,
	        lifestyle=Lifestyle(smoking=smoking),
	        personal_medical_history=PersonalMedicalHistory(),
	        family_history=[],
	    )

	    with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
	        self.model.compute_score(user)

	def test_missing_smoking_intensity(self):
	    """Zero cigarettes per day must yield a "Calculation failed" message."""
	    # Handled inside the model rather than by input validation: an intensity of 0
	    # leads to a division by zero in the calculation.
	    zero_intensity = SmokingHistory(
	        status=SmokingStatus.CURRENT,
	        cigarettes_per_day=0,  # This will cause division by zero
	        years_smoked=25,
	        years_since_quit=None,
	    )
	    demographics = Demographics(
	        age_years=60,
	        sex=Sex.MALE,
	        ethnicity=Ethnicity.WHITE,
	        anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	        education_level=4,
	    )
	    user = UserInput(
	        demographics=demographics,
	        lifestyle=Lifestyle(smoking=zero_intensity),
	        personal_medical_history=PersonalMedicalHistory(),
	        family_history=[],
	    )

	    # No exception is expected; the failure is reported through the returned string.
	    result = self.model.compute_score(user)
	    assert "Calculation failed" in result

	def test_missing_smoking_duration(self):
	    """Zero years smoked is accepted and still produces a percentage score."""
	    # Input validation only requires years_smoked >= 0, so 0 counts as valid input.
	    zero_duration = SmokingHistory(
	        status=SmokingStatus.CURRENT,
	        cigarettes_per_day=20,
	        years_smoked=0,  # This is valid input
	        years_since_quit=None,
	    )
	    demographics = Demographics(
	        age_years=60,
	        sex=Sex.MALE,
	        ethnicity=Ethnicity.WHITE,
	        anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	        education_level=4,
	    )
	    user = UserInput(
	        demographics=demographics,
	        lifestyle=Lifestyle(smoking=zero_duration),
	        personal_medical_history=PersonalMedicalHistory(),
	        family_history=[],
	    )

	    # A normal percentage string is expected, not an error or N/A message.
	    result = self.model.compute_score(user)
	    assert "%" in result

	def test_missing_quit_years_former_smoker(self):
	    """A former smoker without years_since_quit gets an explanatory message."""
	    incomplete_history = SmokingHistory(
	        status=SmokingStatus.FORMER,
	        cigarettes_per_day=20,
	        years_smoked=25,
	        years_since_quit=None,  # This will trigger N/A message
	    )
	    demographics = Demographics(
	        age_years=60,
	        sex=Sex.MALE,
	        ethnicity=Ethnicity.WHITE,
	        anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	        education_level=4,
	    )
	    user = UserInput(
	        demographics=demographics,
	        lifestyle=Lifestyle(smoking=incomplete_history),
	        personal_medical_history=PersonalMedicalHistory(),
	        family_history=[],
	    )

	    result = self.model.compute_score(user)

	    # Substring match: only this part of the returned message is pinned.
	    assert "Missing years since quitting for former smoker" in result

	def test_copd_detection(self):
	    """Test COPD detection from chronic illnesses.

	    Scores the same current smoker twice, once with COPD and diabetes and
	    once with diabetes only, and requires the COPD score to be strictly
	    higher. Checking only for a "%" sign would still pass if the model
	    ignored the chronic-condition list entirely.
	    """

	    def build_user(chronic_conditions):
	        # Identical user every time; only the chronic-condition list varies.
	        return UserInput(
	            demographics=Demographics(
	                age_years=60,
	                sex=Sex.MALE,
	                ethnicity=Ethnicity.WHITE,
	                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	                education_level=4,
	            ),
	            lifestyle=Lifestyle(
	                smoking=SmokingHistory(
	                    status=SmokingStatus.CURRENT,
	                    cigarettes_per_day=20,
	                    years_smoked=25,
	                    years_since_quit=None,
	                ),
	            ),
	            personal_medical_history=PersonalMedicalHistory(
	                chronic_conditions=chronic_conditions,
	            ),
	            family_history=[],
	        )

	    score_with_copd = self.model.compute_score(
	        build_user([ChronicCondition.COPD, ChronicCondition.DIABETES])
	    )
	    # Diabetes stays in the baseline so COPD is the only difference between the runs.
	    score_without_copd = self.model.compute_score(
	        build_user([ChronicCondition.DIABETES])
	    )

	    assert "%" in score_with_copd
	    assert "%" in score_without_copd

	    # COPD is a positive risk factor in the published PLCOm2012 model, so detecting
	    # it must raise the reported percentage.
	    assert float(score_with_copd.rstrip("%")) > float(score_without_copd.rstrip("%"))

	def test_family_history_lung_cancer_detection(self):
	    """Test lung cancer family history detection.

	    Scores the same current smoker with four family histories: none, a
	    first-degree relative only, a second-degree relative only, and both.
	    A first-degree relative with lung cancer must raise the score, while
	    the second-degree relative must not count, on its own or in addition
	    to the first-degree one.
	    """

	    def first_degree_relative():
	        return FamilyMemberCancer(
	            relation=FamilyRelation.MOTHER,
	            side=FamilySide.MATERNAL,
	            degree=RelationshipDegree.FIRST,
	            cancer_type=CancerType.LUNG,
	            age_at_diagnosis=65,
	        )

	    def second_degree_relative():
	        # Should not count (not first-degree relative)
	        return FamilyMemberCancer(
	            relation=FamilyRelation.MATERNAL_UNCLE,
	            side=FamilySide.MATERNAL,
	            degree=RelationshipDegree.SECOND,
	            cancer_type=CancerType.LUNG,
	            age_at_diagnosis=70,
	        )

	    def score_for(family_history):
	        # Identical user every time; only the family history varies.
	        user = UserInput(
	            demographics=Demographics(
	                age_years=60,
	                sex=Sex.MALE,
	                ethnicity=Ethnicity.WHITE,
	                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	                education_level=4,
	            ),
	            lifestyle=Lifestyle(
	                smoking=SmokingHistory(
	                    status=SmokingStatus.CURRENT,
	                    cigarettes_per_day=20,
	                    years_smoked=25,
	                    years_since_quit=None,
	                ),
	            ),
	            personal_medical_history=PersonalMedicalHistory(),
	            family_history=family_history,
	        )
	        return self.model.compute_score(user)

	    def as_percent(score):
	        return float(score.rstrip("%"))

	    # Original scenario: one first-degree and one second-degree relative.
	    combined_score = score_for([first_degree_relative(), second_degree_relative()])
	    assert "%" in combined_score

	    baseline = as_percent(score_for([]))
	    first_only = as_percent(score_for([first_degree_relative()]))
	    second_only = as_percent(score_for([second_degree_relative()]))
	    combined = as_percent(combined_score)

	    # A first-degree relative with lung cancer is a positive risk factor.
	    assert first_only > baseline
	    # A second-degree relative must leave the score untouched.
	    assert second_only == baseline
	    assert combined == first_only

	def test_race_handling(self):
	    """Every listed ethnicity must produce a positive percentage score."""
	    ethnicities = (
	        Ethnicity.WHITE,
	        Ethnicity.BLACK,
	        Ethnicity.HISPANIC,
	        Ethnicity.ASIAN,
	        Ethnicity.PACIFIC_ISLANDER,
	    )

	    # The user is rebuilt for each ethnicity; nothing else changes between runs.
	    for ethnicity in ethnicities:
	        demographics = Demographics(
	            age_years=60,
	            sex=Sex.MALE,
	            ethnicity=ethnicity,
	            anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
	            education_level=4,
	        )
	        smoking = SmokingHistory(
	            status=SmokingStatus.CURRENT,
	            cigarettes_per_day=20,
	            years_smoked=25,
	            years_since_quit=None,
	        )
	        user = UserInput(
	            demographics=demographics,
	            lifestyle=Lifestyle(smoking=smoking),
	            personal_medical_history=PersonalMedicalHistory(),
	            family_history=[],
	        )

	        score = self.model.compute_score(user)
	        assert "%" in score
	        percent_value = float(score.replace("%", ""))
	        assert percent_value > 0

	def test_model_metadata(self):
	    """Check the model's name, cancer type, description, interpretation and references."""
	    model = self.model

	    # Identity
	    assert model.name == "plcom2012"
	    assert model.cancer_type() == "lung"

	    # Free-text fields are matched by substring only.
	    assert "PLCOm2012" in model.description()
	    assert "6-year" in model.description()
	    assert "percentage chance" in model.interpretation()

	    # References: a non-empty list whose first entry names Tammemägi.
	    assert isinstance(model.references(), list)
	    assert len(model.references()) > 0
	    assert "Tammemägi" in model.references()[0]

	def test_smoking_status_encoding(self):
	    """Call calculate_risk with both smoking-status codes (current=0, former=1).

	    Both calls share every other input; the former-smoker call also sets a
	    quit time of 5. Each result only has to be positive.
	    """
	    shared = {
	        "age": 60,
	        "race": "white",
	        "education": 4,
	        "bmi": 25.0,
	        "copd": 0,
	        "cancer_hist": 0,
	        "family_hist_lung_cancer": 0,
	        "smoking_intensity": 20,
	        "duration_smoking": 25,
	    }

	    # Current smoker: status 0, quit time 0.
	    current_risk = self.model.calculate_risk(
	        **shared, smoking_status=0, smoking_quit_time=0
	    )

	    # Former smoker: status 1, quit time 5.
	    former_risk = self.model.calculate_risk(
	        **shared, smoking_status=1, smoking_quit_time=5
	    )

	    # No ordering between the two is asserted, only positivity.
	    assert current_risk > 0
	    assert former_risk > 0

	def test_smoking_intensity_transformation(self):
	"""Test smoking intensity transformation ((intensity/10)^-1)."""
	# Test with different intensities
	intensities = [10, 20, 30, 40]
	risks = []

	for intensity in intensities:
	input_data = dict(
	age=60,
	race="white",
	education=4,
	bmi=25.0,
	copd=0,
	cancer_hist=0,
	family_hist_lung_cancer=0,
	smoking_status=0,
	smoking_intensity=intensity,
	duration_smoking=25,
	smoking_quit_time=0,
	)
	risk = self.model.calculate_risk(**input_data)
	risks.append(risk)

	# All risks should be positive
	for risk in risks:
	assert risk > 0