"""Tests for the Liverpool Lung Project improved (LLPi) Risk Model. Ground truth values are calculated from the reference R implementation in lcmodels/R/lcmodels.R (risk.llpi function). """ import pytest from sentinel.risk_models.llpi import LLPiRiskModel from sentinel.user_input import ( Anthropometrics, CancerType, ChronicCondition, Demographics, FamilyMemberCancer, FamilyRelation, FamilySide, Lifestyle, PersonalMedicalHistory, RelationshipDegree, Sex, SmokingHistory, SmokingStatus, UserInput, ) # Ground truth test cases generated from R implementation GROUND_TRUTH_CASES = [ { "name": "low_risk_young_female", "input": UserInput( demographics=Demographics( age_years=50, sex=Sex.FEMALE, anthropometrics=Anthropometrics(height_cm=165.0, weight_kg=70.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.FORMER, years_smoked=10, cigarettes_per_day=10, years_since_quit=5, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[], previous_cancers=[], ), family_history=[], ), "expected": 0.73, }, { "name": "moderate_risk_male", "input": UserInput( demographics=Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, years_smoked=25, cigarettes_per_day=20, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[], previous_cancers=[], ), family_history=[], ), "expected": 2.91, }, { "name": "high_risk_copd", "input": UserInput( demographics=Demographics( age_years=70, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=75.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.FORMER, years_smoked=45, cigarettes_per_day=30, years_since_quit=2, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[ChronicCondition.COPD], previous_cancers=[], ), family_history=[], ), "expected": 21.62, }, { "name": "high_risk_prior_cancer", "input": UserInput( demographics=Demographics( age_years=65, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=78.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.FORMER, years_smoked=35, cigarettes_per_day=25, years_since_quit=5, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[], previous_cancers=[CancerType.PROSTATE], ), family_history=[], ), "expected": 14.31, }, { "name": "early_onset_family_history", "input": UserInput( demographics=Demographics( age_years=55, sex=Sex.FEMALE, anthropometrics=Anthropometrics(height_cm=165.0, weight_kg=68.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, years_smoked=20, cigarettes_per_day=15, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[], previous_cancers=[], ), family_history=[ FamilyMemberCancer( relation=FamilyRelation.MOTHER, side=FamilySide.MATERNAL, degree=RelationshipDegree.FIRST, cancer_type=CancerType.LUNG, age_at_diagnosis=55, # Early onset (< 60) ) ], ), "expected": 2.24, }, { "name": "late_onset_family_history", "input": UserInput( demographics=Demographics( age_years=58, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=178.0, weight_kg=82.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.FORMER, years_smoked=30, cigarettes_per_day=20, years_since_quit=3, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[], previous_cancers=[], ), family_history=[ FamilyMemberCancer( relation=FamilyRelation.FATHER, side=FamilySide.PATERNAL, degree=RelationshipDegree.FIRST, cancer_type=CancerType.LUNG, age_at_diagnosis=65, # Late onset (>= 60) ) ], ), "expected": 3.59, }, { "name": "multiple_risk_factors", "input": UserInput( demographics=Demographics( age_years=68, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=172.0, weight_kg=76.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, years_smoked=40, cigarettes_per_day=30, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[ChronicCondition.COPD], previous_cancers=[CancerType.PROSTATE], ), family_history=[ FamilyMemberCancer( relation=FamilyRelation.MOTHER, side=FamilySide.MATERNAL, degree=RelationshipDegree.FIRST, cancer_type=CancerType.LUNG, age_at_diagnosis=58, # Early onset ) ], ), "expected": 58.29, }, { "name": "female_copd_family_history", "input": UserInput( demographics=Demographics( age_years=62, sex=Sex.FEMALE, anthropometrics=Anthropometrics(height_cm=162.0, weight_kg=65.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.FORMER, years_smoked=28, cigarettes_per_day=18, years_since_quit=4, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[ChronicCondition.COPD], previous_cancers=[], ), family_history=[ FamilyMemberCancer( relation=FamilyRelation.FATHER, side=FamilySide.PATERNAL, degree=RelationshipDegree.FIRST, cancer_type=CancerType.LUNG, age_at_diagnosis=70, # Late onset ) ], ), "expected": 6.19, }, { "name": "minimal_smoking_no_risk", "input": UserInput( demographics=Demographics( age_years=52, sex=Sex.FEMALE, anthropometrics=Anthropometrics(height_cm=160.0, weight_kg=62.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.FORMER, years_smoked=5, cigarettes_per_day=8, years_since_quit=10, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[], previous_cancers=[], ), family_history=[], ), "expected": 0.63, }, { "name": "heavy_smoker_female", "input": UserInput( demographics=Demographics( age_years=58, sex=Sex.FEMALE, anthropometrics=Anthropometrics(height_cm=168.0, weight_kg=72.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, years_smoked=38, cigarettes_per_day=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[], previous_cancers=[], ), family_history=[], ), "expected": 3.20, }, ] class TestLLPiModel: """Test suite for LLPiRiskModel.""" def setup_method(self): """Initialize LLPiRiskModel instance for testing.""" self.model = LLPiRiskModel() @pytest.mark.parametrize("case", GROUND_TRUTH_CASES, ids=lambda x: x["name"]) def test_ground_truth_validation(self, case): """Test against calculated ground truth results from R implementation. Args: case: Parameterized ground truth case dict. """ user = case["input"] score_str = self.model.compute_score(user) calculated = float(score_str.rstrip("%")) expected = case["expected"] # Using tight tolerance since these are calculated values assert calculated == pytest.approx(expected, abs=0.01) def test_never_smoker_handling(self): """Test that never smokers receive N/A response.""" never_smoker = UserInput( demographics=Demographics( age_years=55, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.NEVER, cigarettes_per_day=0, years_smoked=0, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) score = self.model.compute_score(never_smoker) assert score == "N/A: Model is for current or former smokers only." def test_no_smoking_history(self): """Test handling when years_smoked is 0.""" user = UserInput( demographics=Demographics( age_years=55, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=10, years_smoked=0, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) score = self.model.compute_score(user) assert "Model requires smoking history" in score def test_age_validation(self): """Test age validation (40-85 range).""" young_user = UserInput( demographics=Demographics( age_years=35, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=15, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) with pytest.raises(ValueError, match=r"Invalid inputs for LLPi:"): self.model.compute_score(young_user) old_user = UserInput( demographics=Demographics( age_years=90, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.FORMER, cigarettes_per_day=20, years_smoked=30, years_since_quit=10, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) with pytest.raises(ValueError, match=r"Invalid inputs for LLPi:"): self.model.compute_score(old_user) def test_copd_detection(self): """Test COPD detection from chronic conditions.""" user = UserInput( demographics=Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[ChronicCondition.COPD, ChronicCondition.DIABETES], ), family_history=[], ) score = self.model.compute_score(user) assert "%" in score # Risk should be higher with COPD risk_with_copd = float(score.rstrip("%")) # Compare to same user without COPD user_no_copd = UserInput( demographics=Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[ChronicCondition.DIABETES], ), family_history=[], ) score_no_copd = self.model.compute_score(user_no_copd) risk_no_copd = float(score_no_copd.rstrip("%")) assert risk_with_copd > risk_no_copd def test_prior_cancer_detection(self): """Test prior cancer history detection.""" user = UserInput( demographics=Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[], previous_cancers=[CancerType.COLORECTAL, CancerType.PROSTATE], ), family_history=[], ) score = self.model.compute_score(user) assert "%" in score # Risk should be higher with prior cancer risk_with_cancer = float(score.rstrip("%")) # Compare to same user without prior cancer user_no_cancer = UserInput( demographics=Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[], previous_cancers=[], ), family_history=[], ) score_no_cancer = self.model.compute_score(user_no_cancer) risk_no_cancer = float(score_no_cancer.rstrip("%")) assert risk_with_cancer > risk_no_cancer def test_family_history_early_vs_late(self): """Test that early onset family history has higher coefficient than late onset.""" base_user_data = { "demographics": Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), "lifestyle": Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), "personal_medical_history": PersonalMedicalHistory(), } # Early onset family history user_early = UserInput( **base_user_data, family_history=[ FamilyMemberCancer( relation=FamilyRelation.MOTHER, side=FamilySide.MATERNAL, degree=RelationshipDegree.FIRST, cancer_type=CancerType.LUNG, age_at_diagnosis=55, # Early onset ) ], ) # Late onset family history user_late = UserInput( **base_user_data, family_history=[ FamilyMemberCancer( relation=FamilyRelation.FATHER, side=FamilySide.PATERNAL, degree=RelationshipDegree.FIRST, cancer_type=CancerType.LUNG, age_at_diagnosis=70, # Late onset ) ], ) # No family history user_none = UserInput(**base_user_data, family_history=[]) risk_early = float(self.model.compute_score(user_early).rstrip("%")) risk_late = float(self.model.compute_score(user_late).rstrip("%")) risk_none = float(self.model.compute_score(user_none).rstrip("%")) # Early onset should confer higher risk than late onset assert risk_early > risk_late > risk_none def test_family_history_non_lung_cancer_ignored(self): """Test that non-lung cancer family history is ignored.""" user_lung = UserInput( demographics=Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[ FamilyMemberCancer( relation=FamilyRelation.MOTHER, side=FamilySide.MATERNAL, degree=RelationshipDegree.FIRST, cancer_type=CancerType.LUNG, age_at_diagnosis=65, ) ], ) user_breast = UserInput( demographics=Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[ FamilyMemberCancer( relation=FamilyRelation.MOTHER, side=FamilySide.MATERNAL, degree=RelationshipDegree.FIRST, cancer_type=CancerType.BREAST, age_at_diagnosis=65, ) ], ) user_none = UserInput( demographics=Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) risk_lung = float(self.model.compute_score(user_lung).rstrip("%")) risk_breast = float(self.model.compute_score(user_breast).rstrip("%")) risk_none = float(self.model.compute_score(user_none).rstrip("%")) # Lung cancer family history should increase risk assert risk_lung > risk_none # Breast cancer family history should not affect risk assert risk_breast == pytest.approx(risk_none, abs=0.01) def test_sex_difference(self): """Test that males have higher risk than females (all else equal).""" user_male = UserInput( demographics=Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) user_female = UserInput( demographics=Demographics( age_years=60, sex=Sex.FEMALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) risk_male = float(self.model.compute_score(user_male).rstrip("%")) risk_female = float(self.model.compute_score(user_female).rstrip("%")) # Males should have higher risk (positive coefficient for male) assert risk_male > risk_female def test_model_metadata(self): """Test model metadata methods.""" assert self.model.name == "llpi" assert self.model.cancer_type() == "lung" assert "LLPi" in self.model.description() assert "8.7-year" in self.model.description() assert "percentage chance" in self.model.interpretation() assert isinstance(self.model.references(), list) assert len(self.model.references()) > 0 assert "Marcus" in self.model.references()[0] def test_calculate_risk_directly(self): """Test the calculate_risk method directly with known inputs.""" # Test case: moderate_risk_male from ground truth risk = self.model.calculate_risk( age=60, male=1, smkyears=25, copd=0, prior_cancer=0, fam_cancer_onset=0, ) # Should match ground truth: 2.91% assert risk * 100 == pytest.approx(2.91, abs=0.01) def test_increasing_age_increases_risk(self): """Test that increasing age increases risk (positive age coefficient).""" risks = [] for age in [50, 60, 70]: user = UserInput( demographics=Demographics( age_years=age, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=25, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) risk = float(self.model.compute_score(user).rstrip("%")) risks.append(risk) # Risk should increase with age assert risks[0] < risks[1] < risks[2] def test_increasing_smoking_years_increases_risk(self): """Test that longer smoking history increases risk.""" risks = [] for years in [10, 25, 40]: user = UserInput( demographics=Demographics( age_years=60, sex=Sex.MALE, anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0), ), lifestyle=Lifestyle( smoking=SmokingHistory( status=SmokingStatus.CURRENT, cigarettes_per_day=20, years_smoked=years, years_since_quit=None, ) ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) risk = float(self.model.compute_score(user).rstrip("%")) risks.append(risk) # Risk should increase with more smoking years assert risks[0] < risks[1] < risks[2]