"""Tests for the PLCOm2012 Lung Cancer Risk Model.

Ground truth values are calculated from the authors' reference implementation at
https://brocku.ca/lung-cancer-screening-and-risk-prediction/risk-calculators/
and the reference implementation in R: https://github.com/resplab/PLCOm2012.
"""

import pytest

from sentinel.risk_models.plcom2012 import PLCOm2012RiskModel
from sentinel.user_input import (
    Anthropometrics,
    CancerType,
    ChronicCondition,
    Demographics,
    Ethnicity,
    FamilyMemberCancer,
    FamilyRelation,
    FamilySide,
    Lifestyle,
    PersonalMedicalHistory,
    RelationshipDegree,
    Sex,
    SmokingHistory,
    SmokingStatus,
    UserInput,
)

# Test cases with calculated ground truth data (inline UserInput like Gail tests).
# Each entry holds a pytest id ("name"), the model input ("input") and the
# expected score in percent ("expected"). Weights are written as
# ``bmi * height_m**2`` so the target BMI of each case is visible at a glance.
GROUND_TRUTH_CASES = [
    {
        "name": "low_risk_current_smoker",
        "input": UserInput(
            demographics=Demographics(
                age_years=55,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(
                    height_cm=175.0,
                    weight_kg=25.0 * (1.75**2),
                ),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=10,
                    years_smoked=20,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[],
                previous_cancers=[],
            ),
            family_history=[],
        ),
        "expected": 0.31,
    },
    {
        "name": "moderate_risk_former_smoker",
        "input": UserInput(
            demographics=Demographics(
                age_years=62,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(
                    height_cm=175.0,
                    weight_kg=27.0 * (1.75**2),
                ),
                education_level=3,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.FORMER,
                    cigarettes_per_day=20,
                    years_smoked=30,
                    years_since_quit=5,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[],
                previous_cancers=[],
            ),
            family_history=[],
        ),
        "expected": 1.24,
    },
    {
        "name": "high_risk_multiple_factors",
        "input": UserInput(
            demographics=Demographics(
                age_years=70,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(
                    height_cm=175.0,
                    weight_kg=22.0 * (1.75**2),
                ),
                education_level=2,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=40,
                    years_smoked=45,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[ChronicCondition.COPD],
                previous_cancers=[CancerType.BREAST],
            ),
            family_history=[
                FamilyMemberCancer(
                    relation=FamilyRelation.MOTHER,
                    side=FamilySide.MATERNAL,
                    degree=RelationshipDegree.FIRST,
                    cancer_type=CancerType.LUNG,
                    age_at_diagnosis=65,
                )
            ],
        ),
        "expected": 31.19,
    },
    {
        "name": "black_race_variant",
        "input": UserInput(
            demographics=Demographics(
                age_years=58,
                sex=Sex.MALE,
                ethnicity=Ethnicity.BLACK,
                anthropometrics=Anthropometrics(
                    height_cm=175.0,
                    weight_kg=28.0 * (1.75**2),
                ),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.FORMER,
                    cigarettes_per_day=15,
                    years_smoked=25,
                    years_since_quit=8,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[],
                previous_cancers=[],
            ),
            family_history=[],
        ),
        "expected": 0.696,
    },
    {
        "name": "hispanic_low_education",
        "input": UserInput(
            demographics=Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=Ethnicity.HISPANIC,
                anthropometrics=Anthropometrics(
                    height_cm=175.0,
                    weight_kg=30.0 * (1.75**2),
                ),
                education_level=1,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=25,
                    years_smoked=35,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[],
                previous_cancers=[],
            ),
            family_history=[],
        ),
        "expected": 1.161,
    },
    {
        "name": "asian_former_heavy_smoker",
        "input": UserInput(
            demographics=Demographics(
                age_years=65,
                sex=Sex.MALE,
                ethnicity=Ethnicity.ASIAN,
                anthropometrics=Anthropometrics(
                    height_cm=175.0,
                    weight_kg=24.0 * (1.75**2),
                ),
                education_level=5,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.FORMER,
                    cigarettes_per_day=25,
                    years_smoked=35,
                    years_since_quit=3,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[ChronicCondition.COPD],
                previous_cancers=[],
            ),
            family_history=[
                FamilyMemberCancer(
                    relation=FamilyRelation.MOTHER,
                    side=FamilySide.MATERNAL,
                    degree=RelationshipDegree.FIRST,
                    cancer_type=CancerType.LUNG,
                    age_at_diagnosis=65,
                )
            ],
        ),
        "expected": 3.40,
    },
]


class TestPLCOm2012Model:
    """Test suite for PLCOm2012RiskModel."""

    def setup_method(self):
        """Initialize PLCOm2012RiskModel instance for testing."""
        self.model = PLCOm2012RiskModel()

    @pytest.mark.parametrize("case", GROUND_TRUTH_CASES, ids=lambda x: x["name"])
    def test_ground_truth_validation(self, case):
        """Test against calculated ground truth results.

        Args:
            case: Parameterized ground truth case dict.
        """
        user = case["input"]
        score_str = self.model.compute_score(user)
        # The score is a percentage string; strip the trailing "%" to compare.
        calculated = float(score_str.rstrip("%"))
        expected = case["expected"]
        # Using tight tolerance since these are calculated values
        assert calculated == pytest.approx(expected, abs=0.01)

    def test_user_input_integration_current_smoker(self):
        """Test integration with UserInput model for current smoker."""
        user = UserInput(
            demographics=Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(
                    height_cm=175.0,
                    weight_kg=80.0,
                ),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=20,
                    years_smoked=25,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[],
                previous_cancers=[],
            ),
            family_history=[],
        )
        score = self.model.compute_score(user)
        assert score != "N/A: Model is for current or former smokers only."
        assert "%" in score
        assert float(score.replace("%", "")) > 0

    def test_user_input_integration_former_smoker(self):
        """Test integration with UserInput model for former smoker."""
        user = UserInput(
            demographics=Demographics(
                age_years=65,
                sex=Sex.FEMALE,
                ethnicity=Ethnicity.BLACK,
                anthropometrics=Anthropometrics(
                    height_cm=160.0,
                    weight_kg=70.0,
                ),
                education_level=3,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.FORMER,
                    cigarettes_per_day=15,
                    years_smoked=30,
                    years_since_quit=10,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[ChronicCondition.COPD],
                previous_cancers=[CancerType.BREAST],
            ),
            family_history=[
                FamilyMemberCancer(
                    relation=FamilyRelation.FATHER,
                    side=FamilySide.PATERNAL,
                    degree=RelationshipDegree.FIRST,
                    cancer_type=CancerType.LUNG,
                    age_at_diagnosis=68,
                )
            ],
        )
        score = self.model.compute_score(user)
        assert score != "N/A: Model is for current or former smokers only."
        assert "%" in score
        assert float(score.replace("%", "")) > 0

    def test_never_smoker_handling(self):
        """Test that never smokers receive N/A response."""
        never_smoker = UserInput(
            demographics=Demographics(
                age_years=55,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.NEVER,
                    cigarettes_per_day=0,
                    years_smoked=0,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )
        score = self.model.compute_score(never_smoker)
        assert score == "N/A: Model is for current or former smokers only."

    def test_validation_errors(self):
        """Test that a fully populated input passes validation.

        Despite its (historical) name, this is the happy-path counterpart to
        the error tests below: every required field is present, so a
        percentage score is expected and no error should be raised.
        """
        user = UserInput(
            demographics=Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=20,
                    years_smoked=25,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )
        # This should pass validation since all required fields are present
        score = self.model.compute_score(user)
        assert "%" in score

    def test_age_out_of_range(self):
        """Test age outside validated range raises ValueError."""
        user = UserInput(
            demographics=Demographics(
                age_years=45,  # Below minimum
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=20,
                    years_smoked=25,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )
        with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
            self.model.compute_score(user)

    def test_age_validation_legacy(self):
        """Test age validation (50-80 range) - legacy behavior.

        Checks one age just below (49) and one just above (81) the range.
        """
        # This test is now handled by input validation, so we expect ValueError
        young_user = UserInput(
            demographics=Demographics(
                age_years=49,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=75.0),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=20,
                    years_smoked=25,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )
        with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
            self.model.compute_score(young_user)

        old_user = UserInput(
            demographics=Demographics(
                age_years=81,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=75.0),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=20,
                    years_smoked=25,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )
        with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
            self.model.compute_score(old_user)

    def test_missing_bmi_data(self):
        """Placeholder for missing-BMI handling; intentionally asserts nothing."""
        # This test is now handled by input validation since anthropometrics is required
        # We can't create a UserInput without anthropometrics due to Pydantic validation
        # NOTE(review): this always passes. Consider asserting the validation
        # error directly once the required-field behavior is confirmed.
        pass

    def test_missing_education_level(self):
        """Test handling of missing education level."""
        user = UserInput(
            demographics=Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                # Missing education_level
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=20,
                    years_smoked=25,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )
        with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
            self.model.compute_score(user)

    def test_missing_smoking_intensity(self):
        """Test handling of missing smoking intensity."""
        # This test is now handled by the model's internal validation
        # since 0 cigarettes per day causes a division by zero in the calculation
        user = UserInput(
            demographics=Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=0,  # This will cause division by zero
                    years_smoked=25,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )
        # The model should handle this gracefully and return an N/A message
        score = self.model.compute_score(user)
        assert "Calculation failed" in score

    def test_missing_smoking_duration(self):
        """Test handling of missing smoking duration."""
        # This test is now handled by input validation since years_smoked >= 0 is required
        # The model will accept 0 years smoked as valid input
        user = UserInput(
            demographics=Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=20,
                    years_smoked=0,  # This is valid input
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )
        # This should work fine with 0 years smoked
        score = self.model.compute_score(user)
        assert "%" in score

    def test_missing_quit_years_former_smoker(self):
        """Test handling of missing quit years for former smoker."""
        user = UserInput(
            demographics=Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.FORMER,
                    cigarettes_per_day=20,
                    years_smoked=25,
                    years_since_quit=None,  # This will trigger N/A message
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )
        score = self.model.compute_score(user)
        assert "Missing years since quitting for former smoker" in score

    def test_copd_detection(self):
        """Test COPD detection from chronic illnesses.

        Only checks that a score is produced when COPD is listed alongside
        another condition; the COPD contribution itself is not asserted here.
        """
        user = UserInput(
            demographics=Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=20,
                    years_smoked=25,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[ChronicCondition.COPD, ChronicCondition.DIABETES],
            ),
            family_history=[],
        )
        score = self.model.compute_score(user)
        assert "%" in score

    def test_family_history_lung_cancer_detection(self):
        """Test lung cancer family history detection.

        Only checks that a score is produced for a mix of first- and
        second-degree relatives; the effect on the score is not asserted here.
        """
        user = UserInput(
            demographics=Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                education_level=4,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=SmokingStatus.CURRENT,
                    cigarettes_per_day=20,
                    years_smoked=25,
                    years_since_quit=None,
                ),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[
                FamilyMemberCancer(
                    relation=FamilyRelation.MOTHER,
                    side=FamilySide.MATERNAL,
                    degree=RelationshipDegree.FIRST,
                    cancer_type=CancerType.LUNG,
                    age_at_diagnosis=65,
                ),
                FamilyMemberCancer(
                    relation=FamilyRelation.MATERNAL_UNCLE,
                    side=FamilySide.MATERNAL,
                    degree=RelationshipDegree.SECOND,
                    cancer_type=CancerType.LUNG,
                    age_at_diagnosis=70,
                ),  # Should not count (not first-degree relative)
            ],
        )
        score = self.model.compute_score(user)
        assert "%" in score

    def test_race_handling(self):
        """Test different race/ethnicity handling."""
        races = [
            Ethnicity.WHITE,
            Ethnicity.BLACK,
            Ethnicity.HISPANIC,
            Ethnicity.ASIAN,
            Ethnicity.PACIFIC_ISLANDER,
        ]
        for race in races:
            user = UserInput(
                demographics=Demographics(
                    age_years=60,
                    sex=Sex.MALE,
                    ethnicity=race,
                    anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                    education_level=4,
                ),
                lifestyle=Lifestyle(
                    smoking=SmokingHistory(
                        status=SmokingStatus.CURRENT,
                        cigarettes_per_day=20,
                        years_smoked=25,
                        years_since_quit=None,
                    ),
                ),
                personal_medical_history=PersonalMedicalHistory(),
                family_history=[],
            )
            score = self.model.compute_score(user)
            # Name the ethnicity in the failure message: the loop would
            # otherwise hide which iteration failed.
            assert "%" in score, f"no percentage score for {race!r}: {score!r}"
            assert float(score.replace("%", "")) > 0, (
                f"non-positive score for {race!r}: {score!r}"
            )

    def test_model_metadata(self):
        """Test model metadata methods."""
        assert self.model.name == "plcom2012"
        assert self.model.cancer_type() == "lung"

        description = self.model.description()
        assert "PLCOm2012" in description
        assert "6-year" in description

        assert "percentage chance" in self.model.interpretation()

        references = self.model.references()
        assert isinstance(references, list)
        assert len(references) > 0
        assert "Tammemägi" in references[0]

    def test_smoking_status_encoding(self):
        """Test smoking status encoding (current=0, former=1).

        Only checks that both encodings yield a positive risk; the relative
        ordering of the two risks is not asserted.
        """
        # Parameters shared by both calls; only status and quit time differ.
        shared = {
            "age": 60,
            "race": "white",
            "education": 4,
            "bmi": 25.0,
            "copd": 0,
            "cancer_hist": 0,
            "family_hist_lung_cancer": 0,
            "smoking_intensity": 20,
            "duration_smoking": 25,
        }

        # Test current smoker
        current_input = {**shared, "smoking_status": 0, "smoking_quit_time": 0}
        current_risk = self.model.calculate_risk(**current_input)

        # Test former smoker (same parameters except status and quit time)
        former_input = {**shared, "smoking_status": 1, "smoking_quit_time": 5}
        former_risk = self.model.calculate_risk(**former_input)

        # Both should be positive numbers
        assert current_risk > 0
        assert former_risk > 0

    def test_smoking_intensity_transformation(self):
        """Test smoking intensity transformation ((intensity/10)^-1).

        Only checks that every tested intensity yields a positive risk.
        """
        # Test with different intensities
        intensities = [10, 20, 30, 40]
        fixed = {
            "age": 60,
            "race": "white",
            "education": 4,
            "bmi": 25.0,
            "copd": 0,
            "cancer_hist": 0,
            "family_hist_lung_cancer": 0,
            "smoking_status": 0,
            "duration_smoking": 25,
            "smoking_quit_time": 0,
        }
        risks = [
            self.model.calculate_risk(**fixed, smoking_intensity=intensity)
            for intensity in intensities
        ]

        # All risks should be positive
        for intensity, risk in zip(intensities, risks):
            assert risk > 0, f"non-positive risk {risk!r} at intensity {intensity}"