Spaces:
Runtime error
Runtime error
| """Tests for the PLCOm2012 Lung Cancer Risk Model. | |
| Ground truth values are calculated from authors' reference implementation in | |
| https://brocku.ca/lung-cancer-screening-and-risk-prediction/risk-calculators/ | |
| and the reference implementation in R: https://github.com/resplab/PLCOm2012. | |
| """ | |
| import pytest | |
| from sentinel.risk_models.plcom2012 import PLCOm2012RiskModel | |
| from sentinel.user_input import ( | |
| Anthropometrics, | |
| CancerType, | |
| ChronicCondition, | |
| Demographics, | |
| Ethnicity, | |
| FamilyMemberCancer, | |
| FamilyRelation, | |
| FamilySide, | |
| Lifestyle, | |
| PersonalMedicalHistory, | |
| RelationshipDegree, | |
| Sex, | |
| SmokingHistory, | |
| SmokingStatus, | |
| UserInput, | |
| ) | |
# Ground-truth cases: every subject is a 175 cm tall male whose weight is
# derived from a target BMI; only the fields listed per case differ.
def _ground_truth_case(
    name,
    expected,
    *,
    age,
    ethnicity,
    bmi,
    education,
    status,
    cigarettes_per_day,
    years_smoked,
    years_since_quit=None,
    chronic_conditions=(),
    previous_cancers=(),
    mother_had_lung_cancer=False,
):
    """Build one ground-truth entry.

    Args:
        name: Identifier of the case.
        expected: Expected risk, in percent.
        age: Age in years.
        ethnicity: Ethnicity of the subject.
        bmi: Target body-mass index; converted to a weight for 1.75 m height.
        education: Education level code passed to Demographics.
        status: Smoking status.
        cigarettes_per_day: Smoking intensity.
        years_smoked: Smoking duration in years.
        years_since_quit: Years since quitting, or None.
        chronic_conditions: Chronic conditions of the subject.
        previous_cancers: Previous cancers of the subject.
        mother_had_lung_cancer: Add a mother diagnosed with lung cancer at 65.

    Returns:
        Dict with the keys "name", "input" (a UserInput) and "expected".
    """
    relatives = []
    if mother_had_lung_cancer:
        relatives.append(
            FamilyMemberCancer(
                relation=FamilyRelation.MOTHER,
                side=FamilySide.MATERNAL,
                degree=RelationshipDegree.FIRST,
                cancer_type=CancerType.LUNG,
                age_at_diagnosis=65,
            )
        )

    body = Anthropometrics(
        height_cm=175.0,
        weight_kg=bmi * (1.75**2),
    )
    smoking = SmokingHistory(
        status=status,
        cigarettes_per_day=cigarettes_per_day,
        years_smoked=years_smoked,
        years_since_quit=years_since_quit,
    )
    subject = UserInput(
        demographics=Demographics(
            age_years=age,
            sex=Sex.MALE,
            ethnicity=ethnicity,
            anthropometrics=body,
            education_level=education,
        ),
        lifestyle=Lifestyle(smoking=smoking),
        personal_medical_history=PersonalMedicalHistory(
            chronic_conditions=list(chronic_conditions),
            previous_cancers=list(previous_cancers),
        ),
        family_history=relatives,
    )
    return {"name": name, "input": subject, "expected": expected}


GROUND_TRUTH_CASES = [
    _ground_truth_case(
        "low_risk_current_smoker",
        0.31,
        age=55,
        ethnicity=Ethnicity.WHITE,
        bmi=25.0,
        education=4,
        status=SmokingStatus.CURRENT,
        cigarettes_per_day=10,
        years_smoked=20,
    ),
    _ground_truth_case(
        "moderate_risk_former_smoker",
        1.24,
        age=62,
        ethnicity=Ethnicity.WHITE,
        bmi=27.0,
        education=3,
        status=SmokingStatus.FORMER,
        cigarettes_per_day=20,
        years_smoked=30,
        years_since_quit=5,
    ),
    _ground_truth_case(
        "high_risk_multiple_factors",
        31.19,
        age=70,
        ethnicity=Ethnicity.WHITE,
        bmi=22.0,
        education=2,
        status=SmokingStatus.CURRENT,
        cigarettes_per_day=40,
        years_smoked=45,
        chronic_conditions=[ChronicCondition.COPD],
        previous_cancers=[CancerType.BREAST],
        mother_had_lung_cancer=True,
    ),
    _ground_truth_case(
        "black_race_variant",
        0.696,
        age=58,
        ethnicity=Ethnicity.BLACK,
        bmi=28.0,
        education=4,
        status=SmokingStatus.FORMER,
        cigarettes_per_day=15,
        years_smoked=25,
        years_since_quit=8,
    ),
    _ground_truth_case(
        "hispanic_low_education",
        1.161,
        age=60,
        ethnicity=Ethnicity.HISPANIC,
        bmi=30.0,
        education=1,
        status=SmokingStatus.CURRENT,
        cigarettes_per_day=25,
        years_smoked=35,
    ),
    _ground_truth_case(
        "asian_former_heavy_smoker",
        3.40,
        age=65,
        ethnicity=Ethnicity.ASIAN,
        bmi=24.0,
        education=5,
        status=SmokingStatus.FORMER,
        cigarettes_per_day=25,
        years_smoked=35,
        years_since_quit=3,
        chronic_conditions=[ChronicCondition.COPD],
        mother_had_lung_cancer=True,
    ),
]
class TestPLCOm2012Model:
    """Test suite for PLCOm2012RiskModel.

    Most tests start from one shared baseline subject (60-year-old white
    male, 175 cm / 80 kg, education level 4, current smoker with 20
    cigarettes per day for 25 years, no medical or family history) and
    override only the field under test.
    """

    def setup_method(self):
        """Initialize PLCOm2012RiskModel instance for testing."""
        self.model = PLCOm2012RiskModel()

    # ------------------------------------------------------------------
    # Input builders (underscore-prefixed, so pytest does not collect them)
    # ------------------------------------------------------------------

    @staticmethod
    def _demographics(**overrides):
        """Return baseline Demographics with the given fields replaced."""
        fields = {
            "age_years": 60,
            "sex": Sex.MALE,
            "ethnicity": Ethnicity.WHITE,
            "anthropometrics": Anthropometrics(height_cm=175.0, weight_kg=80.0),
            "education_level": 4,
        }
        fields.update(overrides)
        return Demographics(**fields)

    @staticmethod
    def _smoking(**overrides):
        """Return baseline SmokingHistory with the given fields replaced."""
        fields = {
            "status": SmokingStatus.CURRENT,
            "cigarettes_per_day": 20,
            "years_smoked": 25,
            "years_since_quit": None,
        }
        fields.update(overrides)
        return SmokingHistory(**fields)

    @classmethod
    def _user(
        cls,
        *,
        demographics=None,
        smoking=None,
        medical_history=None,
        family_history=None,
    ):
        """Assemble a UserInput, defaulting every omitted part to baseline.

        Args:
            demographics: Demographics to use; baseline when None.
            smoking: SmokingHistory to use; baseline when None.
            medical_history: PersonalMedicalHistory to use; a default
                constructed instance when None.
            family_history: List of FamilyMemberCancer; empty when None.

        Returns:
            The assembled UserInput.
        """
        if demographics is None:
            demographics = cls._demographics()
        if smoking is None:
            smoking = cls._smoking()
        if medical_history is None:
            medical_history = PersonalMedicalHistory()
        return UserInput(
            demographics=demographics,
            lifestyle=Lifestyle(smoking=smoking),
            personal_medical_history=medical_history,
            family_history=[] if family_history is None else family_history,
        )

    @staticmethod
    def _risk_kwargs(**overrides):
        """Return baseline keyword arguments for ``calculate_risk``."""
        fields = {
            "age": 60,
            "race": "white",
            "education": 4,
            "bmi": 25.0,
            "copd": 0,
            "cancer_hist": 0,
            "family_hist_lung_cancer": 0,
            "smoking_status": 0,
            "smoking_intensity": 20,
            "duration_smoking": 25,
            "smoking_quit_time": 0,
        }
        fields.update(overrides)
        return fields

    # ------------------------------------------------------------------
    # Ground truth
    # ------------------------------------------------------------------

    # Without this decorator pytest looks for a fixture named "case" and the
    # test errors out instead of running once per ground-truth entry.
    @pytest.mark.parametrize(
        "case",
        GROUND_TRUTH_CASES,
        ids=lambda case: case["name"],
    )
    def test_ground_truth_validation(self, case):
        """Test against calculated ground truth results.

        Args:
            case: Parameterized ground truth case dict.
        """
        score_str = self.model.compute_score(case["input"])
        calculated = float(score_str.rstrip("%"))
        # Using tight tolerance since these are calculated values
        assert calculated == pytest.approx(case["expected"], abs=0.01)

    # ------------------------------------------------------------------
    # UserInput integration
    # ------------------------------------------------------------------

    def test_user_input_integration_current_smoker(self):
        """Test integration with UserInput model for current smoker."""
        user = self._user(
            medical_history=PersonalMedicalHistory(
                chronic_conditions=[],
                previous_cancers=[],
            ),
        )
        score = self.model.compute_score(user)
        assert score != "N/A: Model is for current or former smokers only."
        assert "%" in score
        assert float(score.replace("%", "")) > 0

    def test_user_input_integration_former_smoker(self):
        """Test integration with UserInput model for former smoker."""
        user = self._user(
            demographics=self._demographics(
                age_years=65,
                sex=Sex.FEMALE,
                ethnicity=Ethnicity.BLACK,
                anthropometrics=Anthropometrics(
                    height_cm=160.0,
                    weight_kg=70.0,
                ),
                education_level=3,
            ),
            smoking=self._smoking(
                status=SmokingStatus.FORMER,
                cigarettes_per_day=15,
                years_smoked=30,
                years_since_quit=10,
            ),
            medical_history=PersonalMedicalHistory(
                chronic_conditions=[ChronicCondition.COPD],
                previous_cancers=[CancerType.BREAST],
            ),
            family_history=[
                FamilyMemberCancer(
                    relation=FamilyRelation.FATHER,
                    side=FamilySide.PATERNAL,
                    degree=RelationshipDegree.FIRST,
                    cancer_type=CancerType.LUNG,
                    age_at_diagnosis=68,
                )
            ],
        )
        score = self.model.compute_score(user)
        assert score != "N/A: Model is for current or former smokers only."
        assert "%" in score
        assert float(score.replace("%", "")) > 0

    def test_never_smoker_handling(self):
        """Test that never smokers receive N/A response."""
        never_smoker = self._user(
            demographics=self._demographics(age_years=55),
            smoking=self._smoking(
                status=SmokingStatus.NEVER,
                cigarettes_per_day=0,
                years_smoked=0,
            ),
        )
        score = self.model.compute_score(never_smoker)
        assert score == "N/A: Model is for current or former smokers only."

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------

    def test_validation_errors(self):
        """Test that an input with all required fields passes validation.

        Despite its name, this is the happy-path counterpart to the
        missing-field tests: the baseline subject must yield a score.
        """
        score = self.model.compute_score(self._user())
        assert "%" in score

    def test_age_out_of_range(self):
        """Test age outside validated range raises ValueError."""
        # 45 is below the minimum age
        user = self._user(demographics=self._demographics(age_years=45))
        with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
            self.model.compute_score(user)

    def test_age_validation_legacy(self):
        """Test age validation (50-80 range) - legacy behavior."""
        # Input validation now rejects both sides of the range with ValueError.
        for age in (49, 81):
            user = self._user(
                demographics=self._demographics(
                    age_years=age,
                    anthropometrics=Anthropometrics(
                        height_cm=175.0,
                        weight_kg=75.0,
                    ),
                ),
            )
            with pytest.raises(
                ValueError, match=r"Invalid inputs for PLCOm2012:"
            ):
                self.model.compute_score(user)

    def test_missing_bmi_data(self):
        """Placeholder for missing BMI data; intentionally asserts nothing.

        Anthropometrics is required by input validation, so a UserInput
        without it cannot be created here due to Pydantic validation.
        """

    def test_missing_education_level(self):
        """Test handling of missing education level."""
        # Built by hand so education_level is truly omitted, not set to None.
        user = self._user(
            demographics=Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
            ),
        )
        with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
            self.model.compute_score(user)

    def test_missing_smoking_intensity(self):
        """Test handling of missing smoking intensity."""
        # 0 cigarettes per day causes a division by zero in the calculation;
        # the model should report that in the score string, not raise.
        user = self._user(smoking=self._smoking(cigarettes_per_day=0))
        score = self.model.compute_score(user)
        assert "Calculation failed" in score

    def test_missing_smoking_duration(self):
        """Test handling of missing smoking duration."""
        # years_smoked >= 0 is required, so 0 years smoked is valid input.
        user = self._user(smoking=self._smoking(years_smoked=0))
        score = self.model.compute_score(user)
        assert "%" in score

    def test_missing_quit_years_former_smoker(self):
        """Test handling of missing quit years for former smoker."""
        # years_since_quit stays None, which triggers the N/A message.
        user = self._user(smoking=self._smoking(status=SmokingStatus.FORMER))
        score = self.model.compute_score(user)
        assert "Missing years since quitting for former smoker" in score

    # ------------------------------------------------------------------
    # Risk-factor extraction
    # ------------------------------------------------------------------

    def test_copd_detection(self):
        """Test COPD detection from chronic illnesses."""
        user = self._user(
            medical_history=PersonalMedicalHistory(
                chronic_conditions=[
                    ChronicCondition.COPD,
                    ChronicCondition.DIABETES,
                ],
            ),
        )
        score = self.model.compute_score(user)
        assert "%" in score

    def test_family_history_lung_cancer_detection(self):
        """Test lung cancer family history detection."""
        user = self._user(
            family_history=[
                FamilyMemberCancer(
                    relation=FamilyRelation.MOTHER,
                    side=FamilySide.MATERNAL,
                    degree=RelationshipDegree.FIRST,
                    cancer_type=CancerType.LUNG,
                    age_at_diagnosis=65,
                ),
                # Should not count (not first-degree relative)
                FamilyMemberCancer(
                    relation=FamilyRelation.MATERNAL_UNCLE,
                    side=FamilySide.MATERNAL,
                    degree=RelationshipDegree.SECOND,
                    cancer_type=CancerType.LUNG,
                    age_at_diagnosis=70,
                ),
            ],
        )
        score = self.model.compute_score(user)
        assert "%" in score

    def test_race_handling(self):
        """Test different race/ethnicity handling."""
        races = [
            Ethnicity.WHITE,
            Ethnicity.BLACK,
            Ethnicity.HISPANIC,
            Ethnicity.ASIAN,
            Ethnicity.PACIFIC_ISLANDER,
        ]
        for race in races:
            user = self._user(demographics=self._demographics(ethnicity=race))
            score = self.model.compute_score(user)
            assert "%" in score
            assert float(score.replace("%", "")) > 0

    # ------------------------------------------------------------------
    # Model metadata and direct risk calculation
    # ------------------------------------------------------------------

    def test_model_metadata(self):
        """Test model metadata methods."""
        assert self.model.name == "plcom2012"
        assert self.model.cancer_type() == "lung"
        assert "PLCOm2012" in self.model.description()
        assert "6-year" in self.model.description()
        assert "percentage chance" in self.model.interpretation()
        assert isinstance(self.model.references(), list)
        assert len(self.model.references()) > 0
        assert "Tammemägi" in self.model.references()[0]

    def test_smoking_status_encoding(self):
        """Test smoking status encoding (current=0, former=1)."""
        current_risk = self.model.calculate_risk(**self._risk_kwargs())
        # Same parameters except status and quit time
        former_risk = self.model.calculate_risk(
            **self._risk_kwargs(smoking_status=1, smoking_quit_time=5)
        )
        # Both should be positive numbers
        assert current_risk > 0
        assert former_risk > 0

    def test_smoking_intensity_transformation(self):
        """Test smoking intensity transformation ((intensity/10)^-1)."""
        intensities = [10, 20, 30, 40]
        risks = [
            self.model.calculate_risk(
                **self._risk_kwargs(smoking_intensity=intensity)
            )
            for intensity in intensities
        ]
        # All risks should be positive
        for risk in risks:
            assert risk > 0