"""Tests for the Gail Breast Cancer Risk Model.

Ground truth values collected from:
https://bcrisktool.cancer.gov/
"""

import pytest

from sentinel.risk_models import GailRiskModel
from sentinel.user_input import (
    Anthropometrics,
    BreastHealthHistory,
    CancerType,
    Demographics,
    Ethnicity,
    FamilyMemberCancer,
    FamilyRelation,
    FamilySide,
    FemaleSpecific,
    Lifestyle,
    MenstrualHistory,
    ParityHistory,
    PersonalMedicalHistory,
    RelationshipDegree,
    Sex,
    SmokingHistory,
    SmokingStatus,
    UserInput,
)


def _breast_cancer_relative(relation, age_at_diagnosis):
    """Build a first-degree, maternal-side relative diagnosed with breast cancer.

    Args:
        relation: FamilyRelation member identifying the relative.
        age_at_diagnosis: Age at which the relative was diagnosed.

    Returns:
        A FamilyMemberCancer record for the relative.
    """
    return FamilyMemberCancer(
        relation=relation,
        cancer_type=CancerType.BREAST,
        age_at_diagnosis=age_at_diagnosis,
        degree=RelationshipDegree.FIRST,
        side=FamilySide.MATERNAL,
    )


def _female_specific(
    age_at_menarche,
    num_live_births,
    age_at_first_live_birth,
    breast_health=None,
):
    """Build the female-specific section of a test patient.

    Args:
        age_at_menarche: Age at first menstrual period.
        num_live_births: Number of live births.
        age_at_first_live_birth: Age at first live birth; always forwarded,
            including an explicit None for nulliparous patients.
        breast_health: Optional BreastHealthHistory; an empty
            BreastHealthHistory() is used when omitted.

    Returns:
        A FemaleSpecific instance.
    """
    if breast_health is None:
        breast_health = BreastHealthHistory()
    return FemaleSpecific(
        menstrual=MenstrualHistory(age_at_menarche=age_at_menarche),
        parity=ParityHistory(
            num_live_births=num_live_births,
            age_at_first_live_birth=age_at_first_live_birth,
        ),
        breast_health=breast_health,
    )


def _make_user(
    age_years,
    sex=Sex.FEMALE,
    *,
    ethnicity=None,
    height_cm=165.0,
    weight_kg=65.0,
    female_specific=None,
    family_history=None,
):
    """Build a never-smoking test patient with an empty personal medical history.

    Optional sections left as None are not passed to the underlying models at
    all, so the models' own defaults apply.

    Args:
        age_years: Patient age in years.
        sex: Sex member; defaults to Sex.FEMALE.
        ethnicity: Optional Ethnicity member.
        height_cm: Height forwarded to Anthropometrics.
        weight_kg: Weight forwarded to Anthropometrics.
        female_specific: Optional FemaleSpecific section.
        family_history: Optional list of FamilyMemberCancer records; an
            explicit empty list is forwarded as-is.

    Returns:
        A UserInput instance.
    """
    demographics_kwargs = {
        "age_years": age_years,
        "sex": sex,
        "anthropometrics": Anthropometrics(
            height_cm=height_cm,
            weight_kg=weight_kg,
        ),
    }
    if ethnicity is not None:
        demographics_kwargs["ethnicity"] = ethnicity

    user_kwargs = {
        "demographics": Demographics(**demographics_kwargs),
        "lifestyle": Lifestyle(
            smoking=SmokingHistory(status=SmokingStatus.NEVER),
        ),
        "personal_medical_history": PersonalMedicalHistory(),
    }
    if female_specific is not None:
        user_kwargs["female_specific"] = female_specific
    if family_history is not None:
        user_kwargs["family_history"] = family_history
    return UserInput(**user_kwargs)


def _make_male_user(age_years, ethnicity=None):
    """Build a male test patient (175 cm / 75 kg) without female-specific data.

    Args:
        age_years: Patient age in years.
        ethnicity: Optional Ethnicity member.

    Returns:
        A UserInput instance with sex set to Sex.MALE.
    """
    return _make_user(
        age_years,
        Sex.MALE,
        ethnicity=ethnicity,
        height_cm=175.0,
        weight_kg=75.0,
    )


# Test cases with ground truth data from NCI BCRAT calculator
GROUND_TRUTH_CASES = [
    {
        "name": "low_risk",
        "input": _make_user(
            40,
            ethnicity=Ethnicity.WHITE,
            female_specific=_female_specific(
                age_at_menarche=13,
                num_live_births=1,
                age_at_first_live_birth=25,
            ),
            family_history=[],
        ),
        "expected": 0.6,
    },
    {
        "name": "average_risk",
        "input": _make_user(
            50,
            ethnicity=Ethnicity.WHITE,
            female_specific=_female_specific(
                age_at_menarche=12,
                num_live_births=1,
                age_at_first_live_birth=28,
            ),
            family_history=[
                _breast_cancer_relative(FamilyRelation.MOTHER, 55),
            ],
        ),
        "expected": 2.2,
    },
    {
        "name": "high_risk",
        "input": _make_user(
            55,
            ethnicity=Ethnicity.WHITE,
            female_specific=_female_specific(
                age_at_menarche=11,
                num_live_births=1,
                age_at_first_live_birth=35,
                breast_health=BreastHealthHistory(
                    num_biopsies=2,
                    atypical_hyperplasia=True,
                ),
            ),
            family_history=[
                _breast_cancer_relative(FamilyRelation.MOTHER, 45),
                _breast_cancer_relative(FamilyRelation.SISTER, 50),
            ],
        ),
        "expected": 10.9,
    },
    {
        "name": "african_american",
        "input": _make_user(
            45,
            ethnicity=Ethnicity.BLACK,
            female_specific=_female_specific(
                age_at_menarche=12,
                num_live_births=1,
                age_at_first_live_birth=22,
            ),
            family_history=[
                _breast_cancer_relative(FamilyRelation.MOTHER, 55),
            ],
        ),
        "expected": 1.6,
    },
    {
        "name": "hispanic_nulliparous",
        "input": _make_user(
            42,
            ethnicity=Ethnicity.HISPANIC,
            female_specific=_female_specific(
                age_at_menarche=14,
                num_live_births=0,
                age_at_first_live_birth=None,
            ),
            family_history=[],
        ),
        "expected": 0.9,
    },
]

# Relations counted as affected relatives when building the Gail inputs.
_COUNTED_RELATIONS = {
    FamilyRelation.MOTHER,
    FamilyRelation.SISTER,
    FamilyRelation.DAUGHTER,
}

# Race codes passed to GailRiskModel.absolute_risk; any ethnicity not listed
# here (or a missing one) falls back to _DEFAULT_RACE_CODE.
_RACE_CODES = {
    Ethnicity.BLACK: 2,
    Ethnicity.ASIAN: 3,
    Ethnicity.PACIFIC_ISLANDER: 3,
    Ethnicity.HISPANIC: 6,
}
_DEFAULT_RACE_CODE = 1

# Ethnicities the model's validation is expected to accept.
SUPPORTED_ETHNICITIES = [
    Ethnicity.WHITE,
    Ethnicity.BLACK,
    Ethnicity.ASIAN,
    Ethnicity.PACIFIC_ISLANDER,
    Ethnicity.HISPANIC,
]


def _gail_inputs(user):
    """Translate a UserInput into keyword arguments for ``absolute_risk``.

    Args:
        user: UserInput with the female-specific section populated.

    Returns:
        Dict of keyword arguments for GailRiskModel.absolute_risk.
    """
    female = user.female_specific
    age = user.demographics.age_years

    # 98 is passed both for nulliparous patients and when the age at first
    # live birth is missing.
    if (female.parity.num_live_births or 0) > 0:
        age_first_birth = female.parity.age_at_first_live_birth or 98
    else:
        age_first_birth = 98

    num_relatives = sum(
        1
        for member in user.family_history
        if member.cancer_type == CancerType.BREAST
        and member.relation in _COUNTED_RELATIONS
    )

    return {
        "age": age,
        # Five-year projection window, capped at age 90.
        "projection_age": min(age + 5, 90),
        "num_biopsies": female.breast_health.num_biopsies or 0,
        "hyperplasia": 1 if female.breast_health.atypical_hyperplasia else 0,
        # 99 is passed when the age at menarche is missing.
        "age_menarche": female.menstrual.age_at_menarche or 99,
        "age_first_birth": age_first_birth,
        "num_relatives": num_relatives,
        "race": _RACE_CODES.get(user.demographics.ethnicity, _DEFAULT_RACE_CODE),
    }


class TestGailModel:
    """Test suite for GailRiskModel."""

    def setup_method(self):
        """Initialize GailRiskModel instance for testing."""
        self.model = GailRiskModel()

    @pytest.mark.parametrize(
        "case", GROUND_TRUTH_CASES, ids=lambda case: case["name"]
    )
    def test_ground_truth_validation(self, case):
        """Test against NCI BCRAT ground truth results.

        Args:
            case: Parameterized ground truth case dict.
        """
        calculated = self.model.absolute_risk(**_gail_inputs(case["input"]))

        assert calculated == pytest.approx(case["expected"], abs=0.5)

    def test_user_input_integration(self):
        """Test integration with UserInput model."""
        user = _make_user(
            45,
            ethnicity=Ethnicity.WHITE,
            female_specific=_female_specific(
                age_at_menarche=12,
                num_live_births=2,
                age_at_first_live_birth=25,
            ),
            family_history=[
                _breast_cancer_relative(FamilyRelation.MOTHER, 55),
            ],
        )

        score = self.model.compute_score(user)

        assert score != "N/A: Missing female-specific information."
        assert float(score) > 0

    def test_male_patient_handling(self):
        """Test that male patients raise ValueError due to validation failure.

        This patient has no ethnicity set.
        """
        male_user = _make_male_user(45)

        # Male patients should now raise ValueError due to validation failure
        with pytest.raises(ValueError) as exc_info:
            self.model.compute_score(male_user)

        message = str(exc_info.value)
        assert "Invalid inputs for Gail" in message
        assert "must be FEMALE" in message

    @pytest.mark.parametrize(
        ("age_years", "error_pattern"),
        [
            (
                34,
                r"Invalid inputs for Gail.*age_years.*greater than or equal to 35",
            ),
            (
                86,
                r"Invalid inputs for Gail.*age_years.*less than or equal to 85",
            ),
        ],
        ids=["below_minimum", "above_maximum"],
    )
    def test_age_validation(self, age_years, error_pattern):
        """Test age validation (35-85 range).

        Args:
            age_years: Out-of-range age given to the patient.
            error_pattern: Regex the raised ValueError message must match.
        """
        user = _make_user(
            age_years,
            female_specific=_female_specific(
                age_at_menarche=13,
                num_live_births=1,
                age_at_first_live_birth=25,
            ),
        )

        with pytest.raises(ValueError, match=error_pattern):
            self.model.compute_score(user)

    def test_model_metadata(self):
        """Test model metadata methods."""
        assert self.model.name == "gail"
        assert self.model.cancer_type() == "breast"
        assert (
            "Gail Model" in self.model.description()
            or "BCRAT" in self.model.description()
        )
        assert "1.67" in self.model.interpretation()
        assert isinstance(self.model.references(), list)
        assert len(self.model.references()) > 0

    def test_male_patient_validation_rejection(self):
        """Test that male patients are rejected during validation."""
        # Male patient should be rejected
        male_user = _make_male_user(40, ethnicity=Ethnicity.WHITE)

        # Validation should fail before compute_score is called
        is_valid, errors = self.model.validate_inputs(male_user)

        assert not is_valid
        assert len(errors) == 1
        assert "Field 'demographics.sex': must be FEMALE" in errors[0]

    def test_male_patient_compute_score_raises_error(self):
        """Test that compute_score raises ValueError for male patients."""
        male_user = _make_male_user(40, ethnicity=Ethnicity.WHITE)

        # compute_score should raise ValueError due to validation failure
        with pytest.raises(ValueError) as exc_info:
            self.model.compute_score(male_user)

        message = str(exc_info.value)
        assert "Invalid inputs for Gail" in message
        assert "must be FEMALE" in message

    def test_ethnicity_restriction_validation(self):
        """Test that unsupported ethnicities are rejected during validation."""
        # Test with unsupported ethnicity (not in supported list)
        user = _make_user(40, ethnicity=Ethnicity.ASHKENAZI_JEWISH)

        # Validation should fail
        is_valid, errors = self.model.validate_inputs(user)

        assert not is_valid
        assert len(errors) == 1
        assert "Field 'demographics.ethnicity': Input should be" in errors[0]
        assert "WHITE" in errors[0] and "BLACK" in errors[0] and "ASIAN" in errors[0]

    @pytest.mark.parametrize("ethnicity", SUPPORTED_ETHNICITIES)
    def test_supported_ethnicities_pass_validation(self, ethnicity):
        """Test that all supported ethnicities pass validation.

        Parametrized so that a failure for one ethnicity does not hide the
        results for the others.

        Args:
            ethnicity: Supported Ethnicity member under test.
        """
        user = _make_user(40, ethnicity=ethnicity)

        is_valid, errors = self.model.validate_inputs(user)

        assert is_valid, f"Failed for ethnicity: {ethnicity}"
        assert len(errors) == 0