# Source path: sentinel/tests/test_risk_models/test_gail_model.py
# Synced from GitHub (main), revision 7638cbd (verified).
"""Tests for the Gail Breast Cancer Risk Model.
Ground truth values collected from: https://bcrisktool.cancer.gov/
"""
import pytest
from sentinel.risk_models import GailRiskModel
from sentinel.user_input import (
Anthropometrics,
BreastHealthHistory,
CancerType,
Demographics,
Ethnicity,
FamilyMemberCancer,
FamilyRelation,
FamilySide,
FemaleSpecific,
Lifestyle,
MenstrualHistory,
ParityHistory,
PersonalMedicalHistory,
RelationshipDegree,
Sex,
SmokingHistory,
SmokingStatus,
UserInput,
)
# Ground-truth scenarios; expected values were collected from the NCI BCRAT
# calculator (see the module docstring for the URL).
def _breast_cancer_relative(relation, age_at_diagnosis):
    """Return a first-degree, maternal-side relative diagnosed with breast cancer."""
    return FamilyMemberCancer(
        relation=relation,
        cancer_type=CancerType.BREAST,
        age_at_diagnosis=age_at_diagnosis,
        degree=RelationshipDegree.FIRST,
        side=FamilySide.MATERNAL,
    )


def _ground_truth_case(
    name,
    expected,
    *,
    age,
    ethnicity,
    menarche,
    live_births,
    first_birth_age,
    breast_health=None,
    relatives=(),
):
    """Assemble one ground-truth entry.

    Every scenario shares the same sex (female), body measurements
    (165 cm / 65 kg), never-smoker status and empty personal medical
    history; only the keyword arguments vary between scenarios.

    Args:
        name: Scenario identifier (used as the pytest id).
        expected: Reference value the model output is compared against.
        age: Current age in years.
        ethnicity: ``Ethnicity`` member for the scenario.
        menarche: Age at menarche.
        live_births: Number of live births.
        first_birth_age: Age at first live birth, or ``None``.
        breast_health: Optional ``BreastHealthHistory``; a default-constructed
            instance is used when omitted.
        relatives: Iterable of ``FamilyMemberCancer`` entries.

    Returns:
        Dict with the keys ``"name"``, ``"input"`` and ``"expected"``.
    """
    if breast_health is None:
        breast_health = BreastHealthHistory()
    return {
        "name": name,
        "input": UserInput(
            demographics=Demographics(
                age_years=age,
                sex=Sex.FEMALE,
                ethnicity=ethnicity,
                anthropometrics=Anthropometrics(height_cm=165.0, weight_kg=65.0),
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            female_specific=FemaleSpecific(
                menstrual=MenstrualHistory(age_at_menarche=menarche),
                parity=ParityHistory(
                    num_live_births=live_births,
                    age_at_first_live_birth=first_birth_age,
                ),
                breast_health=breast_health,
            ),
            family_history=list(relatives),
        ),
        "expected": expected,
    }


GROUND_TRUTH_CASES = [
    _ground_truth_case(
        "low_risk",
        0.6,
        age=40,
        ethnicity=Ethnicity.WHITE,
        menarche=13,
        live_births=1,
        first_birth_age=25,
    ),
    _ground_truth_case(
        "average_risk",
        2.2,
        age=50,
        ethnicity=Ethnicity.WHITE,
        menarche=12,
        live_births=1,
        first_birth_age=28,
        relatives=[_breast_cancer_relative(FamilyRelation.MOTHER, 55)],
    ),
    _ground_truth_case(
        "high_risk",
        10.9,
        age=55,
        ethnicity=Ethnicity.WHITE,
        menarche=11,
        live_births=1,
        first_birth_age=35,
        breast_health=BreastHealthHistory(
            num_biopsies=2,
            atypical_hyperplasia=True,
        ),
        relatives=[
            _breast_cancer_relative(FamilyRelation.MOTHER, 45),
            _breast_cancer_relative(FamilyRelation.SISTER, 50),
        ],
    ),
    _ground_truth_case(
        "african_american",
        1.6,
        age=45,
        ethnicity=Ethnicity.BLACK,
        menarche=12,
        live_births=1,
        first_birth_age=22,
        relatives=[_breast_cancer_relative(FamilyRelation.MOTHER, 55)],
    ),
    _ground_truth_case(
        "hispanic_nulliparous",
        0.9,
        age=42,
        ethnicity=Ethnicity.HISPANIC,
        menarche=14,
        live_births=0,
        first_birth_age=None,
    ),
]
class TestGailModel:
    """Test suite for GailRiskModel."""

    # ------------------------------------------------------------------
    # Fixture helpers (underscore-prefixed, so pytest does not collect them)
    # ------------------------------------------------------------------
    @staticmethod
    def _female_specific(age_at_menarche, num_live_births, age_at_first_live_birth):
        """Build a FemaleSpecific record with a default breast-health history."""
        return FemaleSpecific(
            menstrual=MenstrualHistory(age_at_menarche=age_at_menarche),
            parity=ParityHistory(
                num_live_births=num_live_births,
                age_at_first_live_birth=age_at_first_live_birth,
            ),
            breast_health=BreastHealthHistory(),
        )

    @staticmethod
    def _user(
        age,
        sex,
        ethnicity=None,
        *,
        height_cm=165.0,
        weight_kg=65.0,
        female_specific=None,
        family_history=None,
    ):
        """Build a never-smoker UserInput with an empty personal medical history.

        Optional sections (``ethnicity``, ``female_specific``,
        ``family_history``) are only forwarded to the constructors when a
        value is supplied, so an omitted argument leaves the corresponding
        model field at its own default rather than an explicit ``None``.
        """
        demographics_fields = {
            "age_years": age,
            "sex": sex,
            "anthropometrics": Anthropometrics(
                height_cm=height_cm, weight_kg=weight_kg
            ),
        }
        if ethnicity is not None:
            demographics_fields["ethnicity"] = ethnicity

        user_fields = {
            "demographics": Demographics(**demographics_fields),
            "lifestyle": Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
            ),
            "personal_medical_history": PersonalMedicalHistory(),
        }
        if female_specific is not None:
            user_fields["female_specific"] = female_specific
        if family_history is not None:
            user_fields["family_history"] = family_history
        return UserInput(**user_fields)

    @staticmethod
    def _race_code(ethnicity):
        """Map an Ethnicity to the integer ``race`` argument used by this test.

        BLACK -> 2, ASIAN / PACIFIC_ISLANDER -> 3, HISPANIC -> 6; anything
        else (including a missing ethnicity) -> 1.
        """
        if not ethnicity:
            return 1
        if ethnicity == Ethnicity.BLACK:
            return 2
        if ethnicity in {Ethnicity.ASIAN, Ethnicity.PACIFIC_ISLANDER}:
            return 3
        if ethnicity == Ethnicity.HISPANIC:
            return 6
        return 1

    @classmethod
    def _absolute_risk_kwargs(cls, user):
        """Derive the keyword arguments for ``absolute_risk`` from a UserInput.

        NOTE(review): 99 (menarche) and 98 (first birth) are the fallback
        values used when data is missing or the woman has no live births;
        presumably these are the model's "unknown"/"nulliparous" codes --
        confirm against GailRiskModel.
        """
        female = user.female_specific
        age = user.demographics.age_years

        if (female.parity.num_live_births or 0) > 0:
            age_first_birth = female.parity.age_at_first_live_birth or 98
        else:
            age_first_birth = 98

        counted_relations = {
            FamilyRelation.MOTHER,
            FamilyRelation.SISTER,
            FamilyRelation.DAUGHTER,
        }
        num_relatives = sum(
            1
            for relative in user.family_history
            if relative.cancer_type == CancerType.BREAST
            and relative.relation in counted_relations
        )

        return {
            "age": age,
            # Five-year projection window, capped at age 90.
            "projection_age": min(age + 5, 90),
            "num_biopsies": female.breast_health.num_biopsies or 0,
            "hyperplasia": 1 if female.breast_health.atypical_hyperplasia else 0,
            "age_menarche": female.menstrual.age_at_menarche or 99,
            "age_first_birth": age_first_birth,
            "num_relatives": num_relatives,
            "race": cls._race_code(user.demographics.ethnicity),
        }

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def setup_method(self):
        """Initialize GailRiskModel instance for testing."""
        self.model = GailRiskModel()

    @pytest.mark.parametrize("case", GROUND_TRUTH_CASES, ids=lambda x: x["name"])
    def test_ground_truth_validation(self, case):
        """Test against NCI BCRAT ground truth results.

        NOTE(review): the absolute tolerance of 0.5 is wide compared with the
        smallest expected value (0.6); consider tightening once the model
        output is confirmed to track the reference values more closely.

        Args:
            case: Parameterized ground truth case dict.
        """
        calculated = self.model.absolute_risk(
            **self._absolute_risk_kwargs(case["input"])
        )
        assert calculated == pytest.approx(case["expected"], abs=0.5)

    def test_user_input_integration(self):
        """Test integration with UserInput model."""
        user = self._user(
            45,
            Sex.FEMALE,
            Ethnicity.WHITE,
            female_specific=self._female_specific(12, 2, 25),
            family_history=[
                FamilyMemberCancer(
                    relation=FamilyRelation.MOTHER,
                    cancer_type=CancerType.BREAST,
                    age_at_diagnosis=55,
                    degree=RelationshipDegree.FIRST,
                    side=FamilySide.MATERNAL,
                )
            ],
        )
        score = self.model.compute_score(user)
        assert score != "N/A: Missing female-specific information."
        assert float(score) > 0

    def test_male_patient_handling(self):
        """Test that male patients raise ValueError due to validation failure."""
        male_user = self._user(45, Sex.MALE, height_cm=175.0, weight_kg=75.0)
        with pytest.raises(ValueError) as exc_info:
            self.model.compute_score(male_user)
        # These checks must sit after the ``with`` block: statements placed
        # after the raising call inside it would never execute.
        message = str(exc_info.value)
        assert "Invalid inputs for Gail" in message
        assert "must be FEMALE" in message

    def test_age_validation(self):
        """Test age validation (35-85 range)."""
        young_user = self._user(
            34,
            Sex.FEMALE,
            female_specific=self._female_specific(13, 1, 25),
        )
        with pytest.raises(
            ValueError,
            match=r"Invalid inputs for Gail.*age_years.*greater than or equal to 35",
        ):
            self.model.compute_score(young_user)

        old_user = self._user(
            86,
            Sex.FEMALE,
            female_specific=self._female_specific(13, 1, 25),
        )
        with pytest.raises(
            ValueError,
            match=r"Invalid inputs for Gail.*age_years.*less than or equal to 85",
        ):
            self.model.compute_score(old_user)

    def test_model_metadata(self):
        """Test model metadata methods."""
        assert self.model.name == "gail"
        assert self.model.cancer_type() == "breast"
        description = self.model.description()
        assert "Gail Model" in description or "BCRAT" in description
        assert "1.67" in self.model.interpretation()
        references = self.model.references()
        assert isinstance(references, list)
        assert len(references) > 0

    def test_male_patient_validation_rejection(self):
        """Test that male patients are rejected during validation."""
        male_user = self._user(
            40,
            Sex.MALE,  # Male patient should be rejected
            Ethnicity.WHITE,
            height_cm=175.0,
            weight_kg=75.0,
        )
        is_valid, errors = self.model.validate_inputs(male_user)
        assert not is_valid
        assert len(errors) == 1
        assert "Field 'demographics.sex': must be FEMALE" in errors[0]

    def test_male_patient_compute_score_raises_error(self):
        """Test that compute_score raises ValueError for male patients."""
        male_user = self._user(
            40,
            Sex.MALE,
            Ethnicity.WHITE,
            height_cm=175.0,
            weight_kg=75.0,
        )
        with pytest.raises(ValueError) as exc_info:
            self.model.compute_score(male_user)
        message = str(exc_info.value)
        assert "Invalid inputs for Gail" in message
        assert "must be FEMALE" in message

    def test_ethnicity_restriction_validation(self):
        """Test that unsupported ethnicities are rejected during validation."""
        user = self._user(
            40,
            Sex.FEMALE,
            Ethnicity.ASHKENAZI_JEWISH,  # Not in supported list
        )
        is_valid, errors = self.model.validate_inputs(user)
        assert not is_valid
        assert len(errors) == 1
        assert "Field 'demographics.ethnicity': Input should be" in errors[0]
        assert "WHITE" in errors[0] and "BLACK" in errors[0] and "ASIAN" in errors[0]

    @pytest.mark.parametrize(
        "ethnicity",
        [
            Ethnicity.WHITE,
            Ethnicity.BLACK,
            Ethnicity.ASIAN,
            Ethnicity.PACIFIC_ISLANDER,
            Ethnicity.HISPANIC,
        ],
    )
    def test_supported_ethnicities_pass_validation(self, ethnicity):
        """Test that each supported ethnicity passes validation.

        Parametrized so that every ethnicity is reported independently
        instead of the first failure hiding the remaining ones.
        """
        user = self._user(40, Sex.FEMALE, ethnicity)
        is_valid, errors = self.model.validate_inputs(user)
        assert is_valid, f"Failed for ethnicity: {ethnicity}"
        assert len(errors) == 0