# Spaces: InstaDeepAI / sentinel
# Runtime error
# File size: 16,893 Bytes


"""Tests for the Gail Breast Cancer Risk Model.

Ground truth values collected from: https://bcrisktool.cancer.gov/
"""

import pytest

from sentinel.risk_models import GailRiskModel
from sentinel.user_input import (
    Anthropometrics,
    BreastHealthHistory,
    CancerType,
    Demographics,
    Ethnicity,
    FamilyMemberCancer,
    FamilyRelation,
    FamilySide,
    FemaleSpecific,
    Lifestyle,
    MenstrualHistory,
    ParityHistory,
    PersonalMedicalHistory,
    RelationshipDegree,
    Sex,
    SmokingHistory,
    SmokingStatus,
    UserInput,
)

# Ground-truth fixtures: every entry pairs a UserInput profile with the value
# recorded for that profile from the NCI BCRAT calculator.
def _breast_cancer_relative(relation, age_at_diagnosis):
    """Build a first-degree, maternal-side breast cancer family entry.

    Args:
        relation: FamilyRelation member identifying the relative.
        age_at_diagnosis: Age at which the relative was diagnosed.

    Returns:
        A new FamilyMemberCancer instance.
    """
    return FamilyMemberCancer(
        relation=relation,
        cancer_type=CancerType.BREAST,
        age_at_diagnosis=age_at_diagnosis,
        degree=RelationshipDegree.FIRST,
        side=FamilySide.MATERNAL,
    )


def _ground_truth_case(
    name,
    expected,
    *,
    age_years,
    ethnicity,
    age_at_menarche,
    num_live_births,
    age_at_first_live_birth,
    breast_health=None,
    family_history=None,
):
    """Assemble one ground-truth case dict.

    All cases share the same sex (female), anthropometrics, smoking status and
    empty personal medical history; only the arguments below vary.

    Args:
        name: Case identifier, used as the pytest id.
        expected: Risk value recorded from the NCI BCRAT calculator.
        age_years: Age of the patient.
        ethnicity: Ethnicity member for the patient.
        age_at_menarche: Age at first menstrual period.
        num_live_births: Number of live births.
        age_at_first_live_birth: Age at first live birth, or None.
        breast_health: BreastHealthHistory to use; a default-constructed one
            is created when omitted.
        family_history: List of FamilyMemberCancer entries; an empty list is
            used when omitted.

    Returns:
        A dict with the keys "name", "input" and "expected".
    """
    if breast_health is None:
        breast_health = BreastHealthHistory()
    if family_history is None:
        family_history = []

    demographics = Demographics(
        age_years=age_years,
        sex=Sex.FEMALE,
        ethnicity=ethnicity,
        anthropometrics=Anthropometrics(height_cm=165.0, weight_kg=65.0),
    )
    female_specific = FemaleSpecific(
        menstrual=MenstrualHistory(age_at_menarche=age_at_menarche),
        parity=ParityHistory(
            num_live_births=num_live_births,
            age_at_first_live_birth=age_at_first_live_birth,
        ),
        breast_health=breast_health,
    )
    user = UserInput(
        demographics=demographics,
        lifestyle=Lifestyle(
            smoking=SmokingHistory(status=SmokingStatus.NEVER),
        ),
        personal_medical_history=PersonalMedicalHistory(),
        female_specific=female_specific,
        family_history=family_history,
    )
    return {"name": name, "input": user, "expected": expected}


GROUND_TRUTH_CASES = [
    _ground_truth_case(
        "low_risk",
        0.6,
        age_years=40,
        ethnicity=Ethnicity.WHITE,
        age_at_menarche=13,
        num_live_births=1,
        age_at_first_live_birth=25,
    ),
    _ground_truth_case(
        "average_risk",
        2.2,
        age_years=50,
        ethnicity=Ethnicity.WHITE,
        age_at_menarche=12,
        num_live_births=1,
        age_at_first_live_birth=28,
        family_history=[_breast_cancer_relative(FamilyRelation.MOTHER, 55)],
    ),
    _ground_truth_case(
        "high_risk",
        10.9,
        age_years=55,
        ethnicity=Ethnicity.WHITE,
        age_at_menarche=11,
        num_live_births=1,
        age_at_first_live_birth=35,
        breast_health=BreastHealthHistory(
            num_biopsies=2,
            atypical_hyperplasia=True,
        ),
        family_history=[
            _breast_cancer_relative(FamilyRelation.MOTHER, 45),
            _breast_cancer_relative(FamilyRelation.SISTER, 50),
        ],
    ),
    _ground_truth_case(
        "african_american",
        1.6,
        age_years=45,
        ethnicity=Ethnicity.BLACK,
        age_at_menarche=12,
        num_live_births=1,
        age_at_first_live_birth=22,
        family_history=[_breast_cancer_relative(FamilyRelation.MOTHER, 55)],
    ),
    _ground_truth_case(
        "hispanic_nulliparous",
        0.9,
        age_years=42,
        ethnicity=Ethnicity.HISPANIC,
        age_at_menarche=14,
        num_live_births=0,
        age_at_first_live_birth=None,
    ),
]


class TestGailModel:
    """Test suite for GailRiskModel.

    Covers comparison against values collected from the NCI BCRAT calculator,
    integration with ``UserInput``, input validation (sex, age range and
    ethnicity) and the model's metadata accessors.
    """

    # Ethnicity -> integer race code handed to ``GailRiskModel.absolute_risk``.
    # Any ethnicity that is not listed (including a missing one) uses the
    # default code.
    # NOTE(review): presumably these are the BCRAT race codes -- confirm
    # against the GailRiskModel implementation.
    _RACE_CODES = {
        Ethnicity.BLACK: 2,
        Ethnicity.ASIAN: 3,
        Ethnicity.PACIFIC_ISLANDER: 3,
        Ethnicity.HISPANIC: 6,
    }
    _DEFAULT_RACE_CODE = 1

    def setup_method(self):
        """Initialize GailRiskModel instance for testing."""
        self.model = GailRiskModel()

    @classmethod
    def _absolute_risk_kwargs(cls, user):
        """Translate a UserInput into keyword arguments for ``absolute_risk``.

        Args:
            user: UserInput with ``demographics``, ``female_specific`` and
                ``family_history`` populated.

        Returns:
            Dict of keyword arguments accepted by
            ``GailRiskModel.absolute_risk``.
        """
        female = user.female_specific
        age = user.demographics.age_years

        # NOTE(review): 99 and 98 look like the BCRAT "unknown" and
        # "no live birth" sentinel codes -- confirm against the model.
        if (female.parity.num_live_births or 0) > 0:
            age_first_birth = female.parity.age_at_first_live_birth or 98
        else:
            age_first_birth = 98

        # Only breast cancer in a mother, sister or daughter is counted.
        counted_relations = {
            FamilyRelation.MOTHER,
            FamilyRelation.SISTER,
            FamilyRelation.DAUGHTER,
        }
        num_relatives = sum(
            1
            for member in user.family_history
            if member.cancer_type == CancerType.BREAST
            and member.relation in counted_relations
        )

        return {
            "age": age,
            # Five-year projection window, capped at age 90.
            "projection_age": min(age + 5, 90),
            "num_biopsies": female.breast_health.num_biopsies or 0,
            "hyperplasia": 1 if female.breast_health.atypical_hyperplasia else 0,
            "age_menarche": female.menstrual.age_at_menarche or 99,
            "age_first_birth": age_first_birth,
            "num_relatives": num_relatives,
            "race": cls._RACE_CODES.get(
                user.demographics.ethnicity, cls._DEFAULT_RACE_CODE
            ),
        }

    @staticmethod
    def _male_user(age_years, **demographics_extra):
        """Build a male UserInput without female-specific data.

        Args:
            age_years: Age of the patient.
            **demographics_extra: Additional keyword arguments forwarded to
                ``Demographics`` (for example ``ethnicity``). Keywords that
                are not given are omitted from the ``Demographics`` call
                rather than passed as ``None``.

        Returns:
            A new UserInput instance.
        """
        return UserInput(
            demographics=Demographics(
                age_years=age_years,
                sex=Sex.MALE,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=75.0),
                **demographics_extra,
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
            ),
            personal_medical_history=PersonalMedicalHistory(),
        )

    @pytest.mark.parametrize("case", GROUND_TRUTH_CASES, ids=lambda x: x["name"])
    def test_ground_truth_validation(self, case):
        """Test against NCI BCRAT ground truth results.

        Args:
            case: Parameterized ground truth case dict.
        """
        calculated = self.model.absolute_risk(
            **self._absolute_risk_kwargs(case["input"])
        )

        # NOTE(review): abs=0.5 is wide compared with the smallest expected
        # values (0.6); consider tightening it once the model output has been
        # confirmed against the calculator.
        assert calculated == pytest.approx(case["expected"], abs=0.5)

    def test_user_input_integration(self):
        """Test integration with UserInput model."""
        user = UserInput(
            demographics=Demographics(
                age_years=45,
                sex=Sex.FEMALE,
                ethnicity=Ethnicity.WHITE,
                anthropometrics=Anthropometrics(height_cm=165.0, weight_kg=65.0),
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            female_specific=FemaleSpecific(
                menstrual=MenstrualHistory(age_at_menarche=12),
                parity=ParityHistory(
                    num_live_births=2,
                    age_at_first_live_birth=25,
                ),
                breast_health=BreastHealthHistory(),
            ),
            family_history=[
                FamilyMemberCancer(
                    relation=FamilyRelation.MOTHER,
                    cancer_type=CancerType.BREAST,
                    age_at_diagnosis=55,
                    degree=RelationshipDegree.FIRST,
                    side=FamilySide.MATERNAL,
                )
            ],
        )

        score = self.model.compute_score(user)
        assert score != "N/A: Missing female-specific information."
        # The score must be convertible to a positive number.
        assert float(score) > 0

    def test_male_patient_handling(self):
        """Test that male patients raise ValueError due to validation failure."""
        male_user = self._male_user(45)

        with pytest.raises(ValueError) as exc_info:
            self.model.compute_score(male_user)

        message = str(exc_info.value)
        assert "Invalid inputs for Gail" in message
        assert "must be FEMALE" in message

    @pytest.mark.parametrize(
        ("age_years", "error_pattern"),
        [
            (
                34,
                r"Invalid inputs for Gail.*age_years.*greater than or equal to 35",
            ),
            (
                86,
                r"Invalid inputs for Gail.*age_years.*less than or equal to 85",
            ),
        ],
        ids=["below_minimum", "above_maximum"],
    )
    def test_age_validation(self, age_years, error_pattern):
        """Test age validation (35-85 range).

        Each boundary is its own parametrized case so that a failure on one
        side of the range does not hide the result for the other.

        Args:
            age_years: Age just outside the accepted range.
            error_pattern: Regex the raised ValueError message must match.
        """
        user = UserInput(
            demographics=Demographics(
                age_years=age_years,
                sex=Sex.FEMALE,
                anthropometrics=Anthropometrics(height_cm=165.0, weight_kg=65.0),
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            female_specific=FemaleSpecific(
                menstrual=MenstrualHistory(age_at_menarche=13),
                parity=ParityHistory(
                    num_live_births=1,
                    age_at_first_live_birth=25,
                ),
                breast_health=BreastHealthHistory(),
            ),
        )
        with pytest.raises(ValueError, match=error_pattern):
            self.model.compute_score(user)

    def test_model_metadata(self):
        """Test model metadata methods."""
        assert self.model.name == "gail"
        assert self.model.cancer_type() == "breast"

        description = self.model.description()
        assert "Gail Model" in description or "BCRAT" in description

        assert "1.67" in self.model.interpretation()

        references = self.model.references()
        assert isinstance(references, list)
        assert len(references) > 0

    def test_male_patient_validation_rejection(self):
        """Test that male patients are rejected during validation."""
        male_user = self._male_user(40, ethnicity=Ethnicity.WHITE)

        # validate_inputs reports the failure instead of raising.
        is_valid, errors = self.model.validate_inputs(male_user)
        assert not is_valid
        assert len(errors) == 1
        assert "Field 'demographics.sex': must be FEMALE" in errors[0]

    def test_male_patient_compute_score_raises_error(self):
        """Test that compute_score raises ValueError for male patients."""
        male_user = self._male_user(40, ethnicity=Ethnicity.WHITE)

        # compute_score should raise ValueError due to validation failure
        with pytest.raises(ValueError) as exc_info:
            self.model.compute_score(male_user)

        message = str(exc_info.value)
        assert "Invalid inputs for Gail" in message
        assert "must be FEMALE" in message

    def test_ethnicity_restriction_validation(self):
        """Test that unsupported ethnicities are rejected during validation."""
        user = UserInput(
            demographics=Demographics(
                age_years=40,
                sex=Sex.FEMALE,
                ethnicity=Ethnicity.ASHKENAZI_JEWISH,  # Not in supported list
                anthropometrics=Anthropometrics(height_cm=165.0, weight_kg=65.0),
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
            ),
            personal_medical_history=PersonalMedicalHistory(),
        )

        is_valid, errors = self.model.validate_inputs(user)
        assert not is_valid
        assert len(errors) == 1

        error = errors[0]
        assert "Field 'demographics.ethnicity': Input should be" in error
        # The message is expected to list the accepted values.
        assert "WHITE" in error and "BLACK" in error and "ASIAN" in error

    @pytest.mark.parametrize(
        "ethnicity",
        [
            Ethnicity.WHITE,
            Ethnicity.BLACK,
            Ethnicity.ASIAN,
            Ethnicity.PACIFIC_ISLANDER,
            Ethnicity.HISPANIC,
        ],
    )
    def test_supported_ethnicities_pass_validation(self, ethnicity):
        """Test that all supported ethnicities pass validation.

        Parametrized so every ethnicity is reported individually; a failure
        for one value does not prevent the others from being checked.

        Args:
            ethnicity: Ethnicity member expected to be accepted.
        """
        user = UserInput(
            demographics=Demographics(
                age_years=40,
                sex=Sex.FEMALE,
                ethnicity=ethnicity,
                anthropometrics=Anthropometrics(height_cm=165.0, weight_kg=65.0),
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
            ),
            personal_medical_history=PersonalMedicalHistory(),
        )

        is_valid, errors = self.model.validate_inputs(user)
        assert is_valid, f"Failed for ethnicity: {ethnicity}"
        assert len(errors) == 0