"""Tests for the PLCOm2012 Lung Cancer Risk Model.

Ground truth values are calculated with the authors' reference calculator at
https://brocku.ca/lung-cancer-screening-and-risk-prediction/risk-calculators/
and with the reference R implementation at https://github.com/resplab/PLCOm2012.
"""

import pytest

from sentinel.risk_models.plcom2012 import PLCOm2012RiskModel
from sentinel.user_input import (
    Anthropometrics,
    CancerType,
    ChronicCondition,
    Demographics,
    Ethnicity,
    FamilyMemberCancer,
    FamilyRelation,
    FamilySide,
    Lifestyle,
    PersonalMedicalHistory,
    RelationshipDegree,
    Sex,
    SmokingHistory,
    SmokingStatus,
    UserInput,
)

# Ground-truth profiles with pre-calculated expected risks
# (UserInput objects are built in this module, like in the Gail tests).


def _mother_with_lung_cancer():
    """Return a first-degree maternal relative (mother) with lung cancer at 65."""
    return FamilyMemberCancer(
        relation=FamilyRelation.MOTHER,
        side=FamilySide.MATERNAL,
        degree=RelationshipDegree.FIRST,
        cancer_type=CancerType.LUNG,
        age_at_diagnosis=65,
    )


def _ground_truth_case(
    name,
    expected,
    *,
    age,
    ethnicity,
    bmi,
    education,
    status,
    cigarettes_per_day,
    years_smoked,
    years_since_quit=None,
    chronic_conditions=(),
    previous_cancers=(),
    family_history=(),
):
    """Build one ground-truth entry for a 175 cm male profile.

    Args:
        name: Identifier used as the parametrized test id.
        expected: Reference risk the computed score is compared against.
        age: Age in years.
        ethnicity: Ethnicity enum member.
        bmi: Target body mass index; weight is derived as ``bmi * 1.75**2``.
        education: Education level code.
        status: Smoking status enum member.
        cigarettes_per_day: Smoking intensity.
        years_smoked: Smoking duration in years.
        years_since_quit: Years since quitting, or None.
        chronic_conditions: Chronic conditions of the profile.
        previous_cancers: Previous cancers of the profile.
        family_history: Family cancer records of the profile.

    Returns:
        Dict with ``name``, ``input`` (a UserInput) and ``expected`` keys.
    """
    profile = UserInput(
        demographics=Demographics(
            age_years=age,
            sex=Sex.MALE,
            ethnicity=ethnicity,
            anthropometrics=Anthropometrics(
                height_cm=175.0,
                weight_kg=bmi * (1.75**2),
            ),
            education_level=education,
        ),
        lifestyle=Lifestyle(
            smoking=SmokingHistory(
                status=status,
                cigarettes_per_day=cigarettes_per_day,
                years_smoked=years_smoked,
                years_since_quit=years_since_quit,
            ),
        ),
        personal_medical_history=PersonalMedicalHistory(
            chronic_conditions=list(chronic_conditions),
            previous_cancers=list(previous_cancers),
        ),
        family_history=list(family_history),
    )
    return {"name": name, "input": profile, "expected": expected}


GROUND_TRUTH_CASES = [
    _ground_truth_case(
        "low_risk_current_smoker",
        0.31,
        age=55,
        ethnicity=Ethnicity.WHITE,
        bmi=25.0,
        education=4,
        status=SmokingStatus.CURRENT,
        cigarettes_per_day=10,
        years_smoked=20,
    ),
    _ground_truth_case(
        "moderate_risk_former_smoker",
        1.24,
        age=62,
        ethnicity=Ethnicity.WHITE,
        bmi=27.0,
        education=3,
        status=SmokingStatus.FORMER,
        cigarettes_per_day=20,
        years_smoked=30,
        years_since_quit=5,
    ),
    _ground_truth_case(
        "high_risk_multiple_factors",
        31.19,
        age=70,
        ethnicity=Ethnicity.WHITE,
        bmi=22.0,
        education=2,
        status=SmokingStatus.CURRENT,
        cigarettes_per_day=40,
        years_smoked=45,
        chronic_conditions=[ChronicCondition.COPD],
        previous_cancers=[CancerType.BREAST],
        family_history=[_mother_with_lung_cancer()],
    ),
    _ground_truth_case(
        "black_race_variant",
        0.696,
        age=58,
        ethnicity=Ethnicity.BLACK,
        bmi=28.0,
        education=4,
        status=SmokingStatus.FORMER,
        cigarettes_per_day=15,
        years_smoked=25,
        years_since_quit=8,
    ),
    _ground_truth_case(
        "hispanic_low_education",
        1.161,
        age=60,
        ethnicity=Ethnicity.HISPANIC,
        bmi=30.0,
        education=1,
        status=SmokingStatus.CURRENT,
        cigarettes_per_day=25,
        years_smoked=35,
    ),
    _ground_truth_case(
        "asian_former_heavy_smoker",
        3.40,
        age=65,
        ethnicity=Ethnicity.ASIAN,
        bmi=24.0,
        education=5,
        status=SmokingStatus.FORMER,
        cigarettes_per_day=25,
        years_smoked=35,
        years_since_quit=3,
        chronic_conditions=[ChronicCondition.COPD],
        family_history=[_mother_with_lung_cancer()],
    ),
]


class TestPLCOm2012Model:
    """Test suite for PLCOm2012RiskModel."""

    def setup_method(self):
        """Create the PLCOm2012RiskModel under test and store it on ``self.model``."""
        self.model = PLCOm2012RiskModel()

    @pytest.mark.parametrize(
        "case",
        GROUND_TRUTH_CASES,
        ids=lambda entry: entry["name"],
    )
    def test_ground_truth_validation(self, case):
        """Compare the computed score with the reference value of one profile.

        Args:
            case: Ground truth entry with ``name``, ``input`` and ``expected`` keys.
        """
        # The score is parsed as a number after dropping any trailing "%".
        percent_text = self.model.compute_score(case["input"])
        risk_percent = float(percent_text.rstrip("%"))

        # Reference values are pre-calculated, so only 0.01 absolute slack is allowed.
        assert risk_percent == pytest.approx(case["expected"], abs=0.01)

    def test_user_input_integration_current_smoker(self):
        """Score a current smoker built through the full UserInput model."""
        demographics = Demographics(
            age_years=60,
            sex=Sex.MALE,
            ethnicity=Ethnicity.WHITE,
            anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
            education_level=4,
        )
        smoking = SmokingHistory(
            status=SmokingStatus.CURRENT,
            cigarettes_per_day=20,
            years_smoked=25,
            years_since_quit=None,
        )
        profile = UserInput(
            demographics=demographics,
            lifestyle=Lifestyle(smoking=smoking),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[],
                previous_cancers=[],
            ),
            family_history=[],
        )

        result = self.model.compute_score(profile)

        # A smoker must get a positive percentage, not the never-smoker message.
        assert result != "N/A: Model is for current or former smokers only."
        assert "%" in result
        assert float(result.replace("%", "")) > 0

    def test_user_input_integration_former_smoker(self):
        """Score a former smoker with COPD, a prior cancer and family history."""
        demographics = Demographics(
            age_years=65,
            sex=Sex.FEMALE,
            ethnicity=Ethnicity.BLACK,
            anthropometrics=Anthropometrics(height_cm=160.0, weight_kg=70.0),
            education_level=3,
        )
        smoking = SmokingHistory(
            status=SmokingStatus.FORMER,
            cigarettes_per_day=15,
            years_smoked=30,
            years_since_quit=10,
        )
        medical_history = PersonalMedicalHistory(
            chronic_conditions=[ChronicCondition.COPD],
            previous_cancers=[CancerType.BREAST],
        )
        father = FamilyMemberCancer(
            relation=FamilyRelation.FATHER,
            side=FamilySide.PATERNAL,
            degree=RelationshipDegree.FIRST,
            cancer_type=CancerType.LUNG,
            age_at_diagnosis=68,
        )
        profile = UserInput(
            demographics=demographics,
            lifestyle=Lifestyle(smoking=smoking),
            personal_medical_history=medical_history,
            family_history=[father],
        )

        result = self.model.compute_score(profile)

        # A smoker must get a positive percentage, not the never-smoker message.
        assert result != "N/A: Model is for current or former smokers only."
        assert "%" in result
        assert float(result.replace("%", "")) > 0

    def test_never_smoker_handling(self):
        """Check that a never smoker gets the fixed N/A message instead of a score."""
        demographics = Demographics(
            age_years=55,
            sex=Sex.MALE,
            ethnicity=Ethnicity.WHITE,
            anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
            education_level=4,
        )
        no_smoking = SmokingHistory(
            status=SmokingStatus.NEVER,
            cigarettes_per_day=0,
            years_smoked=0,
            years_since_quit=None,
        )
        profile = UserInput(
            demographics=demographics,
            lifestyle=Lifestyle(smoking=no_smoking),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )

        result = self.model.compute_score(profile)

        assert result == "N/A: Model is for current or former smokers only."

    def test_validation_errors(self):
        """Check that a profile with all required fields passes validation.

        Despite the name, no field is missing here: the profile is complete and
        the model is expected to return a percentage score.
        """
        demographics = Demographics(
            age_years=60,
            sex=Sex.MALE,
            ethnicity=Ethnicity.WHITE,
            anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
            education_level=4,
        )
        smoking = SmokingHistory(
            status=SmokingStatus.CURRENT,
            cigarettes_per_day=20,
            years_smoked=25,
            years_since_quit=None,
        )
        complete_profile = UserInput(
            demographics=demographics,
            lifestyle=Lifestyle(smoking=smoking),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )

        # Every required field is present, so scoring must succeed.
        result = self.model.compute_score(complete_profile)
        assert "%" in result

    def test_age_out_of_range(self):
        """Check that an age of 45 is rejected with a ValueError."""
        too_young = Demographics(
            age_years=45,  # Below minimum
            sex=Sex.MALE,
            ethnicity=Ethnicity.WHITE,
            anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
            education_level=4,
        )
        smoking = SmokingHistory(
            status=SmokingStatus.CURRENT,
            cigarettes_per_day=20,
            years_smoked=25,
            years_since_quit=None,
        )
        profile = UserInput(
            demographics=too_young,
            lifestyle=Lifestyle(smoking=smoking),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )

        with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
            self.model.compute_score(profile)

    def test_age_validation_legacy(self):
        """Check ages just outside the 50-80 range (49 and 81) - legacy behavior.

        Input validation now covers this, so a ValueError is expected for both.
        """
        # One year below and one year above the accepted range, in that order.
        for age in (49, 81):
            profile = UserInput(
                demographics=Demographics(
                    age_years=age,
                    sex=Sex.MALE,
                    ethnicity=Ethnicity.WHITE,
                    anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=75.0),
                    education_level=4,
                ),
                lifestyle=Lifestyle(
                    smoking=SmokingHistory(
                        status=SmokingStatus.CURRENT,
                        cigarettes_per_day=20,
                        years_smoked=25,
                        years_since_quit=None,
                    ),
                ),
                personal_medical_history=PersonalMedicalHistory(),
                family_history=[],
            )
            with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
                self.model.compute_score(profile)

    def test_missing_bmi_data(self):
        """Placeholder for missing BMI data; intentionally asserts nothing.

        Per the original author's note, anthropometrics is required by input
        validation and a UserInput without it cannot be created because of
        Pydantic validation, so there is nothing to exercise at the model level.
        """

    def test_missing_education_level(self):
        """Check that omitting the education level is rejected with a ValueError."""
        # education_level is deliberately left out of the demographics.
        demographics = Demographics(
            age_years=60,
            sex=Sex.MALE,
            ethnicity=Ethnicity.WHITE,
            anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
        )
        smoking = SmokingHistory(
            status=SmokingStatus.CURRENT,
            cigarettes_per_day=20,
            years_smoked=25,
            years_since_quit=None,
        )
        profile = UserInput(
            demographics=demographics,
            lifestyle=Lifestyle(smoking=smoking),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )

        with pytest.raises(ValueError, match=r"Invalid inputs for PLCOm2012:"):
            self.model.compute_score(profile)

    def test_missing_smoking_intensity(self):
        """Check that zero cigarettes per day yields a "Calculation failed" result.

        Per the original author's note, an intensity of 0 causes a division by
        zero inside the calculation, which the model reports in its return
        value instead of raising.
        """
        demographics = Demographics(
            age_years=60,
            sex=Sex.MALE,
            ethnicity=Ethnicity.WHITE,
            anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
            education_level=4,
        )
        zero_intensity = SmokingHistory(
            status=SmokingStatus.CURRENT,
            cigarettes_per_day=0,  # Triggers the division by zero
            years_smoked=25,
            years_since_quit=None,
        )
        profile = UserInput(
            demographics=demographics,
            lifestyle=Lifestyle(smoking=zero_intensity),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )

        # The failure is expected in the returned message, not as an exception.
        result = self.model.compute_score(profile)
        assert "Calculation failed" in result

    def test_missing_smoking_duration(self):
        """Check that zero years smoked is accepted and still produces a score.

        Input validation only requires years_smoked >= 0, so 0 is valid input.
        """
        demographics = Demographics(
            age_years=60,
            sex=Sex.MALE,
            ethnicity=Ethnicity.WHITE,
            anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
            education_level=4,
        )
        zero_duration = SmokingHistory(
            status=SmokingStatus.CURRENT,
            cigarettes_per_day=20,
            years_smoked=0,  # Valid input
            years_since_quit=None,
        )
        profile = UserInput(
            demographics=demographics,
            lifestyle=Lifestyle(smoking=zero_duration),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )

        # Zero years smoked must not prevent the calculation.
        result = self.model.compute_score(profile)
        assert "%" in result

    def test_missing_quit_years_former_smoker(self):
        """Check that a former smoker without years_since_quit gets a message."""
        demographics = Demographics(
            age_years=60,
            sex=Sex.MALE,
            ethnicity=Ethnicity.WHITE,
            anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
            education_level=4,
        )
        quit_time_unknown = SmokingHistory(
            status=SmokingStatus.FORMER,
            cigarettes_per_day=20,
            years_smoked=25,
            years_since_quit=None,  # Missing value that triggers the message
        )
        profile = UserInput(
            demographics=demographics,
            lifestyle=Lifestyle(smoking=quit_time_unknown),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )

        result = self.model.compute_score(profile)

        assert "Missing years since quitting for former smoker" in result

    def test_copd_detection(self):
        """Test COPD detection from chronic illnesses.

        The same profile is scored with and without COPD among its chronic
        conditions. Checking only that a percentage is returned would also pass
        if COPD were ignored, so the COPD score must be strictly higher than
        the baseline (COPD is a positive risk factor in the published model).
        """

        def score_with(conditions):
            """Score the fixed profile with the given chronic conditions."""
            profile = UserInput(
                demographics=Demographics(
                    age_years=60,
                    sex=Sex.MALE,
                    ethnicity=Ethnicity.WHITE,
                    anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                    education_level=4,
                ),
                lifestyle=Lifestyle(
                    smoking=SmokingHistory(
                        status=SmokingStatus.CURRENT,
                        cigarettes_per_day=20,
                        years_smoked=25,
                        years_since_quit=None,
                    ),
                ),
                personal_medical_history=PersonalMedicalHistory(
                    chronic_conditions=conditions,
                ),
                family_history=[],
            )
            return self.model.compute_score(profile)

        baseline = score_with([])
        # Diabetes is included to check COPD is found among other conditions.
        with_copd = score_with([ChronicCondition.COPD, ChronicCondition.DIABETES])

        assert "%" in baseline
        assert "%" in with_copd
        assert float(with_copd.rstrip("%")) > float(baseline.rstrip("%"))

    def test_family_history_lung_cancer_detection(self):
        """Test lung cancer family history detection.

        The same profile is scored with and without relatives who had lung
        cancer. Checking only that a percentage is returned would also pass if
        family history were ignored, so the score with an affected mother must
        be strictly higher than the baseline (family history of lung cancer is
        a positive risk factor in the published model).
        """
        mother = FamilyMemberCancer(
            relation=FamilyRelation.MOTHER,
            side=FamilySide.MATERNAL,
            degree=RelationshipDegree.FIRST,
            cancer_type=CancerType.LUNG,
            age_at_diagnosis=65,
        )
        # Should not count (not first-degree relative)
        uncle = FamilyMemberCancer(
            relation=FamilyRelation.MATERNAL_UNCLE,
            side=FamilySide.MATERNAL,
            degree=RelationshipDegree.SECOND,
            cancer_type=CancerType.LUNG,
            age_at_diagnosis=70,
        )

        def score_with(relatives):
            """Score the fixed profile with the given family history."""
            profile = UserInput(
                demographics=Demographics(
                    age_years=60,
                    sex=Sex.MALE,
                    ethnicity=Ethnicity.WHITE,
                    anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                    education_level=4,
                ),
                lifestyle=Lifestyle(
                    smoking=SmokingHistory(
                        status=SmokingStatus.CURRENT,
                        cigarettes_per_day=20,
                        years_smoked=25,
                        years_since_quit=None,
                    ),
                ),
                personal_medical_history=PersonalMedicalHistory(),
                family_history=relatives,
            )
            return self.model.compute_score(profile)

        baseline = score_with([])
        with_history = score_with([mother, uncle])

        assert "%" in baseline
        assert "%" in with_history
        assert float(with_history.rstrip("%")) > float(baseline.rstrip("%"))

    def test_race_handling(self):
        """Check that each of the five listed ethnicities gets a positive score."""
        ethnicities = (
            Ethnicity.WHITE,
            Ethnicity.BLACK,
            Ethnicity.HISPANIC,
            Ethnicity.ASIAN,
            Ethnicity.PACIFIC_ISLANDER,
        )

        for ethnicity in ethnicities:
            # Only the ethnicity varies between iterations.
            demographics = Demographics(
                age_years=60,
                sex=Sex.MALE,
                ethnicity=ethnicity,
                anthropometrics=Anthropometrics(height_cm=175.0, weight_kg=80.0),
                education_level=4,
            )
            smoking = SmokingHistory(
                status=SmokingStatus.CURRENT,
                cigarettes_per_day=20,
                years_smoked=25,
                years_since_quit=None,
            )
            profile = UserInput(
                demographics=demographics,
                lifestyle=Lifestyle(smoking=smoking),
                personal_medical_history=PersonalMedicalHistory(),
                family_history=[],
            )

            result = self.model.compute_score(profile)
            assert "%" in result
            assert float(result.replace("%", "")) > 0

    def test_model_metadata(self):
        """Test model metadata methods."""
        assert self.model.name == "plcom2012"
        assert self.model.cancer_type() == "lung"
        assert "PLCOm2012" in self.model.description()
        assert "6-year" in self.model.description()
        assert "percentage chance" in self.model.interpretation()
        assert isinstance(self.model.references(), list)
        assert len(self.model.references()) > 0
        assert "Tammemägi" in self.model.references()[0]

    def test_smoking_status_encoding(self):
        """Test smoking status encoding (current=0, former=1)."""
        # Test current smoker
        current_input = dict(
            age=60,
            race="white",
            education=4,
            bmi=25.0,
            copd=0,
            cancer_hist=0,
            family_hist_lung_cancer=0,
            smoking_status=0,
            smoking_intensity=20,
            duration_smoking=25,
            smoking_quit_time=0,
        )
        current_risk = self.model.calculate_risk(**current_input)

        # Test former smoker (same parameters except status and quit time)
        former_input = dict(
            age=60,
            race="white",
            education=4,
            bmi=25.0,
            copd=0,
            cancer_hist=0,
            family_hist_lung_cancer=0,
            smoking_status=1,
            smoking_intensity=20,
            duration_smoking=25,
            smoking_quit_time=5,
        )
        former_risk = self.model.calculate_risk(**former_input)

        # Both should be positive numbers
        assert current_risk > 0
        assert former_risk > 0

    def test_smoking_intensity_transformation(self):
        """Test smoking intensity transformation ((intensity/10)^-1)."""
        # Test with different intensities
        intensities = [10, 20, 30, 40]
        risks = []

        for intensity in intensities:
            input_data = dict(
                age=60,
                race="white",
                education=4,
                bmi=25.0,
                copd=0,
                cancer_hist=0,
                family_hist_lung_cancer=0,
                smoking_status=0,
                smoking_intensity=intensity,
                duration_smoking=25,
                smoking_quit_time=0,
            )
            risk = self.model.calculate_risk(**input_data)
            risks.append(risk)

        # All risks should be positive
        for risk in risks:
            assert risk > 0