# File: sentinel/tests/test_risk_models/test_llpi_model.py
# Last commit: jeuko - "Sync from GitHub (main)" (f6b7a59, verified)
"""Tests for the Liverpool Lung Project improved (LLPi) Risk Model.
Ground truth values are calculated from the reference R implementation
in lcmodels/R/lcmodels.R (risk.llpi function).
"""
import pytest
from sentinel.risk_models.llpi import LLPiRiskModel
from sentinel.user_input import (
Anthropometrics,
CancerType,
ChronicCondition,
Demographics,
FamilyMemberCancer,
FamilyRelation,
FamilySide,
Lifestyle,
PersonalMedicalHistory,
RelationshipDegree,
Sex,
SmokingHistory,
SmokingStatus,
UserInput,
)
# Reference cases whose expected values come from the R implementation
# (see the module docstring). Each entry is a dict with three keys:
#   "name"     - identifier used as the pytest parameter id,
#   "input"    - the UserInput handed to the model,
#   "expected" - the score as a percentage, compared after stripping "%".


def _lung_cancer_relative(relation, side, age_at_diagnosis):
    """Build a first-degree relative diagnosed with lung cancer."""
    return FamilyMemberCancer(
        relation=relation,
        side=side,
        degree=RelationshipDegree.FIRST,
        cancer_type=CancerType.LUNG,
        age_at_diagnosis=age_at_diagnosis,
    )


def _ground_truth_case(
    name,
    expected,
    *,
    age,
    sex,
    height_cm,
    weight_kg,
    status,
    years_smoked,
    cigarettes_per_day,
    years_since_quit=None,
    chronic_conditions=(),
    previous_cancers=(),
    family_history=(),
):
    """Assemble one ground-truth entry from its distinguishing values."""
    profile = UserInput(
        demographics=Demographics(
            age_years=age,
            sex=sex,
            anthropometrics=Anthropometrics(
                height_cm=height_cm, weight_kg=weight_kg
            ),
        ),
        lifestyle=Lifestyle(
            smoking=SmokingHistory(
                status=status,
                years_smoked=years_smoked,
                cigarettes_per_day=cigarettes_per_day,
                years_since_quit=years_since_quit,
            )
        ),
        personal_medical_history=PersonalMedicalHistory(
            chronic_conditions=list(chronic_conditions),
            previous_cancers=list(previous_cancers),
        ),
        family_history=list(family_history),
    )
    return {"name": name, "input": profile, "expected": expected}


GROUND_TRUTH_CASES = [
    _ground_truth_case(
        "low_risk_young_female",
        0.73,
        age=50,
        sex=Sex.FEMALE,
        height_cm=165.0,
        weight_kg=70.0,
        status=SmokingStatus.FORMER,
        years_smoked=10,
        cigarettes_per_day=10,
        years_since_quit=5,
    ),
    _ground_truth_case(
        "moderate_risk_male",
        2.91,
        age=60,
        sex=Sex.MALE,
        height_cm=175.0,
        weight_kg=80.0,
        status=SmokingStatus.CURRENT,
        years_smoked=25,
        cigarettes_per_day=20,
    ),
    _ground_truth_case(
        "high_risk_copd",
        21.62,
        age=70,
        sex=Sex.MALE,
        height_cm=175.0,
        weight_kg=75.0,
        status=SmokingStatus.FORMER,
        years_smoked=45,
        cigarettes_per_day=30,
        years_since_quit=2,
        chronic_conditions=[ChronicCondition.COPD],
    ),
    _ground_truth_case(
        "high_risk_prior_cancer",
        14.31,
        age=65,
        sex=Sex.MALE,
        height_cm=175.0,
        weight_kg=78.0,
        status=SmokingStatus.FORMER,
        years_smoked=35,
        cigarettes_per_day=25,
        years_since_quit=5,
        previous_cancers=[CancerType.PROSTATE],
    ),
    _ground_truth_case(
        "early_onset_family_history",
        2.24,
        age=55,
        sex=Sex.FEMALE,
        height_cm=165.0,
        weight_kg=68.0,
        status=SmokingStatus.CURRENT,
        years_smoked=20,
        cigarettes_per_day=15,
        family_history=[
            # Early onset (< 60)
            _lung_cancer_relative(FamilyRelation.MOTHER, FamilySide.MATERNAL, 55),
        ],
    ),
    _ground_truth_case(
        "late_onset_family_history",
        3.59,
        age=58,
        sex=Sex.MALE,
        height_cm=178.0,
        weight_kg=82.0,
        status=SmokingStatus.FORMER,
        years_smoked=30,
        cigarettes_per_day=20,
        years_since_quit=3,
        family_history=[
            # Late onset (>= 60)
            _lung_cancer_relative(FamilyRelation.FATHER, FamilySide.PATERNAL, 65),
        ],
    ),
    _ground_truth_case(
        "multiple_risk_factors",
        58.29,
        age=68,
        sex=Sex.MALE,
        height_cm=172.0,
        weight_kg=76.0,
        status=SmokingStatus.CURRENT,
        years_smoked=40,
        cigarettes_per_day=30,
        chronic_conditions=[ChronicCondition.COPD],
        previous_cancers=[CancerType.PROSTATE],
        family_history=[
            # Early onset
            _lung_cancer_relative(FamilyRelation.MOTHER, FamilySide.MATERNAL, 58),
        ],
    ),
    _ground_truth_case(
        "female_copd_family_history",
        6.19,
        age=62,
        sex=Sex.FEMALE,
        height_cm=162.0,
        weight_kg=65.0,
        status=SmokingStatus.FORMER,
        years_smoked=28,
        cigarettes_per_day=18,
        years_since_quit=4,
        chronic_conditions=[ChronicCondition.COPD],
        family_history=[
            # Late onset
            _lung_cancer_relative(FamilyRelation.FATHER, FamilySide.PATERNAL, 70),
        ],
    ),
    _ground_truth_case(
        "minimal_smoking_no_risk",
        0.63,
        age=52,
        sex=Sex.FEMALE,
        height_cm=160.0,
        weight_kg=62.0,
        status=SmokingStatus.FORMER,
        years_smoked=5,
        cigarettes_per_day=8,
        years_since_quit=10,
    ),
    _ground_truth_case(
        "heavy_smoker_female",
        3.20,
        age=58,
        sex=Sex.FEMALE,
        height_cm=168.0,
        weight_kg=72.0,
        status=SmokingStatus.CURRENT,
        years_smoked=38,
        cigarettes_per_day=25,
    ),
]
class TestLLPiModel:
    """Test suite for LLPiRiskModel.

    Most tests vary one attribute of a shared baseline profile (60-year-old
    male, 175 cm / 80 kg, current smoker, 20 cigarettes per day for 25 years,
    no medical or family history). ``_make_user`` builds that baseline so each
    test only spells out what it changes.
    """

    def setup_method(self):
        """Initialize a fresh LLPiRiskModel instance before each test."""
        self.model = LLPiRiskModel()

    # ------------------------------------------------------------------
    # Helpers (names do not start with "test", so pytest skips them)
    # ------------------------------------------------------------------

    @staticmethod
    def _relative(
        cancer_type,
        age_at_diagnosis,
        relation=FamilyRelation.MOTHER,
        side=FamilySide.MATERNAL,
    ):
        """Build a first-degree relative with the given cancer diagnosis."""
        return FamilyMemberCancer(
            relation=relation,
            side=side,
            degree=RelationshipDegree.FIRST,
            cancer_type=cancer_type,
            age_at_diagnosis=age_at_diagnosis,
        )

    @staticmethod
    def _make_user(
        *,
        age=60,
        sex=Sex.MALE,
        height_cm=175.0,
        weight_kg=80.0,
        status=SmokingStatus.CURRENT,
        cigarettes_per_day=20,
        years_smoked=25,
        years_since_quit=None,
        chronic_conditions=None,
        previous_cancers=None,
        family_history=None,
    ):
        """Build a UserInput from the baseline profile plus overrides.

        Args:
            age: Value for ``Demographics.age_years``.
            sex: Value for ``Demographics.sex``.
            height_cm: Value for ``Anthropometrics.height_cm``.
            weight_kg: Value for ``Anthropometrics.weight_kg``.
            status: Value for ``SmokingHistory.status``.
            cigarettes_per_day: Value for ``SmokingHistory.cigarettes_per_day``.
            years_smoked: Value for ``SmokingHistory.years_smoked``.
            years_since_quit: Value for ``SmokingHistory.years_since_quit``.
            chronic_conditions: Optional conditions; omitted from the
                PersonalMedicalHistory call when None.
            previous_cancers: Optional prior cancers; omitted from the
                PersonalMedicalHistory call when None.
            family_history: Optional relatives; an empty list when None.

        Returns:
            The assembled UserInput.
        """
        # Only forward the history fields a test actually set, so that
        # PersonalMedicalHistory's own defaults apply to the others.
        history_kwargs = {}
        if chronic_conditions is not None:
            history_kwargs["chronic_conditions"] = list(chronic_conditions)
        if previous_cancers is not None:
            history_kwargs["previous_cancers"] = list(previous_cancers)

        return UserInput(
            demographics=Demographics(
                age_years=age,
                sex=sex,
                anthropometrics=Anthropometrics(
                    height_cm=height_cm, weight_kg=weight_kg
                ),
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(
                    status=status,
                    cigarettes_per_day=cigarettes_per_day,
                    years_smoked=years_smoked,
                    years_since_quit=years_since_quit,
                )
            ),
            personal_medical_history=PersonalMedicalHistory(**history_kwargs),
            family_history=[] if family_history is None else list(family_history),
        )

    def _risk_percent(self, user):
        """Score ``user`` and return the numeric percentage.

        Asserts that the score is a percentage string first, so a textual
        response (e.g. an "N/A: ..." message) fails with a readable message
        instead of a bare float-parsing ValueError.
        """
        score = self.model.compute_score(user)
        assert score.endswith("%"), f"expected a percentage score, got {score!r}"
        return float(score.rstrip("%"))

    # ------------------------------------------------------------------
    # Ground truth
    # ------------------------------------------------------------------

    @pytest.mark.parametrize("case", GROUND_TRUTH_CASES, ids=lambda x: x["name"])
    def test_ground_truth_validation(self, case):
        """Test against calculated ground truth results from R implementation.

        Args:
            case: Parameterized ground truth case dict.
        """
        calculated = self._risk_percent(case["input"])
        # Using tight tolerance since these are calculated values
        assert calculated == pytest.approx(case["expected"], abs=0.01)

    # ------------------------------------------------------------------
    # Eligibility and input validation
    # ------------------------------------------------------------------

    def test_never_smoker_handling(self):
        """Test that never smokers receive N/A response."""
        never_smoker = self._make_user(
            age=55,
            status=SmokingStatus.NEVER,
            cigarettes_per_day=0,
            years_smoked=0,
        )
        score = self.model.compute_score(never_smoker)
        assert score == "N/A: Model is for current or former smokers only."

    def test_no_smoking_history(self):
        """Test handling when years_smoked is 0."""
        user = self._make_user(age=55, cigarettes_per_day=10, years_smoked=0)
        score = self.model.compute_score(user)
        assert "Model requires smoking history" in score

    @pytest.mark.parametrize(
        "overrides",
        [
            pytest.param(
                {"age": 35, "status": SmokingStatus.CURRENT, "years_smoked": 15},
                id="below_minimum_age",
            ),
            pytest.param(
                {
                    "age": 90,
                    "status": SmokingStatus.FORMER,
                    "years_smoked": 30,
                    "years_since_quit": 10,
                },
                id="above_maximum_age",
            ),
        ],
    )
    def test_age_validation(self, overrides):
        """Test age validation (40-85 range).

        The two out-of-range ages are separate parameters so that a failure
        for one is reported independently of the other.

        Args:
            overrides: Keyword overrides for the baseline profile.
        """
        # Build the input outside the raises-block so that only the model
        # call can satisfy the expectation.
        user = self._make_user(**overrides)
        with pytest.raises(ValueError, match=r"Invalid inputs for LLPi:"):
            self.model.compute_score(user)

    # ------------------------------------------------------------------
    # Individual risk factors
    # ------------------------------------------------------------------

    def test_copd_detection(self):
        """Test COPD detection from chronic conditions."""
        risk_with_copd = self._risk_percent(
            self._make_user(
                chronic_conditions=[ChronicCondition.COPD, ChronicCondition.DIABETES]
            )
        )
        # Compare to same user without COPD
        risk_no_copd = self._risk_percent(
            self._make_user(chronic_conditions=[ChronicCondition.DIABETES])
        )
        # Risk should be higher with COPD
        assert risk_with_copd > risk_no_copd

    def test_prior_cancer_detection(self):
        """Test prior cancer history detection."""
        risk_with_cancer = self._risk_percent(
            self._make_user(
                chronic_conditions=[],
                previous_cancers=[CancerType.COLORECTAL, CancerType.PROSTATE],
            )
        )
        # Compare to same user without prior cancer
        risk_no_cancer = self._risk_percent(
            self._make_user(chronic_conditions=[], previous_cancers=[])
        )
        # Risk should be higher with prior cancer
        assert risk_with_cancer > risk_no_cancer

    def test_family_history_early_vs_late(self):
        """Test that early onset family history has higher coefficient than late onset."""
        early_onset = self._relative(CancerType.LUNG, 55)
        late_onset = self._relative(
            CancerType.LUNG,
            70,
            relation=FamilyRelation.FATHER,
            side=FamilySide.PATERNAL,
        )

        risk_early = self._risk_percent(self._make_user(family_history=[early_onset]))
        risk_late = self._risk_percent(self._make_user(family_history=[late_onset]))
        risk_none = self._risk_percent(self._make_user(family_history=[]))

        # Early onset should confer higher risk than late onset
        assert risk_early > risk_late > risk_none

    def test_family_history_non_lung_cancer_ignored(self):
        """Test that non-lung cancer family history is ignored."""
        risk_lung = self._risk_percent(
            self._make_user(family_history=[self._relative(CancerType.LUNG, 65)])
        )
        risk_breast = self._risk_percent(
            self._make_user(family_history=[self._relative(CancerType.BREAST, 65)])
        )
        risk_none = self._risk_percent(self._make_user(family_history=[]))

        # Lung cancer family history should increase risk
        assert risk_lung > risk_none
        # Breast cancer family history should not affect risk
        assert risk_breast == pytest.approx(risk_none, abs=0.01)

    def test_sex_difference(self):
        """Test that males have higher risk than females (all else equal)."""
        risk_male = self._risk_percent(self._make_user(sex=Sex.MALE))
        risk_female = self._risk_percent(self._make_user(sex=Sex.FEMALE))
        # Males should have higher risk (positive coefficient for male)
        assert risk_male > risk_female

    # ------------------------------------------------------------------
    # Metadata and direct calculation
    # ------------------------------------------------------------------

    def test_model_metadata(self):
        """Test model metadata methods."""
        assert self.model.name == "llpi"
        assert self.model.cancer_type() == "lung"

        description = self.model.description()
        assert "LLPi" in description
        assert "8.7-year" in description

        assert "percentage chance" in self.model.interpretation()

        references = self.model.references()
        assert isinstance(references, list)
        assert len(references) > 0
        assert "Marcus" in references[0]

    def test_calculate_risk_directly(self):
        """Test the calculate_risk method directly with known inputs."""
        # Test case: moderate_risk_male from ground truth
        risk = self.model.calculate_risk(
            age=60,
            male=1,
            smkyears=25,
            copd=0,
            prior_cancer=0,
            fam_cancer_onset=0,
        )
        # Should match ground truth: 2.91% (calculate_risk is scaled by 100
        # here to compare against the percentage value)
        assert risk * 100 == pytest.approx(2.91, abs=0.01)

    # ------------------------------------------------------------------
    # Monotonicity
    # ------------------------------------------------------------------

    def test_increasing_age_increases_risk(self):
        """Test that increasing age increases risk (positive age coefficient)."""
        risks = [
            self._risk_percent(self._make_user(age=age)) for age in (50, 60, 70)
        ]
        # Risk should increase with age
        assert risks[0] < risks[1] < risks[2]

    def test_increasing_smoking_years_increases_risk(self):
        """Test that longer smoking history increases risk."""
        risks = [
            self._risk_percent(self._make_user(years_smoked=years))
            for years in (10, 25, 40)
        ]
        # Risk should increase with more smoking years
        assert risks[0] < risks[1] < risks[2]