sentinel / tests /test_risk_models /test_qcancer_model.py
jeuko's picture
Sync from GitHub (main)
8018595 verified
"""Tests for the QCancer multi-site cancer risk model."""
import csv
from pathlib import Path
import pytest
from sentinel.risk_models import QCancerRiskModel
from sentinel.risk_models.qcancer import (
compute_female_probabilities,
compute_male_probabilities,
)
from sentinel.user_input import (
AlcoholConsumption,
Anthropometrics,
Demographics,
Lifestyle,
PersonalMedicalHistory,
Sex,
SmokingHistory,
SmokingStatus,
UserInput,
)
# Fixture locations. These are relative paths, so they resolve against the
# working directory the tests are launched from.
# Reference rows: case_id, sex, and the expected probability columns.
FIXTURE_PATH: Path = Path("tests/fixtures/qcancer_reference.tsv")
# Per-case model inputs for the female and male code paths, keyed by case_id.
FEMALE_INPUT_PATH: Path = Path("tests/fixtures/qcancer_inputs_female.tsv")
MALE_INPUT_PATH: Path = Path("tests/fixtures/qcancer_inputs_male.tsv")
def _load_reference_cases() -> list[dict[str, str]]:
    """Read every data row of the reference TSV at ``FIXTURE_PATH``.

    Returns:
        One dict per data row, keyed by the header names of the
        tab-separated file. All values are left as strings.
    """
    # The csv module asks for files to be opened with newline="" so that the
    # reader itself, not the text layer, interprets line endings.
    with FIXTURE_PATH.open("r", encoding="utf-8", newline="") as handle:
        return list(csv.DictReader(handle, delimiter="\t"))
def _parse_probability_columns(row: dict[str, str]) -> dict[str, float]:
    """Convert the probability columns of one reference row to floats.

    Args:
        row: A reference row mapping column names to string values.

    Returns:
        Every column except ``case_id`` and ``sex``, mapped to its value
        parsed with ``float``. Column names are kept as-is (including
        ``"none"`` from the C binary output).

    Raises:
        ValueError: If a probability column does not hold a valid float.
    """
    non_probability_columns = {"case_id", "sex"}
    return {
        column: float(value)
        for column, value in row.items()
        if column not in non_probability_columns
    }
# Loaded once at import time so the rows are available to
# ``pytest.mark.parametrize`` when the test class is defined.
REFERENCE_CASES: list[dict[str, str]] = _load_reference_cases()
class TestQCancerModel:
    """Test suite for QCancer risk model."""

    # Input columns parsed with float(); every other column is parsed with int().
    _FLOAT_FIELDS = frozenset({"bmi", "town"})

    # Keyword arguments of compute_female_probabilities; each is read from the
    # identically named column of the female input TSV.
    _FEMALE_FIELDS = (
        "age",
        "alcohol_cat4",
        "b_chronicpan",
        "b_copd",
        "b_endometrial",
        "b_type2",
        "bmi",
        "c_hb",
        "fh_breastcancer",
        "fh_gicancer",
        "fh_ovariancancer",
        "new_abdodist",
        "new_abdopain",
        "new_appetiteloss",
        "new_breastlump",
        "new_breastpain",
        "new_breastskin",
        "new_dysphagia",
        "new_gibleed",
        "new_haematuria",
        "new_haemoptysis",
        "new_heartburn",
        "new_imb",
        "new_indigestion",
        "new_necklump",
        "new_nightsweats",
        "new_pmb",
        "new_postcoital",
        "new_rectalbleed",
        "new_vte",
        "new_weightloss",
        "s1_bowelchange",
        "s1_bruising",
        "s1_constipation",
        "s1_cough",
        "smoke_cat",
        "town",
    )

    # Keyword arguments of compute_male_probabilities; each is read from the
    # identically named column of the male input TSV.
    _MALE_FIELDS = (
        "age",
        "alcohol_cat4",
        "b_chronicpan",
        "b_copd",
        "b_type2",
        "bmi",
        "c_hb",
        "fh_gicancer",
        "fh_prostatecancer",
        "new_abdodist",
        "new_abdopain",
        "new_appetiteloss",
        "new_dysphagia",
        "new_gibleed",
        "new_haematuria",
        "new_haemoptysis",
        "new_heartburn",
        "new_indigestion",
        "new_necklump",
        "new_nightsweats",
        "new_rectalbleed",
        "new_testespain",
        "new_testicularlump",
        "new_vte",
        "new_weightloss",
        "s1_bowelchange",
        "s1_constipation",
        "s1_cough",
        "s1_impotence",
        "s1_nocturia",
        "s1_urinaryfreq",
        "s1_urinaryretention",
        "smoke_cat",
        "town",
    )

    # Parsed input TSVs keyed by path, so each file is read once per test
    # session instead of once per parametrized case.
    _inputs_cache: dict[Path, dict[str, dict[str, str]]] = {}

    @classmethod
    def _load_inputs(cls, path: Path) -> dict[str, dict[str, str]]:
        """Return the rows of the input TSV at *path*, indexed by ``case_id``.

        Args:
            path: Tab-separated input file with a ``case_id`` column.

        Returns:
            Mapping from case id to the raw (string-valued) row. The parsed
            mapping is cached per path.
        """
        rows = cls._inputs_cache.get(path)
        if rows is None:
            # newline="" lets the csv reader interpret line endings itself.
            with path.open("r", encoding="utf-8", newline="") as handle:
                reader = csv.DictReader(handle, delimiter="\t")
                rows = {row["case_id"]: row for row in reader}
            cls._inputs_cache[path] = rows
        return rows

    @classmethod
    def _build_arguments(
        cls, row: dict[str, str], fields: tuple[str, ...]
    ) -> dict[str, float]:
        """Convert the named columns of *row* into numeric keyword arguments.

        Args:
            row: Raw string-valued input row.
            fields: Column names to convert, in call order.

        Returns:
            Mapping from column name to ``float`` for the columns listed in
            ``_FLOAT_FIELDS`` and to ``int`` for all others.
        """
        return {
            name: float(row[name]) if name in cls._FLOAT_FIELDS else int(row[name])
            for name in fields
        }

    def setup_method(self) -> None:
        """Set up test fixtures."""
        self.model = QCancerRiskModel()

    @pytest.mark.parametrize("case", REFERENCE_CASES, ids=lambda c: c["case_id"])
    def test_reference_regression(self, case: dict[str, str]) -> None:
        """Test exact implementation against C binary output using TSV inputs.

        Args:
            case: Test case dictionary containing case_id, sex, and expected probabilities.
        """
        expected = _parse_probability_columns(case)
        case_id = case["case_id"]
        sex = case["sex"]

        # Pick the input file, argument list and function for this row.
        # Any row not marked "female" is handled as male.
        if sex == "female":
            input_path = FEMALE_INPUT_PATH
            fields = self._FEMALE_FIELDS
            compute = compute_female_probabilities
        else:
            input_path = MALE_INPUT_PATH
            fields = self._MALE_FIELDS
            compute = compute_male_probabilities

        inputs = self._load_inputs(input_path)
        if case_id not in inputs:
            pytest.skip(f"No input TSV for {case_id}")

        # Call exact function with TSV parameters
        result = compute(**self._build_arguments(inputs[case_id], fields))

        # Compare results; a site absent from the result is compared as 0.0.
        for cancer_site, expected_pct in expected.items():
            observed = result.get(cancer_site, 0.0)
            assert observed == pytest.approx(expected_pct, abs=0.01), (
                f"{case_id}: unexpected probability for {cancer_site!r}"
            )

    def test_metadata(self) -> None:
        """Test that model returns correct metadata."""
        assert self.model.name == "qcancer"
        assert self.model.cancer_type() == "multiple"
        assert "QCancer" in self.model.description()

    def test_compute_score_with_user_input(self) -> None:
        """Test that QCancerRiskModel.compute_score works with UserInput."""
        user = UserInput(
            demographics=Demographics(
                age_years=55,
                sex=Sex.FEMALE,
                anthropometrics=Anthropometrics(height_cm=165, weight_kg=70.0),
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
                alcohol_consumption=AlcoholConsumption.LIGHT,
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )

        result = self.model.compute_score(user)

        assert "No Cancer:" in result
        assert "%" in result

    def test_qcancer_with_anaemia_and_endometrial_polyps(self) -> None:
        """Test QCancer processes anaemia and endometrial polyps correctly."""
        from sentinel.user_input import ChronicCondition

        user = UserInput(
            demographics=Demographics(
                age_years=55,
                sex=Sex.FEMALE,
                anthropometrics=Anthropometrics(height_cm=165, weight_kg=70.0),
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
                alcohol_consumption=AlcoholConsumption.LIGHT,
            ),
            personal_medical_history=PersonalMedicalHistory(
                chronic_conditions=[
                    ChronicCondition.ANAEMIA,
                    ChronicCondition.ENDOMETRIAL_POLYPS,
                ]
            ),
            family_history=[],
        )

        # Should not raise an error and should include these conditions in calculation
        result = self.model.compute_score(user)

        assert "No Cancer:" in result
        assert "%" in result
        # Should have multiple cancer types listed
        assert result.count("%") >= 10

    def test_validate_inputs_valid_user(self) -> None:
        """Test that valid user input passes validation."""
        user = UserInput(
            demographics=Demographics(
                age_years=55,
                sex=Sex.FEMALE,
                anthropometrics=Anthropometrics(height_cm=165, weight_kg=70.0),
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )

        is_valid, errors = self.model.validate_inputs(user)

        assert is_valid, f"unexpected validation errors: {errors}"
        assert len(errors) == 0

    def test_validate_inputs_age_out_of_range(self) -> None:
        """Test that age outside QCancer range is caught."""
        user = UserInput(
            demographics=Demographics(
                age_years=20,  # Too young for QCancer (requires 25-99)
                sex=Sex.FEMALE,
                anthropometrics=Anthropometrics(height_cm=165, weight_kg=70.0),
            ),
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
            ),
            personal_medical_history=PersonalMedicalHistory(),
            family_history=[],
        )

        is_valid, errors = self.model.validate_inputs(user)

        assert not is_valid
        assert any("age_years" in err and "25" in err for err in errors), errors