"""Tests for the QCancer multi-site cancer risk model.""" import csv from pathlib import Path import pytest from sentinel.risk_models import QCancerRiskModel from sentinel.risk_models.qcancer import ( compute_female_probabilities, compute_male_probabilities, ) from sentinel.user_input import ( AlcoholConsumption, Anthropometrics, Demographics, Lifestyle, PersonalMedicalHistory, Sex, SmokingHistory, SmokingStatus, UserInput, ) FIXTURE_PATH = Path("tests/fixtures/qcancer_reference.tsv") FEMALE_INPUT_PATH = Path("tests/fixtures/qcancer_inputs_female.tsv") MALE_INPUT_PATH = Path("tests/fixtures/qcancer_inputs_male.tsv") def _load_reference_cases() -> list[dict[str, str]]: with FIXTURE_PATH.open("r", encoding="utf-8") as handle: return list(csv.DictReader(handle, delimiter="\t")) def _parse_probability_columns(row: dict[str, str]) -> dict[str, float]: result = {} for key in row: if key in {"case_id", "sex"}: continue # Keep keys as-is (including "none" from C binary output) result[key] = float(row[key]) return result REFERENCE_CASES = _load_reference_cases() class TestQCancerModel: """Test suite for QCancer risk model.""" def setup_method(self) -> None: """Set up test fixtures.""" self.model = QCancerRiskModel() @pytest.mark.parametrize("case", REFERENCE_CASES, ids=lambda c: c["case_id"]) def test_reference_regression(self, case: dict[str, str]) -> None: """Test exact implementation against C binary output using TSV inputs. Args: case: Test case dictionary containing case_id, sex, and expected probabilities. """ expected = _parse_probability_columns(case) case_id = case["case_id"] sex = case["sex"] # Load the corresponding TSV input if sex == "female": with FEMALE_INPUT_PATH.open("r", encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t") inputs = {row["case_id"]: row for row in reader} if case_id not in inputs: pytest.skip(f"No input TSV for {case_id}") inp = inputs[case_id] # Call exact function with TSV parameters result = compute_female_probabilities( age=int(inp["age"]), alcohol_cat4=int(inp["alcohol_cat4"]), b_chronicpan=int(inp["b_chronicpan"]), b_copd=int(inp["b_copd"]), b_endometrial=int(inp["b_endometrial"]), b_type2=int(inp["b_type2"]), bmi=float(inp["bmi"]), c_hb=int(inp["c_hb"]), fh_breastcancer=int(inp["fh_breastcancer"]), fh_gicancer=int(inp["fh_gicancer"]), fh_ovariancancer=int(inp["fh_ovariancancer"]), new_abdodist=int(inp["new_abdodist"]), new_abdopain=int(inp["new_abdopain"]), new_appetiteloss=int(inp["new_appetiteloss"]), new_breastlump=int(inp["new_breastlump"]), new_breastpain=int(inp["new_breastpain"]), new_breastskin=int(inp["new_breastskin"]), new_dysphagia=int(inp["new_dysphagia"]), new_gibleed=int(inp["new_gibleed"]), new_haematuria=int(inp["new_haematuria"]), new_haemoptysis=int(inp["new_haemoptysis"]), new_heartburn=int(inp["new_heartburn"]), new_imb=int(inp["new_imb"]), new_indigestion=int(inp["new_indigestion"]), new_necklump=int(inp["new_necklump"]), new_nightsweats=int(inp["new_nightsweats"]), new_pmb=int(inp["new_pmb"]), new_postcoital=int(inp["new_postcoital"]), new_rectalbleed=int(inp["new_rectalbleed"]), new_vte=int(inp["new_vte"]), new_weightloss=int(inp["new_weightloss"]), s1_bowelchange=int(inp["s1_bowelchange"]), s1_bruising=int(inp["s1_bruising"]), s1_constipation=int(inp["s1_constipation"]), s1_cough=int(inp["s1_cough"]), smoke_cat=int(inp["smoke_cat"]), town=float(inp["town"]), ) else: # male with MALE_INPUT_PATH.open("r", encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t") inputs = {row["case_id"]: row for row in reader} if case_id not in inputs: pytest.skip(f"No input TSV for {case_id}") inp = inputs[case_id] # Call exact function with TSV parameters result = compute_male_probabilities( age=int(inp["age"]), alcohol_cat4=int(inp["alcohol_cat4"]), b_chronicpan=int(inp["b_chronicpan"]), b_copd=int(inp["b_copd"]), b_type2=int(inp["b_type2"]), bmi=float(inp["bmi"]), c_hb=int(inp["c_hb"]), fh_gicancer=int(inp["fh_gicancer"]), fh_prostatecancer=int(inp["fh_prostatecancer"]), new_abdodist=int(inp["new_abdodist"]), new_abdopain=int(inp["new_abdopain"]), new_appetiteloss=int(inp["new_appetiteloss"]), new_dysphagia=int(inp["new_dysphagia"]), new_gibleed=int(inp["new_gibleed"]), new_haematuria=int(inp["new_haematuria"]), new_haemoptysis=int(inp["new_haemoptysis"]), new_heartburn=int(inp["new_heartburn"]), new_indigestion=int(inp["new_indigestion"]), new_necklump=int(inp["new_necklump"]), new_nightsweats=int(inp["new_nightsweats"]), new_rectalbleed=int(inp["new_rectalbleed"]), new_testespain=int(inp["new_testespain"]), new_testicularlump=int(inp["new_testicularlump"]), new_vte=int(inp["new_vte"]), new_weightloss=int(inp["new_weightloss"]), s1_bowelchange=int(inp["s1_bowelchange"]), s1_constipation=int(inp["s1_constipation"]), s1_cough=int(inp["s1_cough"]), s1_impotence=int(inp["s1_impotence"]), s1_nocturia=int(inp["s1_nocturia"]), s1_urinaryfreq=int(inp["s1_urinaryfreq"]), s1_urinaryretention=int(inp["s1_urinaryretention"]), smoke_cat=int(inp["smoke_cat"]), town=float(inp["town"]), ) # Compare results for cancer_site, expected_pct in expected.items(): observed = result.get(cancer_site, 0.0) assert observed == pytest.approx(expected_pct, abs=0.01) def test_metadata(self) -> None: """Test that model returns correct metadata.""" assert self.model.name == "qcancer" assert self.model.cancer_type() == "multiple" assert "QCancer" in self.model.description() def test_compute_score_with_user_input(self) -> None: """Test that QCancerRiskModel.compute_score works with UserInput.""" user = UserInput( demographics=Demographics( age_years=55, sex=Sex.FEMALE, anthropometrics=Anthropometrics(height_cm=165, weight_kg=70.0), ), lifestyle=Lifestyle( smoking=SmokingHistory(status=SmokingStatus.NEVER), alcohol_consumption=AlcoholConsumption.LIGHT, ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) result = self.model.compute_score(user) assert "No Cancer:" in result assert "%" in result def test_qcancer_with_anaemia_and_endometrial_polyps(self) -> None: """Test QCancer processes anaemia and endometrial polyps correctly.""" from sentinel.user_input import ChronicCondition user = UserInput( demographics=Demographics( age_years=55, sex=Sex.FEMALE, anthropometrics=Anthropometrics(height_cm=165, weight_kg=70.0), ), lifestyle=Lifestyle( smoking=SmokingHistory(status=SmokingStatus.NEVER), alcohol_consumption=AlcoholConsumption.LIGHT, ), personal_medical_history=PersonalMedicalHistory( chronic_conditions=[ ChronicCondition.ANAEMIA, ChronicCondition.ENDOMETRIAL_POLYPS, ] ), family_history=[], ) # Should not raise an error and should include these conditions in calculation result = self.model.compute_score(user) assert "No Cancer:" in result assert "%" in result # Should have multiple cancer types listed assert result.count("%") >= 10 def test_validate_inputs_valid_user(self) -> None: """Test that valid user input passes validation.""" user = UserInput( demographics=Demographics( age_years=55, sex=Sex.FEMALE, anthropometrics=Anthropometrics(height_cm=165, weight_kg=70.0), ), lifestyle=Lifestyle( smoking=SmokingHistory(status=SmokingStatus.NEVER), ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) is_valid, errors = self.model.validate_inputs(user) assert is_valid assert len(errors) == 0 def test_validate_inputs_age_out_of_range(self) -> None: """Test that age outside QCancer range is caught.""" user = UserInput( demographics=Demographics( age_years=20, # Too young for QCancer (requires 25-99) sex=Sex.FEMALE, anthropometrics=Anthropometrics(height_cm=165, weight_kg=70.0), ), lifestyle=Lifestyle( smoking=SmokingHistory(status=SmokingStatus.NEVER), ), personal_medical_history=PersonalMedicalHistory(), family_history=[], ) is_valid, errors = self.model.validate_inputs(user) assert not is_valid assert any("age_years" in err and "25" in err for err in errors)