Spaces:
Runtime error
Runtime error
| """Tests for the QCancer multi-site cancer risk model.""" | |
| import csv | |
| from pathlib import Path | |
| import pytest | |
| from sentinel.risk_models import QCancerRiskModel | |
| from sentinel.risk_models.qcancer import ( | |
| compute_female_probabilities, | |
| compute_male_probabilities, | |
| ) | |
| from sentinel.user_input import ( | |
| AlcoholConsumption, | |
| Anthropometrics, | |
| Demographics, | |
| Lifestyle, | |
| PersonalMedicalHistory, | |
| Sex, | |
| SmokingHistory, | |
| SmokingStatus, | |
| UserInput, | |
| ) | |
| # Tab-separated fixture files. The paths are relative, so they resolve against | |
| # the working directory the tests are started from. | |
| # FIXTURE_PATH is the table read by _load_reference_cases(); besides the | |
| # "case_id" and "sex" columns, every column is parsed as an expected probability. | |
| FIXTURE_PATH = Path("tests/fixtures/qcancer_reference.tsv") | |
| # Per-sex model input tables; test_reference_regression looks rows up by "case_id". | |
| FEMALE_INPUT_PATH = Path("tests/fixtures/qcancer_inputs_female.tsv") | |
| MALE_INPUT_PATH = Path("tests/fixtures/qcancer_inputs_male.tsv") | |
def _load_reference_cases() -> list[dict[str, str]]:
    """Read every row of the reference TSV at ``FIXTURE_PATH``.

    Returns:
        One dictionary per data row, keyed by the header names of the file.

    Raises:
        OSError: If the fixture file cannot be opened.
    """
    # newline="" is what the csv module asks for, so that newlines embedded in
    # quoted fields are handed to the parser untouched.
    with FIXTURE_PATH.open("r", encoding="utf-8", newline="") as handle:
        return list(csv.DictReader(handle, delimiter="\t"))
| def _parse_probability_columns(row: dict[str, str]) -> dict[str, float]: | |
| result = {} | |
| for key in row: | |
| if key in {"case_id", "sex"}: | |
| continue | |
| # Keep keys as-is (including "none" from C binary output) | |
| result[key] = float(row[key]) | |
| return result | |
| # Reference rows are read once, when this module is imported. | |
| # NOTE(review): presumably meant to parametrize test_reference_regression; | |
| # no use of this name is visible in this part of the file - confirm. | |
| REFERENCE_CASES = _load_reference_cases() | |
class TestQCancerModel:
    """Test suite for the QCancer risk model.

    Covers a regression check of the exact probability functions against
    reference output, plus metadata, scoring and input-validation checks on
    ``QCancerRiskModel``.
    """

    # Input columns parsed as floats; every other model argument is parsed as int.
    _FLOAT_FIELDS = frozenset({"bmi", "town"})

    # Keyword arguments passed to compute_female_probabilities, one TSV column each.
    _FEMALE_FIELDS = (
        "age",
        "alcohol_cat4",
        "b_chronicpan",
        "b_copd",
        "b_endometrial",
        "b_type2",
        "bmi",
        "c_hb",
        "fh_breastcancer",
        "fh_gicancer",
        "fh_ovariancancer",
        "new_abdodist",
        "new_abdopain",
        "new_appetiteloss",
        "new_breastlump",
        "new_breastpain",
        "new_breastskin",
        "new_dysphagia",
        "new_gibleed",
        "new_haematuria",
        "new_haemoptysis",
        "new_heartburn",
        "new_imb",
        "new_indigestion",
        "new_necklump",
        "new_nightsweats",
        "new_pmb",
        "new_postcoital",
        "new_rectalbleed",
        "new_vte",
        "new_weightloss",
        "s1_bowelchange",
        "s1_bruising",
        "s1_constipation",
        "s1_cough",
        "smoke_cat",
        "town",
    )

    # Keyword arguments passed to compute_male_probabilities, one TSV column each.
    _MALE_FIELDS = (
        "age",
        "alcohol_cat4",
        "b_chronicpan",
        "b_copd",
        "b_type2",
        "bmi",
        "c_hb",
        "fh_gicancer",
        "fh_prostatecancer",
        "new_abdodist",
        "new_abdopain",
        "new_appetiteloss",
        "new_dysphagia",
        "new_gibleed",
        "new_haematuria",
        "new_haemoptysis",
        "new_heartburn",
        "new_indigestion",
        "new_necklump",
        "new_nightsweats",
        "new_rectalbleed",
        "new_testespain",
        "new_testicularlump",
        "new_vte",
        "new_weightloss",
        "s1_bowelchange",
        "s1_constipation",
        "s1_cough",
        "s1_impotence",
        "s1_nocturia",
        "s1_urinaryfreq",
        "s1_urinaryretention",
        "smoke_cat",
        "town",
    )

    # Parsed input tables keyed by fixture path, so each TSV is read once
    # instead of once per parametrized case.
    _input_tables: dict[Path, dict[str, dict[str, str]]] = {}

    def setup_method(self) -> None:
        """Create a fresh ``QCancerRiskModel`` for each test."""
        self.model = QCancerRiskModel()

    @classmethod
    def _load_inputs(cls, path: Path) -> dict[str, dict[str, str]]:
        """Return the rows of the input TSV at ``path`` indexed by ``case_id``.

        Args:
            path: Tab-separated input file with a ``case_id`` column.

        Returns:
            Mapping from ``case_id`` to the full row; cached per path.
        """
        if path not in cls._input_tables:
            # newline="" is what the csv module asks for when given a file object.
            with path.open("r", encoding="utf-8", newline="") as handle:
                reader = csv.DictReader(handle, delimiter="\t")
                cls._input_tables[path] = {row["case_id"]: row for row in reader}
        return cls._input_tables[path]

    @classmethod
    def _model_kwargs(
        cls, inp: dict[str, str], fields: tuple[str, ...]
    ) -> dict[str, float]:
        """Convert the named TSV columns into typed keyword arguments.

        Args:
            inp: One input row, with every value still a string.
            fields: Column names to extract, used as the argument names.

        Returns:
            ``bmi`` and ``town`` parsed as floats, all other fields as ints.
        """
        return {
            name: float(inp[name]) if name in cls._FLOAT_FIELDS else int(inp[name])
            for name in fields
        }

    @staticmethod
    def _female_user(
        *,
        age_years: int = 55,
        lifestyle: Lifestyle | None = None,
        history: PersonalMedicalHistory | None = None,
    ) -> UserInput:
        """Build the female ``UserInput`` shared by the model-level tests.

        Args:
            age_years: Age to place in the demographics.
            lifestyle: Lifestyle section; defaults to a never-smoker with no
                other lifestyle fields set.
            history: Personal medical history; defaults to an empty one.

        Returns:
            A user with height 165 cm, weight 70.0 kg and no family history.
        """
        if lifestyle is None:
            lifestyle = Lifestyle(smoking=SmokingHistory(status=SmokingStatus.NEVER))
        if history is None:
            history = PersonalMedicalHistory()
        return UserInput(
            demographics=Demographics(
                age_years=age_years,
                sex=Sex.FEMALE,
                anthropometrics=Anthropometrics(height_cm=165, weight_kg=70.0),
            ),
            lifestyle=lifestyle,
            personal_medical_history=history,
            family_history=[],
        )

    # Without this parametrization pytest has no source for the ``case``
    # argument and reports "fixture 'case' not found".
    @pytest.mark.parametrize("case", REFERENCE_CASES, ids=lambda row: row["case_id"])
    def test_reference_regression(self, case: dict[str, str]) -> None:
        """Test exact implementation against C binary output using TSV inputs.

        Args:
            case: Test case dictionary containing case_id, sex, and expected probabilities.
        """
        expected = _parse_probability_columns(case)
        case_id = case["case_id"]
        # As before, any value other than "female" is handled as a male case.
        is_female = case["sex"] == "female"

        inputs = self._load_inputs(FEMALE_INPUT_PATH if is_female else MALE_INPUT_PATH)
        if case_id not in inputs:
            pytest.skip(f"No input TSV for {case_id}")
        inp = inputs[case_id]

        # Call the exact function with the TSV parameters.
        if is_female:
            result = compute_female_probabilities(
                **self._model_kwargs(inp, self._FEMALE_FIELDS)
            )
        else:
            result = compute_male_probabilities(
                **self._model_kwargs(inp, self._MALE_FIELDS)
            )

        # A site missing from the result is compared as 0.0.
        for cancer_site, expected_pct in expected.items():
            observed = result.get(cancer_site, 0.0)
            assert observed == pytest.approx(expected_pct, abs=0.01), (
                f"{case_id}: probability mismatch for {cancer_site!r}"
            )

    def test_metadata(self) -> None:
        """Test that model returns correct metadata."""
        assert self.model.name == "qcancer"
        assert self.model.cancer_type() == "multiple"
        assert "QCancer" in self.model.description()

    def test_compute_score_with_user_input(self) -> None:
        """Test that QCancerRiskModel.compute_score works with UserInput."""
        user = self._female_user(
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
                alcohol_consumption=AlcoholConsumption.LIGHT,
            ),
        )
        result = self.model.compute_score(user)
        assert "No Cancer:" in result
        assert "%" in result

    def test_qcancer_with_anaemia_and_endometrial_polyps(self) -> None:
        """Test QCancer processes anaemia and endometrial polyps correctly."""
        from sentinel.user_input import ChronicCondition

        user = self._female_user(
            lifestyle=Lifestyle(
                smoking=SmokingHistory(status=SmokingStatus.NEVER),
                alcohol_consumption=AlcoholConsumption.LIGHT,
            ),
            history=PersonalMedicalHistory(
                chronic_conditions=[
                    ChronicCondition.ANAEMIA,
                    ChronicCondition.ENDOMETRIAL_POLYPS,
                ]
            ),
        )
        # Should not raise an error and should include these conditions in calculation
        result = self.model.compute_score(user)
        assert "No Cancer:" in result
        assert "%" in result
        # Should have multiple cancer types listed
        assert result.count("%") >= 10

    def test_validate_inputs_valid_user(self) -> None:
        """Test that valid user input passes validation."""
        user = self._female_user()
        is_valid, errors = self.model.validate_inputs(user)
        assert is_valid
        assert len(errors) == 0

    def test_validate_inputs_age_out_of_range(self) -> None:
        """Test that age outside QCancer range is caught."""
        # Too young for QCancer (requires 25-99)
        user = self._female_user(age_years=20)
        is_valid, errors = self.model.validate_inputs(user)
        assert not is_valid
        assert any("age_years" in err and "25" in err for err in errors)