Spaces:
Runtime error
Runtime error
Sync from GitHub (main)
Browse files
- apps/api/main.py +2 -1
- apps/cli/main.py +28 -8
- apps/streamlit_ui/page_versions/profile/v2.py +5 -1
- apps/streamlit_ui/pages/1_Profile.py +5 -1
- apps/streamlit_ui/pages/3_Assessment.py +25 -1
- configs/output_format/assessment.yaml +10 -8
- examples/benchmark/benchmark_female.yaml +37 -25
- examples/benchmark/benchmark_male.yaml +41 -25
- prompts/instruction/assessment.md +12 -8
- scripts/generate_documentation.py +1 -1
- src/sentinel/api_clients/canrisk.py +1 -1
- src/sentinel/conversation.py +27 -5
- src/sentinel/models.py +18 -211
- src/sentinel/prompting.py +1 -0
- src/sentinel/reporting.py +181 -148
- src/sentinel/risk_aggregation.py +98 -0
- src/sentinel/risk_models/qcancer.py +102 -0
- src/sentinel/utils.py +1 -1
- tests/test_conversation.py +34 -12
- tests/test_demo.py +47 -39
- tests/test_integration_canrisk_api.py +76 -39
- tests/test_main.py +43 -18
- tests/test_risk_aggregation.py +118 -0
apps/api/main.py
CHANGED
|
@@ -6,7 +6,8 @@ from fastapi import FastAPI, HTTPException
|
|
| 6 |
|
| 7 |
from sentinel.config import AppConfig, ModelConfig, ResourcePaths
|
| 8 |
from sentinel.factory import SentinelFactory
|
| 9 |
-
from sentinel.models import InitialAssessment
|
|
|
|
| 10 |
|
| 11 |
app = FastAPI(
|
| 12 |
title="Cancer Risk Assessment Assistant",
|
|
|
|
| 6 |
|
| 7 |
from sentinel.config import AppConfig, ModelConfig, ResourcePaths
|
| 8 |
from sentinel.factory import SentinelFactory
|
| 9 |
+
from sentinel.models import InitialAssessment
|
| 10 |
+
from sentinel.user_input import UserInput
|
| 11 |
|
| 12 |
app = FastAPI(
|
| 13 |
title="Cancer Risk Assessment Assistant",
|
apps/cli/main.py
CHANGED
|
@@ -12,16 +12,18 @@ from sentinel.config import AppConfig, ModelConfig, ResourcePaths
|
|
| 12 |
from sentinel.factory import SentinelFactory
|
| 13 |
from sentinel.models import (
|
| 14 |
ConversationResponse,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
Demographics,
|
| 16 |
FamilyMemberCancer,
|
| 17 |
FemaleSpecific,
|
| 18 |
-
InitialAssessment,
|
| 19 |
Lifestyle,
|
| 20 |
PersonalMedicalHistory,
|
| 21 |
UserInput,
|
| 22 |
)
|
| 23 |
-
from sentinel.reporting import generate_excel_report, generate_pdf_report
|
| 24 |
-
from sentinel.risk_models import RISK_MODELS
|
| 25 |
from sentinel.utils import load_user_file
|
| 26 |
|
| 27 |
|
|
@@ -461,17 +463,35 @@ def main(cfg: DictConfig) -> None:
|
|
| 461 |
print(f"\n{Colors.OKCYAN}🔄 Running risk scoring tools...{Colors.ENDC}")
|
| 462 |
risks_scores = []
|
| 463 |
for model in RISK_MODELS:
|
| 464 |
-
|
| 465 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
|
| 467 |
-
user.risks_scores = risks_scores
|
| 468 |
for risk_score in risks_scores:
|
| 469 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
|
| 471 |
print(f"\n{Colors.OKGREEN}🔄 Analyzing your information...{Colors.ENDC}")
|
| 472 |
response = None
|
| 473 |
try:
|
| 474 |
-
response = conversation.initial_assessment(user)
|
| 475 |
format_risk_assessment(response, dev_mode)
|
| 476 |
except Exception as e:
|
| 477 |
print(f"{Colors.FAIL}❌ Error generating assessment: {e}{Colors.ENDC}")
|
|
|
|
| 12 |
from sentinel.factory import SentinelFactory
|
| 13 |
from sentinel.models import (
|
| 14 |
ConversationResponse,
|
| 15 |
+
InitialAssessment,
|
| 16 |
+
)
|
| 17 |
+
from sentinel.reporting import generate_excel_report, generate_pdf_report
|
| 18 |
+
from sentinel.risk_models import RISK_MODELS
|
| 19 |
+
from sentinel.user_input import (
|
| 20 |
Demographics,
|
| 21 |
FamilyMemberCancer,
|
| 22 |
FemaleSpecific,
|
|
|
|
| 23 |
Lifestyle,
|
| 24 |
PersonalMedicalHistory,
|
| 25 |
UserInput,
|
| 26 |
)
|
|
|
|
|
|
|
| 27 |
from sentinel.utils import load_user_file
|
| 28 |
|
| 29 |
|
|
|
|
| 463 |
print(f"\n{Colors.OKCYAN}🔄 Running risk scoring tools...{Colors.ENDC}")
|
| 464 |
risks_scores = []
|
| 465 |
for model in RISK_MODELS:
|
| 466 |
+
try:
|
| 467 |
+
risk_score = model().run(user)
|
| 468 |
+
# Handle models that return multiple scores (e.g., QCancer)
|
| 469 |
+
if isinstance(risk_score, list):
|
| 470 |
+
risks_scores.extend(risk_score)
|
| 471 |
+
else:
|
| 472 |
+
risks_scores.append(risk_score)
|
| 473 |
+
except ValueError as e:
|
| 474 |
+
# Skip models that aren't applicable or have validation errors
|
| 475 |
+
print(f"{Colors.WARNING}⚠️ Skipping {model().name}: {e!s}{Colors.ENDC}")
|
| 476 |
+
continue
|
| 477 |
|
|
|
|
| 478 |
for risk_score in risks_scores:
|
| 479 |
+
# Format output based on whether cancer type is specified
|
| 480 |
+
if risk_score.cancer_type and risk_score.cancer_type not in [
|
| 481 |
+
"multiple",
|
| 482 |
+
"Multiple Cancer Sites",
|
| 483 |
+
]:
|
| 484 |
+
display = (
|
| 485 |
+
f"{risk_score.name} ({risk_score.cancer_type}): {risk_score.score}"
|
| 486 |
+
)
|
| 487 |
+
else:
|
| 488 |
+
display = f"{risk_score.name}: {risk_score.score}"
|
| 489 |
+
print(f"{Colors.OKCYAN}🔄 {display}{Colors.ENDC}")
|
| 490 |
|
| 491 |
print(f"\n{Colors.OKGREEN}🔄 Analyzing your information...{Colors.ENDC}")
|
| 492 |
response = None
|
| 493 |
try:
|
| 494 |
+
response = conversation.initial_assessment(user, risk_scores=risks_scores)
|
| 495 |
format_risk_assessment(response, dev_mode)
|
| 496 |
except Exception as e:
|
| 497 |
print(f"{Colors.FAIL}❌ Error generating assessment: {e}{Colors.ENDC}")
|
apps/streamlit_ui/page_versions/profile/v2.py
CHANGED
|
@@ -232,7 +232,11 @@ def render():
|
|
| 232 |
risks_scores = []
|
| 233 |
for model in RISK_MODELS:
|
| 234 |
risk_score = model().run(updated_profile)
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
# Attach the scores to the object before saving
|
| 238 |
updated_profile.risks_scores = risks_scores
|
|
|
|
| 232 |
risks_scores = []
|
| 233 |
for model in RISK_MODELS:
|
| 234 |
risk_score = model().run(updated_profile)
|
| 235 |
+
# Handle models that return multiple scores (e.g., QCancer)
|
| 236 |
+
if isinstance(risk_score, list):
|
| 237 |
+
risks_scores.extend(risk_score)
|
| 238 |
+
else:
|
| 239 |
+
risks_scores.append(risk_score)
|
| 240 |
|
| 241 |
# Attach the scores to the object before saving
|
| 242 |
updated_profile.risks_scores = risks_scores
|
apps/streamlit_ui/pages/1_Profile.py
CHANGED
|
@@ -257,7 +257,11 @@ with st.expander("Create New Profile Manually"):
|
|
| 257 |
risks_scores = []
|
| 258 |
for model in RISK_MODELS:
|
| 259 |
risk_score = model().run(new_profile)
|
| 260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
new_profile.risks_scores = risks_scores
|
| 263 |
|
|
|
|
| 257 |
risks_scores = []
|
| 258 |
for model in RISK_MODELS:
|
| 259 |
risk_score = model().run(new_profile)
|
| 260 |
+
# Handle models that return multiple scores (e.g., QCancer)
|
| 261 |
+
if isinstance(risk_score, list):
|
| 262 |
+
risks_scores.extend(risk_score)
|
| 263 |
+
else:
|
| 264 |
+
risks_scores.append(risk_score)
|
| 265 |
|
| 266 |
new_profile.risks_scores = risks_scores
|
| 267 |
|
apps/streamlit_ui/pages/3_Assessment.py
CHANGED
|
@@ -198,7 +198,31 @@ if assessment:
|
|
| 198 |
with st.expander("Overall Summary"):
|
| 199 |
st.markdown(assessment.overall_summary, unsafe_allow_html=True)
|
| 200 |
|
| 201 |
-
with st.expander("Risk
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
for ra in sorted_risk_assessments:
|
| 203 |
st.markdown(f"**{ra.cancer_type}** - {ra.risk_level or 'N/A'}/5")
|
| 204 |
st.write(ra.explanation)
|
|
|
|
| 198 |
with st.expander("Overall Summary"):
|
| 199 |
st.markdown(assessment.overall_summary, unsafe_allow_html=True)
|
| 200 |
|
| 201 |
+
with st.expander("Calculated Risk Scores (Ground Truth)"):
|
| 202 |
+
if assessment.calculated_risk_scores:
|
| 203 |
+
st.info(
|
| 204 |
+
"These scores have been calculated using validated clinical risk models "
|
| 205 |
+
"and represent the authoritative risk assessment."
|
| 206 |
+
)
|
| 207 |
+
for cancer_type, scores in sorted(
|
| 208 |
+
assessment.calculated_risk_scores.items()
|
| 209 |
+
):
|
| 210 |
+
st.markdown(f"### {cancer_type}")
|
| 211 |
+
for score in scores:
|
| 212 |
+
st.markdown(f"**{score.name}**: {score.score}")
|
| 213 |
+
if score.description:
|
| 214 |
+
st.write(f"*{score.description}*")
|
| 215 |
+
if score.interpretation:
|
| 216 |
+
st.write(score.interpretation)
|
| 217 |
+
if score.references:
|
| 218 |
+
with st.expander("References"):
|
| 219 |
+
for ref in score.references:
|
| 220 |
+
st.write(f"- {ref}")
|
| 221 |
+
st.divider()
|
| 222 |
+
else:
|
| 223 |
+
st.write("No risk scores calculated.")
|
| 224 |
+
|
| 225 |
+
with st.expander("AI-Generated Risk Interpretations"):
|
| 226 |
for ra in sorted_risk_assessments:
|
| 227 |
st.markdown(f"**{ra.cancer_type}** - {ra.risk_level or 'N/A'}/5")
|
| 228 |
st.write(ra.explanation)
|
configs/output_format/assessment.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
format_instructions: |
|
| 2 |
CRITICAL:
|
| 3 |
- Return ONLY valid JSON. Do not include any explanatory text, disclaimers, or additional content before or after the JSON.
|
| 4 |
-
-
|
| 5 |
- Provide a diagnostic recommendation for EVERY diagnostic protocol provided in the `DIAGNOSTIC PROTOCOLS` (i.e. {diagnostic_protocols}).
|
| 6 |
- The ONLY allowed values for the "category" field in "identified_risk_factors" and "contributing_factors" objects are: {allowed_categories}. You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category.
|
| 7 |
- The ONLY allowed values for the "strength" field in "contributing_factors" objects are: {allowed_strengths}.
|
|
@@ -13,15 +13,15 @@ format_instructions: |
|
|
| 13 |
"identified_risk_factors": [
|
| 14 |
{{
|
| 15 |
"description": "string - A human-readable description of the risk factor identified from the user's profile.",
|
| 16 |
-
"category": "string - One of the predefined categories (Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, Other). You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category. "
|
| 17 |
}}
|
| 18 |
-
]
|
| 19 |
-
"
|
| 20 |
{{
|
| 21 |
-
"cancer_type": "string - Type of cancer",
|
| 22 |
-
"risk_level": "number
|
| 23 |
-
"explanation": "string -
|
| 24 |
-
"recommended_steps": ["string"] or null - Optional steps to mitigate risk
|
| 25 |
"contributing_factors": [
|
| 26 |
{{
|
| 27 |
"description": "string - A human-readable description of the risk factor",
|
|
@@ -47,6 +47,8 @@ format_instructions: |
|
|
| 47 |
|
| 48 |
IMPORTANT:
|
| 49 |
- The `reasoning` field is mandatory for your internal monologue. You must put any and all reasoning you were asked to do in here. This is your internal monologue, and should be as detailed as possible.
|
|
|
|
|
|
|
| 50 |
- Do not add disclaimers; they are handled separately.
|
| 51 |
- Use null for optional fields that don't apply.
|
| 52 |
- Return ONLY the JSON object, nothing else.
|
|
|
|
| 1 |
format_instructions: |
|
| 2 |
CRITICAL:
|
| 3 |
- Return ONLY valid JSON. Do not include any explanatory text, disclaimers, or additional content before or after the JSON.
|
| 4 |
+
- The `RISK SCORES (GROUND TRUTH)` section contains validated risk scores. You MUST provide interpretations and explanations for these scores, NOT generate new risk levels.
|
| 5 |
- Provide a diagnostic recommendation for EVERY diagnostic protocol provided in the `DIAGNOSTIC PROTOCOLS` (i.e. {diagnostic_protocols}).
|
| 6 |
- The ONLY allowed values for the "category" field in "identified_risk_factors" and "contributing_factors" objects are: {allowed_categories}. You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category.
|
| 7 |
- The ONLY allowed values for the "strength" field in "contributing_factors" objects are: {allowed_strengths}.
|
|
|
|
| 13 |
"identified_risk_factors": [
|
| 14 |
{{
|
| 15 |
"description": "string - A human-readable description of the risk factor identified from the user's profile.",
|
| 16 |
+
"category": "string - One of the predefined categories (Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, Other). You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category. "
|
| 17 |
}}
|
| 18 |
+
],
|
| 19 |
+
"llm_risk_interpretations": [
|
| 20 |
{{
|
| 21 |
+
"cancer_type": "string - Type of cancer from RISK SCORES section",
|
| 22 |
+
"risk_level": "number or null - Optional qualitative score (1-5) that should align with the calculated risk scores. Use null if you cannot confidently map the score to a 1-5 scale.",
|
| 23 |
+
"explanation": "string - Explain what the calculated risk score means for the patient. Identify key contributing factors from their profile. Always reference the actual score from RISK SCORES section.",
|
| 24 |
+
"recommended_steps": ["string"] or null - Optional steps to mitigate risk, particularly for higher calculated scores.
|
| 25 |
"contributing_factors": [
|
| 26 |
{{
|
| 27 |
"description": "string - A human-readable description of the risk factor",
|
|
|
|
| 47 |
|
| 48 |
IMPORTANT:
|
| 49 |
- The `reasoning` field is mandatory for your internal monologue. You must put any and all reasoning you were asked to do in here. This is your internal monologue, and should be as detailed as possible.
|
| 50 |
+
- Do NOT include a `calculated_risk_scores` field in your response - this is populated programmatically from the RISK SCORES section.
|
| 51 |
+
- Focus your `llm_risk_interpretations` on explaining the CALCULATED scores, not generating new risk assessments.
|
| 52 |
- Do not add disclaimers; they are handled separately.
|
| 53 |
- Use null for optional fields that don't apply.
|
| 54 |
- Return ONLY the JSON object, nothing else.
|
examples/benchmark/benchmark_female.yaml
CHANGED
|
@@ -1,23 +1,39 @@
|
|
| 1 |
demographics:
|
| 2 |
-
|
| 3 |
sex: female
|
| 4 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
lifestyle:
|
| 7 |
smoking:
|
| 8 |
status: never
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
family_history:
|
| 13 |
-
-
|
| 14 |
-
cancer_type:
|
| 15 |
age_at_diagnosis: 48
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
age_at_diagnosis: 55
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
personal_medical_history:
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
female_specific:
|
| 23 |
menstrual:
|
|
@@ -25,21 +41,17 @@ female_specific:
|
|
| 25 |
parity:
|
| 26 |
num_live_births: 2
|
| 27 |
age_at_first_live_birth: 28
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
unit: "g/dL"
|
| 39 |
-
date: "2025-09-20"
|
| 40 |
-
|
| 41 |
-
clinical_observations:
|
| 42 |
-
- test_name: "Mammogram"
|
| 43 |
-
value: "BI-RADS 4"
|
| 44 |
-
unit: "category"
|
| 45 |
-
date: "2025-09-20"
|
|
|
|
| 1 |
demographics:
|
| 2 |
+
age_years: 52
|
| 3 |
sex: female
|
| 4 |
+
ethnicity: asian
|
| 5 |
+
education_level: 4
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 165
|
| 8 |
+
weight_kg: 65
|
| 9 |
|
| 10 |
lifestyle:
|
| 11 |
smoking:
|
| 12 |
status: never
|
| 13 |
+
cigarettes_per_day: 0
|
| 14 |
+
years_smoked: 0
|
| 15 |
+
pack_years: 0
|
| 16 |
+
alcohol_consumption: light
|
| 17 |
+
multivitamin_use: true
|
| 18 |
+
moderate_physical_activity_hours_per_day: 0.5
|
| 19 |
+
red_meat_consumption_oz_per_day: 2.0
|
| 20 |
|
| 21 |
family_history:
|
| 22 |
+
- relation: mother
|
| 23 |
+
cancer_type: breast_cancer
|
| 24 |
age_at_diagnosis: 48
|
| 25 |
+
degree: "1"
|
| 26 |
+
side: unknown
|
| 27 |
+
- relation: maternal_aunt
|
| 28 |
+
cancer_type: ovarian_cancer
|
| 29 |
age_at_diagnosis: 55
|
| 30 |
+
degree: "2"
|
| 31 |
+
side: maternal
|
| 32 |
|
| 33 |
+
personal_medical_history:
|
| 34 |
+
chronic_conditions: []
|
| 35 |
+
previous_cancers: []
|
| 36 |
+
nsaid_use: never
|
| 37 |
|
| 38 |
female_specific:
|
| 39 |
menstrual:
|
|
|
|
| 41 |
parity:
|
| 42 |
num_live_births: 2
|
| 43 |
age_at_first_live_birth: 28
|
| 44 |
+
hormone_use:
|
| 45 |
+
estrogen_use: never
|
| 46 |
|
| 47 |
+
symptoms:
|
| 48 |
+
- symptom_type: breast_lump
|
| 49 |
+
- symptom_type: weight_loss
|
| 50 |
|
| 51 |
+
dermatologic:
|
| 52 |
+
region: central
|
| 53 |
+
complexion: medium
|
| 54 |
+
freckling: mild
|
| 55 |
+
female_tan: moderate
|
| 56 |
+
female_small_moles: five_to_eleven
|
| 57 |
+
solar_damage: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/benchmark/benchmark_male.yaml
CHANGED
|
@@ -1,42 +1,58 @@
|
|
| 1 |
demographics:
|
| 2 |
-
|
| 3 |
sex: male
|
| 4 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
lifestyle:
|
| 7 |
smoking:
|
| 8 |
status: former
|
| 9 |
pack_years: 20
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
family_history:
|
| 14 |
-
-
|
| 15 |
-
cancer_type:
|
| 16 |
age_at_diagnosis: 67
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
age_at_diagnosis: 62
|
|
|
|
|
|
|
| 20 |
|
| 21 |
personal_medical_history:
|
| 22 |
chronic_conditions:
|
| 23 |
-
-
|
| 24 |
-
|
|
|
|
| 25 |
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
|
| 29 |
-
-
|
| 30 |
-
value: "5.8"
|
| 31 |
-
unit: "ng/mL"
|
| 32 |
-
date: "2025-09-15"
|
| 33 |
-
- test_name: "Hemoglobin A1c"
|
| 34 |
-
value: "7.2"
|
| 35 |
-
unit: "%"
|
| 36 |
-
date: "2025-09-15"
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
demographics:
|
| 2 |
+
age_years: 58
|
| 3 |
sex: male
|
| 4 |
+
ethnicity: white
|
| 5 |
+
education_level: 3
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 178
|
| 8 |
+
weight_kg: 92
|
| 9 |
|
| 10 |
lifestyle:
|
| 11 |
smoking:
|
| 12 |
status: former
|
| 13 |
pack_years: 20
|
| 14 |
+
cigarettes_per_day: 20
|
| 15 |
+
years_smoked: 20
|
| 16 |
+
years_since_quit: 5
|
| 17 |
+
alcohol_consumption: moderate
|
| 18 |
+
multivitamin_use: false
|
| 19 |
+
moderate_physical_activity_hours_per_day: 0.25
|
| 20 |
+
red_meat_consumption_oz_per_day: 4.0
|
| 21 |
|
| 22 |
family_history:
|
| 23 |
+
- relation: father
|
| 24 |
+
cancer_type: lung_cancer
|
| 25 |
age_at_diagnosis: 67
|
| 26 |
+
degree: "1"
|
| 27 |
+
side: unknown
|
| 28 |
+
- relation: brother
|
| 29 |
+
cancer_type: prostate_cancer
|
| 30 |
age_at_diagnosis: 62
|
| 31 |
+
degree: "1"
|
| 32 |
+
side: unknown
|
| 33 |
|
| 34 |
personal_medical_history:
|
| 35 |
chronic_conditions:
|
| 36 |
+
- diabetes
|
| 37 |
+
previous_cancers: []
|
| 38 |
+
aspirin_use: never
|
| 39 |
|
| 40 |
+
clinical_tests:
|
| 41 |
+
psa:
|
| 42 |
+
value_ng_ml: 5.8
|
| 43 |
+
date: 2025-09-15
|
| 44 |
+
dre:
|
| 45 |
+
result: normal
|
| 46 |
+
date: 2025-09-15
|
| 47 |
|
| 48 |
+
symptoms:
|
| 49 |
+
- symptom_type: persistent_cough
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
+
dermatologic:
|
| 52 |
+
region: northern
|
| 53 |
+
complexion: light
|
| 54 |
+
freckling: moderate
|
| 55 |
+
male_sunburn: true
|
| 56 |
+
male_has_two_or_more_big_moles: true
|
| 57 |
+
male_small_moles: seven_to_sixteen
|
| 58 |
+
solar_damage: true
|
prompts/instruction/assessment.md
CHANGED
|
@@ -2,18 +2,22 @@ You will provide a structured JSON output as specified in the `FORMAT INSTRUCTIO
|
|
| 2 |
|
| 3 |
## Your Task
|
| 4 |
|
| 5 |
-
|
| 6 |
|
| 7 |
-
|
| 8 |
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
| 12 |
|
| 13 |
-
|
| 14 |
|
| 15 |
-
|
| 16 |
|
| 17 |
-
|
| 18 |
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
## Your Task
|
| 4 |
|
| 5 |
+
The `RISK SCORES (GROUND TRUTH)` section contains deterministic risk scores calculated by validated risk models. **These scores are the authoritative source of truth and must not be contradicted or overridden.**
|
| 6 |
|
| 7 |
+
Your role is to:
|
| 8 |
|
| 9 |
+
1. **Explain the risk scores**: For each cancer type with calculated risk scores, provide clear, empathetic explanations of what these scores mean for the patient. Explain the scores in plain language without generating your own risk levels.
|
| 10 |
|
| 11 |
+
2. **Identify contributing factors**: Analyze the patient's profile in `USER INFORMATION` to highlight the key risk factors that contributed to elevated risk scores. Explain WHY specific scores are higher based on the patient's demographics, lifestyle, medical history, and family history.
|
| 12 |
|
| 13 |
+
3. **Review clinical observations**: If clinical observations are present, identify any abnormalities by comparing values to reference ranges and explain how these relate to the calculated risk scores.
|
| 14 |
|
| 15 |
+
4. **Apply diagnostic protocols**: For each relevant protocol in `DIAGNOSTIC PROTOCOLS`, determine the patient's eligibility and recommended testing frequency based on their risk profile and demographic information.
|
| 16 |
|
| 17 |
+
5. **Provide actionable insights**: Offer evidence-based recommendations and lifestyle advice that patients can use to understand and potentially modify their risk factors.
|
| 18 |
|
| 19 |
+
6. **Maintain consistency**: Ensure your explanations and recommendations align with the calculated risk scores and established guidelines. Do not contradict the quantitative scores.
|
| 20 |
+
|
| 21 |
+
7. **Structure the output**: Generate the JSON response following the `FORMAT INSTRUCTIONS` exactly.
|
| 22 |
+
|
| 23 |
+
**Critical**: You are an interpreter and explainer of risk data, NOT a risk calculator. The validated risk models have already determined the risk levels - your job is to make them understandable and actionable for the patient.
|
scripts/generate_documentation.py
CHANGED
|
@@ -15,7 +15,6 @@ from annotated_types import Ge, Gt, Le, Lt
|
|
| 15 |
from fpdf import FPDF
|
| 16 |
from pydantic import BaseModel
|
| 17 |
|
| 18 |
-
from sentinel.models import UserInput
|
| 19 |
from sentinel.risk_models.base import RiskModel
|
| 20 |
from sentinel.risk_models.qcancer import (
|
| 21 |
FEMALE_CANCER_TYPES as QC_FEMALE_CANCERS,
|
|
@@ -23,6 +22,7 @@ from sentinel.risk_models.qcancer import (
|
|
| 23 |
from sentinel.risk_models.qcancer import (
|
| 24 |
MALE_CANCER_TYPES as QC_MALE_CANCERS,
|
| 25 |
)
|
|
|
|
| 26 |
|
| 27 |
# Constants
|
| 28 |
HERE = Path(__file__).resolve().parent
|
|
|
|
| 15 |
from fpdf import FPDF
|
| 16 |
from pydantic import BaseModel
|
| 17 |
|
|
|
|
| 18 |
from sentinel.risk_models.base import RiskModel
|
| 19 |
from sentinel.risk_models.qcancer import (
|
| 20 |
FEMALE_CANCER_TYPES as QC_FEMALE_CANCERS,
|
|
|
|
| 22 |
from sentinel.risk_models.qcancer import (
|
| 23 |
MALE_CANCER_TYPES as QC_MALE_CANCERS,
|
| 24 |
)
|
| 25 |
+
from sentinel.user_input import UserInput
|
| 26 |
|
| 27 |
# Constants
|
| 28 |
HERE = Path(__file__).resolve().parent
|
src/sentinel/api_clients/canrisk.py
CHANGED
|
@@ -1513,7 +1513,7 @@ class CanRiskClient:
|
|
| 1513 |
stripped = relative.strip()
|
| 1514 |
if not stripped:
|
| 1515 |
return "Unknown"
|
| 1516 |
-
compact = stripped.title().replace(" ", "")
|
| 1517 |
return compact[:20]
|
| 1518 |
|
| 1519 |
@staticmethod
|
|
|
|
| 1513 |
stripped = relative.strip()
|
| 1514 |
if not stripped:
|
| 1515 |
return "Unknown"
|
| 1516 |
+
compact = stripped.title().replace(" ", "").replace("_", "")
|
| 1517 |
return compact[:20]
|
| 1518 |
|
| 1519 |
@staticmethod
|
src/sentinel/conversation.py
CHANGED
|
@@ -7,7 +7,9 @@ from langchain_core.messages import get_buffer_string
|
|
| 7 |
from langchain_core.runnables.base import Runnable
|
| 8 |
|
| 9 |
from .llm_service import extract_thinking
|
| 10 |
-
from .models import ConversationResponse, InitialAssessment
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
@dataclass
|
|
@@ -37,25 +39,45 @@ class ConversationManager:
|
|
| 37 |
pairs.append((human, ai))
|
| 38 |
return pairs
|
| 39 |
|
| 40 |
-
def initial_assessment(
|
|
|
|
|
|
|
| 41 |
"""Run the structured assessment chain and record the exchange.
|
| 42 |
|
| 43 |
Args:
|
| 44 |
user: The user profile to assess.
|
|
|
|
| 45 |
|
| 46 |
Returns:
|
| 47 |
The structured InitialAssessment result.
|
| 48 |
"""
|
|
|
|
| 49 |
self.user_json = user.model_dump_json()
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
if isinstance(result, InitialAssessment):
|
| 53 |
data = result
|
| 54 |
else:
|
| 55 |
data = InitialAssessment.model_validate(result)
|
| 56 |
|
|
|
|
|
|
|
|
|
|
| 57 |
# Add to history as a new interaction
|
| 58 |
-
self.chat_history.add_user_message(
|
|
|
|
|
|
|
| 59 |
self.chat_history.add_ai_message(data.model_dump_json())
|
| 60 |
return data
|
| 61 |
|
|
|
|
| 7 |
from langchain_core.runnables.base import Runnable
|
| 8 |
|
| 9 |
from .llm_service import extract_thinking
|
| 10 |
+
from .models import ConversationResponse, InitialAssessment
|
| 11 |
+
from .risk_aggregation import format_scores_for_llm, group_scores_by_cancer_type
|
| 12 |
+
from .user_input import UserInput
|
| 13 |
|
| 14 |
|
| 15 |
@dataclass
|
|
|
|
| 39 |
pairs.append((human, ai))
|
| 40 |
return pairs
|
| 41 |
|
| 42 |
+
def initial_assessment(
|
| 43 |
+
self, user: UserInput, risk_scores: list | None = None
|
| 44 |
+
) -> InitialAssessment:
|
| 45 |
"""Run the structured assessment chain and record the exchange.
|
| 46 |
|
| 47 |
Args:
|
| 48 |
user: The user profile to assess.
|
| 49 |
+
risk_scores: Optional list of RiskScore objects. If not provided, will try to get from user.risk_scores.
|
| 50 |
|
| 51 |
Returns:
|
| 52 |
The structured InitialAssessment result.
|
| 53 |
"""
|
| 54 |
+
|
| 55 |
self.user_json = user.model_dump_json()
|
| 56 |
+
|
| 57 |
+
# Extract and group risk scores
|
| 58 |
+
if risk_scores is None:
|
| 59 |
+
# Try to get from user if it has risk_scores attribute
|
| 60 |
+
risk_scores = getattr(user, "risk_scores", [])
|
| 61 |
+
grouped_scores = group_scores_by_cancer_type(risk_scores)
|
| 62 |
+
formatted_scores = format_scores_for_llm(grouped_scores)
|
| 63 |
+
|
| 64 |
+
# Invoke LLM with scores as separate context
|
| 65 |
+
result = self.structured_chain.invoke(
|
| 66 |
+
{"user_data": self.user_json, "risk_scores": formatted_scores}
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
if isinstance(result, InitialAssessment):
|
| 70 |
data = result
|
| 71 |
else:
|
| 72 |
data = InitialAssessment.model_validate(result)
|
| 73 |
|
| 74 |
+
# Attach the ground truth calculated scores
|
| 75 |
+
data.calculated_risk_scores = grouped_scores
|
| 76 |
+
|
| 77 |
# Add to history as a new interaction
|
| 78 |
+
self.chat_history.add_user_message(
|
| 79 |
+
f"Initial assessment for user profile: {self.user_json}"
|
| 80 |
+
)
|
| 81 |
self.chat_history.add_ai_message(data.model_dump_json())
|
| 82 |
return data
|
| 83 |
|
src/sentinel/models.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""Pydantic models and enums used across the Sentinel application."""
|
| 2 |
|
| 3 |
import re
|
| 4 |
-
from collections.abc import Iterable
|
| 5 |
from enum import Enum, IntEnum
|
| 6 |
from typing import Any, Literal
|
| 7 |
|
|
@@ -1593,213 +1593,6 @@ class RiskScore(SentinelBaseModel):
|
|
| 1593 |
)
|
| 1594 |
|
| 1595 |
|
| 1596 |
-
# ---------------------------------------------------------------------------
|
| 1597 |
-
# Canonical user input
|
| 1598 |
-
# ---------------------------------------------------------------------------
|
| 1599 |
-
|
| 1600 |
-
|
| 1601 |
-
class UserInput(SentinelBaseModel):
|
| 1602 |
-
"""Top-level container for all input required by assessments."""
|
| 1603 |
-
|
| 1604 |
-
schema_version: str = Field(default="2025.10")
|
| 1605 |
-
demographics: Demographics
|
| 1606 |
-
lifestyle: Lifestyle
|
| 1607 |
-
family_history: list[FamilyMemberCancer] = Field(default_factory=list)
|
| 1608 |
-
personal_medical_history: PersonalMedicalHistory
|
| 1609 |
-
female_specific: FemaleSpecific | None = None
|
| 1610 |
-
current_concerns_or_symptoms: str | None = None
|
| 1611 |
-
symptoms: list[SymptomEntry] = Field(default_factory=list)
|
| 1612 |
-
clinical_observations: list[ClinicalObservation] = Field(default_factory=list)
|
| 1613 |
-
lab_results: list[LabResult] = Field(default_factory=list)
|
| 1614 |
-
screening_history: list[ScreeningEvent] = Field(default_factory=list)
|
| 1615 |
-
medications: list[MedicationRecord] = Field(default_factory=list)
|
| 1616 |
-
risk_scores: list[RiskScore] = Field(default_factory=list, alias="risks_scores")
|
| 1617 |
-
notes: str | None = None
|
| 1618 |
-
dermatologic: DermatologicProfile | None = None
|
| 1619 |
-
|
| 1620 |
-
@model_validator(mode="before")
|
| 1621 |
-
def _legacy(cls, values: Any) -> Any:
|
| 1622 |
-
if not isinstance(values, dict):
|
| 1623 |
-
return values
|
| 1624 |
-
data = dict(values)
|
| 1625 |
-
for field_name in (
|
| 1626 |
-
"clinical_observations",
|
| 1627 |
-
"lab_results",
|
| 1628 |
-
"screening_history",
|
| 1629 |
-
"medications",
|
| 1630 |
-
"family_history",
|
| 1631 |
-
"symptoms",
|
| 1632 |
-
"risk_scores",
|
| 1633 |
-
"risks_scores",
|
| 1634 |
-
):
|
| 1635 |
-
if field_name in data and data[field_name] is None:
|
| 1636 |
-
data[field_name] = []
|
| 1637 |
-
if "risks_scores" in data and "risk_scores" not in data:
|
| 1638 |
-
data["risk_scores"] = data.pop("risks_scores")
|
| 1639 |
-
return data
|
| 1640 |
-
|
| 1641 |
-
@property
|
| 1642 |
-
def risks_scores(self) -> list[RiskScore]:
|
| 1643 |
-
"""Get risk scores list.
|
| 1644 |
-
|
| 1645 |
-
Returns:
|
| 1646 |
-
List of risk scores.
|
| 1647 |
-
"""
|
| 1648 |
-
return self.risk_scores
|
| 1649 |
-
|
| 1650 |
-
@risks_scores.setter
|
| 1651 |
-
def risks_scores(self, value: Iterable[RiskScore]) -> None:
|
| 1652 |
-
"""Set risk scores list.
|
| 1653 |
-
|
| 1654 |
-
Args:
|
| 1655 |
-
value: Risk scores to set.
|
| 1656 |
-
"""
|
| 1657 |
-
self.risk_scores = list(value)
|
| 1658 |
-
|
| 1659 |
-
@property
|
| 1660 |
-
def reproductive_history(self) -> FemaleSpecific | None:
|
| 1661 |
-
"""Get reproductive history.
|
| 1662 |
-
|
| 1663 |
-
Returns:
|
| 1664 |
-
Reproductive history or None.
|
| 1665 |
-
"""
|
| 1666 |
-
return self.female_specific
|
| 1667 |
-
|
| 1668 |
-
@reproductive_history.setter
|
| 1669 |
-
def reproductive_history(self, value: FemaleSpecific | None) -> None:
|
| 1670 |
-
"""Set reproductive history.
|
| 1671 |
-
|
| 1672 |
-
Args:
|
| 1673 |
-
value: Reproductive history to set.
|
| 1674 |
-
"""
|
| 1675 |
-
self.female_specific = value
|
| 1676 |
-
|
| 1677 |
-
@property
|
| 1678 |
-
def bmi(self) -> float | None:
|
| 1679 |
-
"""Get BMI value.
|
| 1680 |
-
|
| 1681 |
-
Returns:
|
| 1682 |
-
BMI value or None.
|
| 1683 |
-
"""
|
| 1684 |
-
return self.demographics.bmi
|
| 1685 |
-
|
| 1686 |
-
@property
|
| 1687 |
-
def smoking_history(self) -> SmokingHistory:
|
| 1688 |
-
"""Get smoking history.
|
| 1689 |
-
|
| 1690 |
-
Returns:
|
| 1691 |
-
Smoking history.
|
| 1692 |
-
"""
|
| 1693 |
-
return self.lifestyle.smoking
|
| 1694 |
-
|
| 1695 |
-
@property
|
| 1696 |
-
def is_current_or_former_smoker(self) -> bool:
|
| 1697 |
-
"""Check if user is current or former smoker.
|
| 1698 |
-
|
| 1699 |
-
Returns:
|
| 1700 |
-
True if current or former smoker, False otherwise.
|
| 1701 |
-
"""
|
| 1702 |
-
return self.lifestyle.smoking.status in {
|
| 1703 |
-
SmokingStatus.CURRENT,
|
| 1704 |
-
SmokingStatus.FORMER,
|
| 1705 |
-
}
|
| 1706 |
-
|
| 1707 |
-
def _build_observation_index(self) -> dict[str, ClinicalObservation]:
    """Map each normalized observation name to its first matching observation.

    Clinical observations are visited before lab results, and the first
    observation seen for a given normalized name wins. Observations whose
    ``normalized_name`` is falsy are left out.

    Returns:
        Dictionary from normalized name to observation, in first-seen order.
    """
    by_name: dict[str, ClinicalObservation] = {}
    for observation in [*self.clinical_observations, *self.lab_results]:
        name = observation.normalized_name
        if not name:
            continue
        # setdefault keeps the earlier entry when the name repeats.
        by_name.setdefault(name, observation)
    return by_name
|
| 1719 |
-
|
| 1720 |
-
def get_observation(self, names: Sequence[str]) -> ClinicalObservation | None:
    """Look up the first observation matching any of the given names.

    Each candidate name is passed through ``_normalize_key`` and checked, in
    order, against the index built by ``_build_observation_index``.

    Args:
        names: Candidate observation names, in priority order.

    Returns:
        The observation for the first name found in the index, or None when
        none of the names match.
    """
    observations = self._build_observation_index()
    # Lazy generator: normalization stops as soon as a match is found.
    matches = (
        observations[key]
        for key in map(_normalize_key, names)
        if key in observations
    )
    return next(matches, None)
|
| 1735 |
-
|
| 1736 |
-
def get_observation_value(self, names: Sequence[str]) -> str | None:
    """Return the ``value`` of the first observation matching ``names``.

    Args:
        names: Candidate observation names, forwarded to ``get_observation``.

    Returns:
        The matched observation's ``value``, or None when ``get_observation``
        returns nothing (or a falsy object).
    """
    found = self.get_observation(names)
    if not found:
        return None
    return found.value
|
| 1747 |
-
|
| 1748 |
-
def get_numeric_observation(self, names: Sequence[str]) -> float | None:
    """Return the ``numeric_value`` of the first observation matching ``names``.

    Args:
        names: Candidate observation names, forwarded to ``get_observation``.

    Returns:
        The matched observation's ``numeric_value``, or None when
        ``get_observation`` returns nothing (or a falsy object).
    """
    found = self.get_observation(names)
    if not found:
        return None
    return found.numeric_value
|
| 1759 |
-
|
| 1760 |
-
def has_family_history(
    self, relations: Iterable[FamilyRelation], cancer_keywords: Iterable[str]
) -> bool:
    """Check whether any family record matches the relations and keywords.

    A record matches when its ``relation`` is among the normalized
    ``relations`` (or ``relations`` is empty, which disables that filter) and
    at least one lowercased keyword is a substring of the record's lowercased
    ``cancer_type``. A missing ``cancer_type`` is treated as an empty string.

    Args:
        relations: Family relations to restrict the search to; each is passed
            through ``FamilyRelation.normalize``.
        cancer_keywords: Keywords to look for, compared case-insensitively.

    Returns:
        True if at least one record matches, False otherwise.
    """
    wanted_relations = {FamilyRelation.normalize(rel) for rel in relations}
    needles = {kw.lower() for kw in cancer_keywords}

    def _matches(entry) -> bool:
        # An empty relation filter accepts every relation.
        if wanted_relations and entry.relation not in wanted_relations:
            return False
        return any(
            needle in (entry.cancer_type or "").lower() for needle in needles
        )

    return any(_matches(entry) for entry in self.family_history)
|
| 1782 |
-
|
| 1783 |
-
def first_degree_cancer_count(self, cancer_keywords: Iterable[str]) -> int:
    """Count first-degree family records whose cancer type matches a keyword.

    A record is counted when its ``is_first_degree`` flag is truthy and at
    least one lowercased keyword is a substring of its lowercased
    ``cancer_type``. A missing ``cancer_type`` is treated as an empty string.

    Args:
        cancer_keywords: Keywords to look for, compared case-insensitively.

    Returns:
        Number of matching records in ``self.family_history``.
    """
    needles = {kw.lower() for kw in cancer_keywords}
    matching = 0
    for entry in self.family_history:
        if not entry.is_first_degree:
            continue
        if any(needle in (entry.cancer_type or "").lower() for needle in needles):
            matching += 1
    return matching
|
| 1801 |
-
|
| 1802 |
-
|
| 1803 |
# ---------------------------------------------------------------------------
|
| 1804 |
# Assessment artefacts
|
| 1805 |
# ---------------------------------------------------------------------------
|
|
@@ -1872,24 +1665,38 @@ class InitialAssessment(SentinelBaseModel):
|
|
| 1872 |
overall_summary: str | None = Field(
|
| 1873 |
default=None, description="A high-level summary of the user's cancer risk."
|
| 1874 |
)
|
| 1875 |
-
overall_risk_score:
|
| 1876 |
default=None,
|
| 1877 |
description="A holistic score from 0 to 100 representing the user's overall cancer risk.",
|
| 1878 |
ge=0,
|
| 1879 |
le=100,
|
| 1880 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1881 |
identified_risk_factors: list[RiskFactor] = Field(
|
| 1882 |
default_factory=list,
|
| 1883 |
description="A comprehensive list of all distinct risk factors identified from the user's profile.",
|
| 1884 |
)
|
| 1885 |
-
|
| 1886 |
default_factory=list,
|
| 1887 |
-
description="
|
|
|
|
| 1888 |
)
|
| 1889 |
dx_recommendations: list[DxRecommendation] = Field(
|
| 1890 |
default_factory=list, description="Recommended diagnostic tests and protocols"
|
| 1891 |
)
|
| 1892 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1893 |
|
| 1894 |
class ConversationResponse(SentinelBaseModel):
|
| 1895 |
"""Structured response for conversational follow-ups."""
|
|
|
|
| 1 |
"""Pydantic models and enums used across the Sentinel application."""
|
| 2 |
|
| 3 |
import re
|
| 4 |
+
from collections.abc import Iterable
|
| 5 |
from enum import Enum, IntEnum
|
| 6 |
from typing import Any, Literal
|
| 7 |
|
|
|
|
| 1593 |
)
|
| 1594 |
|
| 1595 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1596 |
# ---------------------------------------------------------------------------
|
| 1597 |
# Assessment artefacts
|
| 1598 |
# ---------------------------------------------------------------------------
|
|
|
|
| 1665 |
overall_summary: str | None = Field(
|
| 1666 |
default=None, description="A high-level summary of the user's cancer risk."
|
| 1667 |
)
|
| 1668 |
+
overall_risk_score: float | None = Field(
|
| 1669 |
default=None,
|
| 1670 |
description="A holistic score from 0 to 100 representing the user's overall cancer risk.",
|
| 1671 |
ge=0,
|
| 1672 |
le=100,
|
| 1673 |
)
|
| 1674 |
+
calculated_risk_scores: dict[str, list[RiskScore]] = Field(
|
| 1675 |
+
default_factory=dict,
|
| 1676 |
+
description="Deterministic risk scores grouped by cancer type (ground truth)",
|
| 1677 |
+
)
|
| 1678 |
identified_risk_factors: list[RiskFactor] = Field(
|
| 1679 |
default_factory=list,
|
| 1680 |
description="A comprehensive list of all distinct risk factors identified from the user's profile.",
|
| 1681 |
)
|
| 1682 |
+
llm_risk_interpretations: list[CancerRiskAssessment] = Field(
|
| 1683 |
default_factory=list,
|
| 1684 |
+
description="LLM explanations and interpretations of calculated risk scores",
|
| 1685 |
+
alias="risk_assessments",
|
| 1686 |
)
|
| 1687 |
dx_recommendations: list[DxRecommendation] = Field(
|
| 1688 |
default_factory=list, description="Recommended diagnostic tests and protocols"
|
| 1689 |
)
|
| 1690 |
|
| 1691 |
+
@property
def risk_assessments(self) -> list[CancerRiskAssessment]:
    """Read ``llm_risk_interpretations`` under its legacy name.

    Returns:
        The list stored in ``self.llm_risk_interpretations``.
    """
    interpretations = self.llm_risk_interpretations
    return interpretations
|
| 1699 |
+
|
| 1700 |
|
| 1701 |
class ConversationResponse(SentinelBaseModel):
|
| 1702 |
"""Structured response for conversational follow-ups."""
|
src/sentinel/prompting.py
CHANGED
|
@@ -54,6 +54,7 @@ class PromptBuilder:
|
|
| 54 |
"# PERSONA\n\n{persona}\n\n"
|
| 55 |
"# CANCER MODULES\n\n{cancer_modules}\n\n"
|
| 56 |
"# DIAGNOSTIC PROTOCOLS\n\n{protocols}\n\n"
|
|
|
|
| 57 |
"# USER INFORMATION\n\n{user_data}\n\n"
|
| 58 |
"# INSTRUCTIONS\n\n{instruction}\n\n"
|
| 59 |
"# OUTPUT FORMAT INSTRUCTIONS (FOR INITIAL RESPONSE ONLY)\n\n{format_instructions}"
|
|
|
|
| 54 |
"# PERSONA\n\n{persona}\n\n"
|
| 55 |
"# CANCER MODULES\n\n{cancer_modules}\n\n"
|
| 56 |
"# DIAGNOSTIC PROTOCOLS\n\n{protocols}\n\n"
|
| 57 |
+
"# RISK SCORES (GROUND TRUTH)\n\n{risk_scores}\n\n"
|
| 58 |
"# USER INFORMATION\n\n{user_data}\n\n"
|
| 59 |
"# INSTRUCTIONS\n\n{instruction}\n\n"
|
| 60 |
"# OUTPUT FORMAT INSTRUCTIONS (FOR INITIAL RESPONSE ONLY)\n\n{format_instructions}"
|
src/sentinel/reporting.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
|
| 3 |
import json
|
| 4 |
import math
|
|
|
|
| 5 |
from datetime import datetime
|
| 6 |
|
| 7 |
import markdown2
|
|
@@ -35,8 +36,8 @@ from .models import (
|
|
| 35 |
ContributionStrength,
|
| 36 |
InitialAssessment,
|
| 37 |
RiskFactorCategory,
|
| 38 |
-
UserInput,
|
| 39 |
)
|
|
|
|
| 40 |
|
| 41 |
# --- PDF Report Formatting Globals ---
|
| 42 |
# Fonts
|
|
@@ -178,6 +179,7 @@ def generate_excel_report(
|
|
| 178 |
wb = Workbook()
|
| 179 |
|
| 180 |
_create_summary_sheet(wb, assessment, user_input)
|
|
|
|
| 181 |
_create_data_sheet(wb, "User Input Data", user_input.model_dump(mode="json"))
|
| 182 |
_create_data_sheet(wb, "Raw LLM Output", assessment.model_dump(mode="json"))
|
| 183 |
|
|
@@ -223,7 +225,7 @@ def _create_summary_sheet(
|
|
| 223 |
ws.cell(row=current_row, column=1, value="Demographics").font = bold_font
|
| 224 |
current_row += 1
|
| 225 |
demo_info = {
|
| 226 |
-
"Age": user_input.demographics.
|
| 227 |
"Sex": user_input.demographics.sex,
|
| 228 |
"Ethnicity": user_input.demographics.ethnicity,
|
| 229 |
}
|
|
@@ -238,10 +240,9 @@ def _create_summary_sheet(
|
|
| 238 |
ws.cell(row=current_row, column=1, value="Lifestyle").font = bold_font
|
| 239 |
current_row += 1
|
| 240 |
lifestyle_info = {
|
| 241 |
-
"Smoking Status": user_input.lifestyle.
|
| 242 |
-
"Pack Years": user_input.lifestyle.
|
| 243 |
"Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
|
| 244 |
-
"Dietary Habits": user_input.lifestyle.dietary_habits,
|
| 245 |
"Physical Activity": user_input.lifestyle.physical_activity_level,
|
| 246 |
}
|
| 247 |
for key, val in lifestyle_info.items():
|
|
@@ -253,16 +254,16 @@ def _create_summary_sheet(
|
|
| 253 |
|
| 254 |
# Personal Medical History
|
| 255 |
if user_input.personal_medical_history and (
|
| 256 |
-
user_input.personal_medical_history.
|
| 257 |
or user_input.personal_medical_history.previous_cancers
|
| 258 |
-
or user_input.personal_medical_history.
|
| 259 |
):
|
| 260 |
ws.cell(
|
| 261 |
row=current_row, column=1, value="Personal Medical History"
|
| 262 |
).font = bold_font
|
| 263 |
current_row += 1
|
| 264 |
pmh_texts = []
|
| 265 |
-
if user_input.personal_medical_history.
|
| 266 |
ws.cell(
|
| 267 |
row=current_row, column=1, value="Known Genetic Mutations"
|
| 268 |
).font = bold_font
|
|
@@ -270,7 +271,8 @@ def _create_summary_sheet(
|
|
| 270 |
row=current_row,
|
| 271 |
column=2,
|
| 272 |
value=", ".join(
|
| 273 |
-
|
|
|
|
| 274 |
),
|
| 275 |
).alignment = wrap_alignment
|
| 276 |
current_row += 1
|
|
@@ -281,17 +283,22 @@ def _create_summary_sheet(
|
|
| 281 |
ws.cell(
|
| 282 |
row=current_row,
|
| 283 |
column=2,
|
| 284 |
-
value=", ".join(
|
|
|
|
|
|
|
| 285 |
).alignment = wrap_alignment
|
| 286 |
current_row += 1
|
| 287 |
-
if user_input.personal_medical_history.
|
| 288 |
ws.cell(
|
| 289 |
-
row=current_row, column=1, value="Chronic
|
| 290 |
).font = bold_font
|
| 291 |
ws.cell(
|
| 292 |
row=current_row,
|
| 293 |
column=2,
|
| 294 |
-
value=", ".join(
|
|
|
|
|
|
|
|
|
|
| 295 |
).alignment = wrap_alignment
|
| 296 |
current_row += 1
|
| 297 |
current_row += 1
|
|
@@ -301,7 +308,7 @@ def _create_summary_sheet(
|
|
| 301 |
ws.cell(row=current_row, column=1, value="Family History").font = bold_font
|
| 302 |
current_row += 1
|
| 303 |
family_texts = [
|
| 304 |
-
f"{mem.
|
| 305 |
for mem in user_input.family_history
|
| 306 |
]
|
| 307 |
ws.cell(
|
|
@@ -313,12 +320,16 @@ def _create_summary_sheet(
|
|
| 313 |
if user_input.female_specific:
|
| 314 |
ws.cell(row=current_row, column=1, value="Female-Specific").font = bold_font
|
| 315 |
current_row += 1
|
|
|
|
| 316 |
female_specific_info = {
|
| 317 |
-
"Age at first period":
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
"Age at
|
| 321 |
-
"
|
|
|
|
|
|
|
|
|
|
| 322 |
}
|
| 323 |
for key, val in female_specific_info.items():
|
| 324 |
ws.cell(row=current_row, column=1, value=key).font = bold_font
|
|
@@ -328,35 +339,19 @@ def _create_summary_sheet(
|
|
| 328 |
current_row += 1
|
| 329 |
current_row += 1
|
| 330 |
|
| 331 |
-
# Current
|
| 332 |
-
if user_input.
|
| 333 |
-
ws.cell(row=current_row, column=1, value="Current
|
| 334 |
current_row += 1
|
|
|
|
| 335 |
ws.cell(
|
| 336 |
-
row=current_row, column=2, value=
|
| 337 |
).alignment = wrap_alignment
|
| 338 |
current_row += 2
|
| 339 |
|
| 340 |
-
#
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
ws.cell(
|
| 344 |
-
row=current_row, column=1, value="Clinical Observations"
|
| 345 |
-
).font = bold_font
|
| 346 |
-
current_row += 1
|
| 347 |
-
headers = ["Test Name", "Value", "Unit", "Reference Range", "Date"]
|
| 348 |
-
for col_idx, header in enumerate(headers, 1):
|
| 349 |
-
cell = ws.cell(row=current_row, column=col_idx, value=header)
|
| 350 |
-
cell.font = header_font
|
| 351 |
-
cell.fill = header_fill
|
| 352 |
-
for obs in user_input.clinical_observations:
|
| 353 |
-
current_row += 1
|
| 354 |
-
ws.cell(row=current_row, column=1, value=obs.test_name)
|
| 355 |
-
ws.cell(row=current_row, column=2, value=obs.value)
|
| 356 |
-
ws.cell(row=current_row, column=3, value=obs.unit)
|
| 357 |
-
ws.cell(row=current_row, column=4, value=obs.reference_range)
|
| 358 |
-
ws.cell(row=current_row, column=5, value=obs.date)
|
| 359 |
-
current_row += 1
|
| 360 |
|
| 361 |
ws.merge_cells(
|
| 362 |
start_row=current_row, start_column=1, end_row=current_row, end_column=6
|
|
@@ -457,6 +452,73 @@ def _create_summary_sheet(
|
|
| 457 |
ws.column_dimensions["F"].width = 30
|
| 458 |
|
| 459 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
def _create_data_sheet(wb: Workbook, title: str, data: dict) -> None:
|
| 461 |
ws = wb.create_sheet(title)
|
| 462 |
pretty_json = json.dumps(data, indent=2)
|
|
@@ -651,7 +713,7 @@ def generate_pdf_report(
|
|
| 651 |
add_section(
|
| 652 |
"Demographics",
|
| 653 |
{
|
| 654 |
-
"Age": user_input.demographics.
|
| 655 |
"Sex": user_input.demographics.sex,
|
| 656 |
"Ethnicity": user_input.demographics.ethnicity or "N/A",
|
| 657 |
},
|
|
@@ -661,10 +723,9 @@ def generate_pdf_report(
|
|
| 661 |
add_section(
|
| 662 |
"Lifestyle",
|
| 663 |
{
|
| 664 |
-
"Smoking Status": user_input.lifestyle.
|
| 665 |
-
"Pack Years": user_input.lifestyle.
|
| 666 |
"Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
|
| 667 |
-
"Dietary Habits": user_input.lifestyle.dietary_habits or "N/A",
|
| 668 |
"Physical Activity": user_input.lifestyle.physical_activity_level or "N/A",
|
| 669 |
},
|
| 670 |
)
|
|
@@ -672,21 +733,27 @@ def generate_pdf_report(
|
|
| 672 |
# --- Personal Medical History ---
|
| 673 |
pmh = user_input.personal_medical_history
|
| 674 |
if pmh and (
|
| 675 |
-
pmh.
|
| 676 |
):
|
| 677 |
pmh_data = {}
|
| 678 |
-
if pmh.
|
| 679 |
-
pmh_data["Known Genetic Mutations"] = ", ".join(
|
|
|
|
|
|
|
| 680 |
if pmh.previous_cancers:
|
| 681 |
-
pmh_data["Previous Cancers"] = ", ".join(
|
| 682 |
-
|
| 683 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
add_section("Personal Medical History", pmh_data)
|
| 685 |
|
| 686 |
# --- Family History ---
|
| 687 |
if user_input.family_history:
|
| 688 |
family_texts = [
|
| 689 |
-
f"{mem.
|
| 690 |
for mem in user_input.family_history
|
| 691 |
]
|
| 692 |
add_list_section("Family History", family_texts)
|
|
@@ -695,80 +762,50 @@ def generate_pdf_report(
|
|
| 695 |
fs = user_input.female_specific
|
| 696 |
if fs:
|
| 697 |
fs_data = {}
|
| 698 |
-
if fs.
|
| 699 |
-
fs_data["Age at first period"] = fs.
|
| 700 |
-
if fs.age_at_menopause is not None:
|
| 701 |
-
fs_data["Age at menopause"] = fs.age_at_menopause
|
| 702 |
-
if fs.num_live_births is not None:
|
| 703 |
-
fs_data["Number of live births"] = fs.num_live_births
|
| 704 |
-
if fs.age_at_first_live_birth is not None:
|
| 705 |
-
fs_data["Age at first live birth"] = fs.age_at_first_live_birth
|
| 706 |
-
if
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
add_list_section("Current
|
| 713 |
|
| 714 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 715 |
|
| 716 |
-
#
|
| 717 |
-
|
| 718 |
-
story.append(Paragraph("Clinical Observations", subheading_style))
|
| 719 |
-
obs_data = [
|
| 720 |
-
[
|
| 721 |
-
Paragraph(h, table_header_style)
|
| 722 |
-
for h in ["Test", "Value", "Unit", "Range", "Date"]
|
| 723 |
-
]
|
| 724 |
-
]
|
| 725 |
-
obs_style_cmds = [
|
| 726 |
-
(
|
| 727 |
-
"BACKGROUND",
|
| 728 |
-
(0, 0),
|
| 729 |
-
(-1, 0),
|
| 730 |
-
colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
|
| 731 |
-
),
|
| 732 |
-
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
|
| 733 |
-
("GRID", (0, 0), (-1, -1), 1, colors.black),
|
| 734 |
-
("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
|
| 735 |
-
("BOTTOMPADDING", (0, 0), (-1, -1), 4),
|
| 736 |
-
("TOPPADDING", (0, 0), (-1, -1), 4),
|
| 737 |
-
]
|
| 738 |
-
for obs in user_input.clinical_observations:
|
| 739 |
-
obs_data.append(
|
| 740 |
-
[
|
| 741 |
-
Paragraph(obs.test_name, table_body_style),
|
| 742 |
-
Paragraph(obs.value, table_body_style),
|
| 743 |
-
Paragraph(obs.unit, table_body_style),
|
| 744 |
-
Paragraph(obs.reference_range or "N/A", table_body_style),
|
| 745 |
-
Paragraph(obs.date or "N/A", table_body_style),
|
| 746 |
-
]
|
| 747 |
-
)
|
| 748 |
-
obs_widths = [1.75 * inch, 0.75 * inch, 0.75 * inch, 1.75 * inch, 1.5 * inch]
|
| 749 |
-
scaled_widths = [w * (CONTENT_WIDTH / sum(obs_widths)) for w in obs_widths]
|
| 750 |
-
obs_table = Table(
|
| 751 |
-
obs_data, colWidths=scaled_widths, style=obs_style_cmds, splitByRow=1
|
| 752 |
-
)
|
| 753 |
-
story.append(obs_table)
|
| 754 |
-
story.append(Spacer(1, SPACER_NORMAL))
|
| 755 |
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 760 |
[
|
| 761 |
Paragraph(h, table_header_style)
|
| 762 |
-
for h in [
|
| 763 |
-
"Model",
|
| 764 |
-
"Score",
|
| 765 |
-
"Cancer Type",
|
| 766 |
-
"Description",
|
| 767 |
-
"Interpretation",
|
| 768 |
-
]
|
| 769 |
]
|
| 770 |
]
|
| 771 |
-
|
| 772 |
(
|
| 773 |
"BACKGROUND",
|
| 774 |
(0, 0),
|
|
@@ -781,28 +818,29 @@ def generate_pdf_report(
|
|
| 781 |
("BOTTOMPADDING", (0, 0), (-1, -1), 4),
|
| 782 |
("TOPPADDING", (0, 0), (-1, -1), 4),
|
| 783 |
]
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 798 |
)
|
| 799 |
-
story.append(
|
| 800 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 801 |
|
| 802 |
-
story.append(PageBreak())
|
| 803 |
-
story.append(Paragraph("Assessment", heading_style))
|
| 804 |
-
story.append(Spacer(1, SPACER_NORMAL))
|
| 805 |
-
|
| 806 |
# --- New 3-Column Summary Section ---
|
| 807 |
headers = [
|
| 808 |
Paragraph("<b>Overall Risk Score</b>", summary_header_style),
|
|
@@ -910,13 +948,12 @@ def generate_pdf_report(
|
|
| 910 |
|
| 911 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 912 |
|
| 913 |
-
story.append(Paragraph("
|
| 914 |
story.append(Spacer(1, SPACER_SMALL))
|
| 915 |
risk_intro_text = """
|
| 916 |
-
The following
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
provided for any and all higher risk cancers (scoring 3-5).
|
| 920 |
"""
|
| 921 |
story.append(Paragraph(risk_intro_text, styles["BodyText"]))
|
| 922 |
story.append(Spacer(1, SPACER_SMALL))
|
|
@@ -1497,8 +1534,6 @@ def _calculate_risk_points(
|
|
| 1497 |
Returns:
|
| 1498 |
Mapping of RiskFactorCategory to integer points.
|
| 1499 |
"""
|
| 1500 |
-
from collections import defaultdict
|
| 1501 |
-
|
| 1502 |
risk_points_by_category = defaultdict(int)
|
| 1503 |
strength_to_points = {
|
| 1504 |
ContributionStrength.MAJOR: 5,
|
|
@@ -1708,8 +1743,6 @@ def _create_risk_factor_table(
|
|
| 1708 |
Returns:
|
| 1709 |
A ReportLab Table or Paragraph to insert in the story.
|
| 1710 |
"""
|
| 1711 |
-
from collections import defaultdict
|
| 1712 |
-
|
| 1713 |
if not assessment.identified_risk_factors:
|
| 1714 |
return Paragraph("No specific risk factors identified.", panel_body_style)
|
| 1715 |
|
|
|
|
| 2 |
|
| 3 |
import json
|
| 4 |
import math
|
| 5 |
+
from collections import defaultdict
|
| 6 |
from datetime import datetime
|
| 7 |
|
| 8 |
import markdown2
|
|
|
|
| 36 |
ContributionStrength,
|
| 37 |
InitialAssessment,
|
| 38 |
RiskFactorCategory,
|
|
|
|
| 39 |
)
|
| 40 |
+
from .user_input import UserInput
|
| 41 |
|
| 42 |
# --- PDF Report Formatting Globals ---
|
| 43 |
# Fonts
|
|
|
|
| 179 |
wb = Workbook()
|
| 180 |
|
| 181 |
_create_summary_sheet(wb, assessment, user_input)
|
| 182 |
+
_create_risk_scores_sheet(wb, assessment)
|
| 183 |
_create_data_sheet(wb, "User Input Data", user_input.model_dump(mode="json"))
|
| 184 |
_create_data_sheet(wb, "Raw LLM Output", assessment.model_dump(mode="json"))
|
| 185 |
|
|
|
|
| 225 |
ws.cell(row=current_row, column=1, value="Demographics").font = bold_font
|
| 226 |
current_row += 1
|
| 227 |
demo_info = {
|
| 228 |
+
"Age": user_input.demographics.age_years,
|
| 229 |
"Sex": user_input.demographics.sex,
|
| 230 |
"Ethnicity": user_input.demographics.ethnicity,
|
| 231 |
}
|
|
|
|
| 240 |
ws.cell(row=current_row, column=1, value="Lifestyle").font = bold_font
|
| 241 |
current_row += 1
|
| 242 |
lifestyle_info = {
|
| 243 |
+
"Smoking Status": user_input.lifestyle.smoking.status,
|
| 244 |
+
"Pack Years": user_input.lifestyle.smoking.pack_years,
|
| 245 |
"Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
|
|
|
|
| 246 |
"Physical Activity": user_input.lifestyle.physical_activity_level,
|
| 247 |
}
|
| 248 |
for key, val in lifestyle_info.items():
|
|
|
|
| 254 |
|
| 255 |
# Personal Medical History
|
| 256 |
if user_input.personal_medical_history and (
|
| 257 |
+
user_input.personal_medical_history.genetic_mutations
|
| 258 |
or user_input.personal_medical_history.previous_cancers
|
| 259 |
+
or user_input.personal_medical_history.chronic_conditions
|
| 260 |
):
|
| 261 |
ws.cell(
|
| 262 |
row=current_row, column=1, value="Personal Medical History"
|
| 263 |
).font = bold_font
|
| 264 |
current_row += 1
|
| 265 |
pmh_texts = []
|
| 266 |
+
if user_input.personal_medical_history.genetic_mutations:
|
| 267 |
ws.cell(
|
| 268 |
row=current_row, column=1, value="Known Genetic Mutations"
|
| 269 |
).font = bold_font
|
|
|
|
| 271 |
row=current_row,
|
| 272 |
column=2,
|
| 273 |
value=", ".join(
|
| 274 |
+
str(m)
|
| 275 |
+
for m in user_input.personal_medical_history.genetic_mutations
|
| 276 |
),
|
| 277 |
).alignment = wrap_alignment
|
| 278 |
current_row += 1
|
|
|
|
| 283 |
ws.cell(
|
| 284 |
row=current_row,
|
| 285 |
column=2,
|
| 286 |
+
value=", ".join(
|
| 287 |
+
str(c) for c in user_input.personal_medical_history.previous_cancers
|
| 288 |
+
),
|
| 289 |
).alignment = wrap_alignment
|
| 290 |
current_row += 1
|
| 291 |
+
if user_input.personal_medical_history.chronic_conditions:
|
| 292 |
ws.cell(
|
| 293 |
+
row=current_row, column=1, value="Chronic Conditions"
|
| 294 |
).font = bold_font
|
| 295 |
ws.cell(
|
| 296 |
row=current_row,
|
| 297 |
column=2,
|
| 298 |
+
value=", ".join(
|
| 299 |
+
str(c)
|
| 300 |
+
for c in user_input.personal_medical_history.chronic_conditions
|
| 301 |
+
),
|
| 302 |
).alignment = wrap_alignment
|
| 303 |
current_row += 1
|
| 304 |
current_row += 1
|
|
|
|
| 308 |
ws.cell(row=current_row, column=1, value="Family History").font = bold_font
|
| 309 |
current_row += 1
|
| 310 |
family_texts = [
|
| 311 |
+
f"{mem.relation} ({mem.cancer_type} at age {mem.age_at_diagnosis or 'N/A'})"
|
| 312 |
for mem in user_input.family_history
|
| 313 |
]
|
| 314 |
ws.cell(
|
|
|
|
| 320 |
if user_input.female_specific:
|
| 321 |
ws.cell(row=current_row, column=1, value="Female-Specific").font = bold_font
|
| 322 |
current_row += 1
|
| 323 |
+
fs = user_input.female_specific
|
| 324 |
female_specific_info = {
|
| 325 |
+
"Age at first period": fs.menstrual.age_at_menarche
|
| 326 |
+
if fs.menstrual
|
| 327 |
+
else None,
|
| 328 |
+
"Age at menopause": fs.menstrual.age_at_menopause if fs.menstrual else None,
|
| 329 |
+
"Number of live births": fs.parity.num_live_births if fs.parity else None,
|
| 330 |
+
"Age at first live birth": fs.parity.age_at_first_live_birth
|
| 331 |
+
if fs.parity
|
| 332 |
+
else None,
|
| 333 |
}
|
| 334 |
for key, val in female_specific_info.items():
|
| 335 |
ws.cell(row=current_row, column=1, value=key).font = bold_font
|
|
|
|
| 339 |
current_row += 1
|
| 340 |
current_row += 1
|
| 341 |
|
| 342 |
+
# Current Symptoms
|
| 343 |
+
if user_input.symptoms:
|
| 344 |
+
ws.cell(row=current_row, column=1, value="Current Symptoms").font = bold_font
|
| 345 |
current_row += 1
|
| 346 |
+
symptom_texts = [str(s.symptom_type) for s in user_input.symptoms]
|
| 347 |
ws.cell(
|
| 348 |
+
row=current_row, column=2, value=", ".join(symptom_texts)
|
| 349 |
).alignment = wrap_alignment
|
| 350 |
current_row += 2
|
| 351 |
|
| 352 |
+
# Note: clinical_observations doesn't exist in user_input.UserInput (strict schema)
|
| 353 |
+
# The strict schema uses clinical_tests instead (PSA, DRE, etc.)
|
| 354 |
+
# Skipping this section as it requires restructuring
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
ws.merge_cells(
|
| 357 |
start_row=current_row, start_column=1, end_row=current_row, end_column=6
|
|
|
|
| 452 |
ws.column_dimensions["F"].width = 30
|
| 453 |
|
| 454 |
|
| 455 |
+
def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> None:
    """Create a worksheet listing the calculated risk scores.

    Adds a "Risk Model Scores" sheet with a merged title row and subtitle row.
    When ``assessment.calculated_risk_scores`` is empty, a single
    "No risk scores calculated" message is written instead of a table.
    Otherwise a header row is followed by one row per score, ordered by cancer
    type, with the cancer type shown only on the first row of each group.

    Args:
        wb: An openpyxl workbook; the new sheet is added to it.
        assessment: The structured initial assessment whose
            ``calculated_risk_scores`` mapping (cancer type -> list of scores)
            is rendered.
    """
    ws = wb.create_sheet("Risk Model Scores")

    title_font = Font(bold=True, size=16, name="Calibri")
    header_font = Font(bold=True, color=HEX_COLORS["header_font"], name="Calibri")
    header_fill = PatternFill(start_color=HEX_COLORS["header_fill"], fill_type="solid")
    wrap_alignment = Alignment(wrap_text=True, vertical="top")

    ws.merge_cells("A1:E1")
    ws["A1"] = "Calculated Risk Scores (Ground Truth)"
    ws["A1"].font = title_font
    ws["A1"].alignment = Alignment(horizontal="center")

    ws.merge_cells("A2:E2")
    ws["A2"] = "Scores calculated using validated clinical risk models"
    ws["A2"].alignment = Alignment(horizontal="center")

    # Widths are applied before the early return so the empty sheet is laid
    # out the same way as a populated one.
    for column_letter, width in (("A", 20), ("B", 25), ("C", 15), ("D", 50), ("E", 40)):
        ws.column_dimensions[column_letter].width = width

    current_row = 4

    if not assessment.calculated_risk_scores:
        ws.cell(row=current_row, column=1, value="No risk scores calculated")
        return

    # Header row
    headers = ["Cancer Type", "Model Name", "Score", "Interpretation", "References"]
    for col_idx, header in enumerate(headers, 1):
        cell = ws.cell(row=current_row, column=col_idx, value=header)
        cell.font = header_font
        cell.fill = header_fill

    current_row += 1

    # One row per score, grouped by cancer type in sorted order.
    for cancer_type, scores in sorted(assessment.calculated_risk_scores.items()):
        for i, score in enumerate(scores):
            # Show the cancer type only on the first row of each group.
            if i == 0:
                ws.cell(row=current_row, column=1, value=cancer_type)

            ws.cell(row=current_row, column=2, value=score.name)

            # Only a missing score becomes "N/A"; a plain `or` would also
            # replace a legitimate zero score.
            score_value = score.score
            if score_value is None or score_value == "":
                score_value = "N/A"
            ws.cell(row=current_row, column=3, value=score_value)

            interp_cell = ws.cell(
                row=current_row, column=4, value=score.interpretation or "N/A"
            )
            interp_cell.alignment = wrap_alignment

            refs = "; ".join(score.references) if score.references else "N/A"
            refs_cell = ws.cell(row=current_row, column=5, value=refs)
            refs_cell.alignment = wrap_alignment

            current_row += 1
|
| 520 |
+
|
| 521 |
+
|
| 522 |
def _create_data_sheet(wb: Workbook, title: str, data: dict) -> None:
|
| 523 |
ws = wb.create_sheet(title)
|
| 524 |
pretty_json = json.dumps(data, indent=2)
|
|
|
|
| 713 |
add_section(
|
| 714 |
"Demographics",
|
| 715 |
{
|
| 716 |
+
"Age": user_input.demographics.age_years,
|
| 717 |
"Sex": user_input.demographics.sex,
|
| 718 |
"Ethnicity": user_input.demographics.ethnicity or "N/A",
|
| 719 |
},
|
|
|
|
| 723 |
add_section(
|
| 724 |
"Lifestyle",
|
| 725 |
{
|
| 726 |
+
"Smoking Status": user_input.lifestyle.smoking.status,
|
| 727 |
+
"Pack Years": user_input.lifestyle.smoking.pack_years or "N/A",
|
| 728 |
"Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
|
|
|
|
| 729 |
"Physical Activity": user_input.lifestyle.physical_activity_level or "N/A",
|
| 730 |
},
|
| 731 |
)
|
|
|
|
| 733 |
# --- Personal Medical History ---
|
| 734 |
pmh = user_input.personal_medical_history
|
| 735 |
if pmh and (
|
| 736 |
+
pmh.genetic_mutations or pmh.previous_cancers or pmh.chronic_conditions
|
| 737 |
):
|
| 738 |
pmh_data = {}
|
| 739 |
+
if pmh.genetic_mutations:
|
| 740 |
+
pmh_data["Known Genetic Mutations"] = ", ".join(
|
| 741 |
+
str(m) for m in pmh.genetic_mutations
|
| 742 |
+
)
|
| 743 |
if pmh.previous_cancers:
|
| 744 |
+
pmh_data["Previous Cancers"] = ", ".join(
|
| 745 |
+
str(c) for c in pmh.previous_cancers
|
| 746 |
+
)
|
| 747 |
+
if pmh.chronic_conditions:
|
| 748 |
+
pmh_data["Chronic Conditions"] = ", ".join(
|
| 749 |
+
str(c) for c in pmh.chronic_conditions
|
| 750 |
+
)
|
| 751 |
add_section("Personal Medical History", pmh_data)
|
| 752 |
|
| 753 |
# --- Family History ---
|
| 754 |
if user_input.family_history:
|
| 755 |
family_texts = [
|
| 756 |
+
f"{mem.relation} - {mem.cancer_type} (Age: {mem.age_at_diagnosis or 'N/A'})"
|
| 757 |
for mem in user_input.family_history
|
| 758 |
]
|
| 759 |
add_list_section("Family History", family_texts)
|
|
|
|
| 762 |
fs = user_input.female_specific
|
| 763 |
if fs:
|
| 764 |
fs_data = {}
|
| 765 |
+
if fs.menstrual and fs.menstrual.age_at_menarche is not None:
|
| 766 |
+
fs_data["Age at first period"] = fs.menstrual.age_at_menarche
|
| 767 |
+
if fs.menstrual and fs.menstrual.age_at_menopause is not None:
|
| 768 |
+
fs_data["Age at menopause"] = fs.menstrual.age_at_menopause
|
| 769 |
+
if fs.parity and fs.parity.num_live_births is not None:
|
| 770 |
+
fs_data["Number of live births"] = fs.parity.num_live_births
|
| 771 |
+
if fs.parity and fs.parity.age_at_first_live_birth is not None:
|
| 772 |
+
fs_data["Age at first live birth"] = fs.parity.age_at_first_live_birth
|
| 773 |
+
if fs_data: # Only add section if we have data
|
| 774 |
+
add_section("Female-Specific", fs_data)
|
| 775 |
+
|
| 776 |
+
# --- Current Symptoms ---
|
| 777 |
+
if user_input.symptoms:
|
| 778 |
+
symptom_texts = [str(s.symptom_type) for s in user_input.symptoms]
|
| 779 |
+
add_list_section("Current Symptoms", symptom_texts)
|
| 780 |
|
| 781 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 782 |
|
| 783 |
+
# Note: clinical_observations doesn't exist in user_input.UserInput (strict schema)
|
| 784 |
+
# The strict schema uses clinical_tests instead - skipping this section
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 785 |
|
| 786 |
+
story.append(PageBreak())
|
| 787 |
+
story.append(Paragraph("Assessment", heading_style))
|
| 788 |
+
story.append(Spacer(1, SPACER_NORMAL))
|
| 789 |
+
|
| 790 |
+
# --- NEW: Calculated Risk Scores Section ---
|
| 791 |
+
if assessment.calculated_risk_scores:
|
| 792 |
+
story.append(Paragraph("Calculated Risk Scores", subheading_style))
|
| 793 |
+
story.append(Spacer(1, SPACER_SMALL))
|
| 794 |
+
risk_scores_intro = """
|
| 795 |
+
The following risk scores have been calculated using validated clinical risk models.
|
| 796 |
+
Each score represents a quantitative assessment based on your specific profile.
|
| 797 |
+
"""
|
| 798 |
+
story.append(Paragraph(risk_scores_intro, styles["BodyText"]))
|
| 799 |
+
story.append(Spacer(1, SPACER_SMALL))
|
| 800 |
+
|
| 801 |
+
# Create table for calculated risk scores
|
| 802 |
+
score_data = [
|
| 803 |
[
|
| 804 |
Paragraph(h, table_header_style)
|
| 805 |
+
for h in ["Cancer Type", "Model", "Score", "Interpretation"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 806 |
]
|
| 807 |
]
|
| 808 |
+
score_style_cmds = [
|
| 809 |
(
|
| 810 |
"BACKGROUND",
|
| 811 |
(0, 0),
|
|
|
|
| 818 |
("BOTTOMPADDING", (0, 0), (-1, -1), 4),
|
| 819 |
("TOPPADDING", (0, 0), (-1, -1), 4),
|
| 820 |
]
|
| 821 |
+
|
| 822 |
+
# Sort by cancer type and add rows
|
| 823 |
+
for cancer_type, scores in sorted(assessment.calculated_risk_scores.items()):
|
| 824 |
+
for i, score in enumerate(scores):
|
| 825 |
+
# Only show cancer type on first row for each cancer
|
| 826 |
+
cancer_cell = Paragraph(cancer_type, table_body_style) if i == 0 else ""
|
| 827 |
+
score_data.append(
|
| 828 |
+
[
|
| 829 |
+
cancer_cell,
|
| 830 |
+
Paragraph(score.name, table_body_style),
|
| 831 |
+
Paragraph(score.score or "N/A", table_body_style),
|
| 832 |
+
Paragraph(score.interpretation or "N/A", table_body_style),
|
| 833 |
+
]
|
| 834 |
+
)
|
| 835 |
+
|
| 836 |
+
score_widths = [1.5 * inch, 1.5 * inch, 1.0 * inch, 2.5 * inch]
|
| 837 |
+
scaled_widths = [w * (CONTENT_WIDTH / sum(score_widths)) for w in score_widths]
|
| 838 |
+
scores_table = Table(
|
| 839 |
+
score_data, colWidths=scaled_widths, style=score_style_cmds, splitByRow=1
|
| 840 |
)
|
| 841 |
+
story.append(scores_table)
|
| 842 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 843 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 844 |
# --- New 3-Column Summary Section ---
|
| 845 |
headers = [
|
| 846 |
Paragraph("<b>Overall Risk Score</b>", summary_header_style),
|
|
|
|
| 948 |
|
| 949 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 950 |
|
| 951 |
+
story.append(Paragraph("AI-Generated Risk Interpretations", subheading_style))
|
| 952 |
story.append(Spacer(1, SPACER_SMALL))
|
| 953 |
risk_intro_text = """
|
| 954 |
+
The following interpretations provide context and explanation for the calculated risk scores above.
|
| 955 |
+
These AI-generated insights identify key contributing factors and provide actionable recommendations.
|
| 956 |
+
For cancers with higher risk levels (3-5), additional details on risk factors and recommendations are provided.
|
|
|
|
| 957 |
"""
|
| 958 |
story.append(Paragraph(risk_intro_text, styles["BodyText"]))
|
| 959 |
story.append(Spacer(1, SPACER_SMALL))
|
|
|
|
| 1534 |
Returns:
|
| 1535 |
Mapping of RiskFactorCategory to integer points.
|
| 1536 |
"""
|
|
|
|
|
|
|
| 1537 |
risk_points_by_category = defaultdict(int)
|
| 1538 |
strength_to_points = {
|
| 1539 |
ContributionStrength.MAJOR: 5,
|
|
|
|
| 1743 |
Returns:
|
| 1744 |
A ReportLab Table or Paragraph to insert in the story.
|
| 1745 |
"""
|
|
|
|
|
|
|
| 1746 |
if not assessment.identified_risk_factors:
|
| 1747 |
return Paragraph("No specific risk factors identified.", panel_body_style)
|
| 1748 |
|
src/sentinel/risk_aggregation.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Risk score aggregation and formatting utilities."""
|
| 2 |
+
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
|
| 5 |
+
from .models import RiskScore
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def group_scores_by_cancer_type(scores: list[RiskScore]) -> dict[str, list[RiskScore]]:
    """Group risk scores by cancer type.

    Cancer type labels are stripped of surrounding whitespace before grouping,
    so ``"Lung "`` and ``"Lung"`` end up in the same group. Within a group the
    scores keep the order in which they appeared in ``scores``.

    Args:
        scores: List of risk scores from various models.

    Returns:
        Dictionary mapping cancer type to list of risk scores, with keys
        inserted in sorted order.

    Raises:
        ValueError: If any score is missing a cancer_type (``None``, empty,
            or consisting only of whitespace).
    """
    grouped: dict[str, list[RiskScore]] = defaultdict(list)

    for score in scores:
        # Normalize BEFORE validating: a whitespace-only label would otherwise
        # pass the truthiness check and silently create an "" group.
        cancer_type = (score.cancer_type or "").strip()
        if not cancer_type:
            raise ValueError(
                f"Risk score '{score.name}' is missing cancer_type. "
                "All risk scores must have a cancer_type specified."
            )
        grouped[cancer_type].append(score)

    # Hand back a plain dict (not a defaultdict) ordered by cancer type.
    return dict(sorted(grouped.items()))
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def format_scores_for_llm(grouped_scores: dict[str, list[RiskScore]]) -> str:
    """Render grouped risk scores as a Markdown-style block for LLM context.

    The output has a title and intro line, one ``##`` section per cancer type
    (in the mapping's iteration order), one ``###`` entry per model, and a
    closing instruction list telling the LLM to treat the scores as ground
    truth. Description, interpretation and references lines are emitted only
    when the corresponding attribute is truthy.

    Args:
        grouped_scores: Dictionary mapping cancer type to list of risk scores.

    Returns:
        Formatted string representation of all risk scores, or the sentence
        ``"No risk scores calculated."`` when the mapping is empty.
    """
    if not grouped_scores:
        return "No risk scores calculated."

    out = [
        "# Calculated Risk Scores (Ground Truth)\n",
        "The following risk scores have been calculated using validated models:\n",
    ]

    for cancer_type, entries in grouped_scores.items():
        out.append(f"\n## {cancer_type}\n")

        for entry in entries:
            out.extend([f"### {entry.name}", f"- **Score**: {entry.score}"])

            if entry.description:
                out.append(f"- **Description**: {entry.description}")
            if entry.interpretation:
                out.append(f"- **Interpretation**: {entry.interpretation}")
            if entry.references:
                refs = "; ".join(entry.references)
                out.append(f"- **References**: {refs}")

            # Blank separator line between models.
            out.append("")

    out.extend(
        [
            "\n---\n",
            "**Important**: These scores are the ground truth. Your task is to:",
            "1. Explain what these scores mean for the patient in clear language",
            "2. Identify and highlight key risk factors contributing to elevated scores",
            "3. Provide actionable context and insights based on these scores",
            "4. DO NOT generate your own risk levels - explain and contextualize the calculated ones\n",
        ]
    )

    return "\n".join(out)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def format_scores_for_pdf(
    grouped_scores: dict[str, list[RiskScore]],
) -> list[tuple[str, list[RiskScore]]]:
    """Turn the grouped-score mapping into an ordered list for the PDF report.

    Args:
        grouped_scores: Dictionary mapping cancer type to list of risk scores.

    Returns:
        List of (cancer_type, scores) tuples sorted by cancer type.
    """
    pairs = list(grouped_scores.items())
    # Keys are unique, so tuple ordering is decided by the cancer type alone.
    pairs.sort()
    return pairs
|
src/sentinel/risk_models/qcancer.py
CHANGED
|
@@ -23,6 +23,7 @@ from typing import Annotated
|
|
| 23 |
|
| 24 |
from pydantic import Field
|
| 25 |
|
|
|
|
| 26 |
from sentinel.risk_models.base import RiskModel
|
| 27 |
from sentinel.user_input import (
|
| 28 |
AlcoholConsumption,
|
|
@@ -1780,6 +1781,107 @@ class QCancerRiskModel(RiskModel):
|
|
| 1780 |
"Values sum to 100% and reflect relative likelihoods over the next 10 years; higher percentages warrant clinical review."
|
| 1781 |
)
|
| 1782 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1783 |
def references(self) -> list[str]:
|
| 1784 |
return [
|
| 1785 |
"Hippisley-Cox J, Coupland C. QCancer (10 year risk) BMJ. 2014;349:g4606.",
|
|
|
|
| 23 |
|
| 24 |
from pydantic import Field
|
| 25 |
|
| 26 |
+
from sentinel.models import RiskScore
|
| 27 |
from sentinel.risk_models.base import RiskModel
|
| 28 |
from sentinel.user_input import (
|
| 29 |
AlcoholConsumption,
|
|
|
|
| 1781 |
"Values sum to 100% and reflect relative likelihoods over the next 10 years; higher percentages warrant clinical review."
|
| 1782 |
)
|
| 1783 |
|
| 1784 |
+
def run(self, user: UserInput) -> list:
    """Compute QCancer scores and return them as RiskScore objects.

    Overrides the base class to return multiple scores (one per cancer type).
    The sex-specific model is chosen from the first letter of the normalized
    ``user.demographics.sex`` value ("f..." or "m..."). When sex is neither,
    or when a ``ValueError`` is raised while computing, a single placeholder
    score whose text starts with "N/A:" is returned instead.

    Args:
        user: The user profile to score.

    Returns:
        List of RiskScore objects, one for each cancer type assessed, or a
        one-element list holding the "N/A" placeholder.
    """

    def unavailable(message: str) -> list:
        # Single placeholder entry used whenever QCancer cannot be computed.
        return [
            RiskScore(
                name=self.name,
                score=message,
                cancer_type="Multiple Cancer Sites",
                description=self.description(),
                interpretation=self.interpretation(),
                references=self.references(),
            )
        ]

    normalized_sex = (user.demographics.sex or "").strip().lower()

    # Everything, including the "unknown sex" placeholder, stays inside the
    # try so that any ValueError is reported as an N/A score.
    try:
        if normalized_sex.startswith("f"):
            female_inputs = self._extract_female_params(user)
            female_probs = compute_female_probabilities(**female_inputs)
            return self._create_individual_scores(female_probs, is_female=True)

        if normalized_sex.startswith("m"):
            male_inputs = self._extract_male_params(user)
            male_probs = compute_male_probabilities(**male_inputs)
            return self._create_individual_scores(male_probs, is_female=False)

        return unavailable("N/A: QCancer requires patient sex (male or female).")
    except ValueError as exc:
        return unavailable(f"N/A: {exc}")
|
| 1833 |
+
|
| 1834 |
+
def _create_individual_scores(
    self, risks: dict[str, float], is_female: bool
) -> list[RiskScore]:
    """Create individual RiskScore objects for each cancer type.

    The first entry is always the "No Cancer" score (taken from the
    ``"none"`` key); the remaining entries follow the order of
    ``FEMALE_CANCER_TYPES`` or ``MALE_CANCER_TYPES``. A cancer type missing
    from ``risks`` is reported as ``0.0%``.

    Args:
        risks: Dictionary of cancer names to probabilities; values are
            formatted directly as percentages with one decimal place.
        is_female: Whether results are for female patient.

    Returns:
        List of RiskScore objects.
    """
    # RiskScore comes from the module-level import; no local re-import needed.
    order = FEMALE_CANCER_TYPES if is_female else MALE_CANCER_TYPES

    # Add "No Cancer" score first
    no_cancer_pct = risks.get("none", 0.0)
    scores = [
        RiskScore(
            name="QCancer",
            score=f"{no_cancer_pct:.1f}%",
            cancer_type="No Cancer",
            description="10-year probability of not developing cancer",
            interpretation="Baseline probability - higher values indicate lower overall cancer risk",
            references=self.references(),
        )
    ]

    # Add each cancer type
    for cancer_name in order:
        pct = risks.get(cancer_name, 0.0)
        # "oesophageal_cancer"-style keys become "Oesophageal Cancer" labels.
        display_name = cancer_name.replace("_", " ").title()

        scores.append(
            RiskScore(
                name="QCancer",
                score=f"{pct:.1f}%",
                cancer_type=display_name,
                description=f"10-year probability of {display_name.lower()}",
                interpretation=(
                    "Percentages reflect relative likelihood over next 10 years. "
                    "Values >1% warrant clinical review."
                ),
                references=self.references(),
            )
        )

    return scores
|
| 1884 |
+
|
| 1885 |
def references(self) -> list[str]:
|
| 1886 |
return [
|
| 1887 |
"Hippisley-Cox J, Coupland C. QCancer (10 year risk) BMJ. 2014;349:g4606.",
|
src/sentinel/utils.py
CHANGED
|
@@ -5,7 +5,7 @@ from typing import Any, Literal
|
|
| 5 |
|
| 6 |
import yaml
|
| 7 |
|
| 8 |
-
from .
|
| 9 |
|
| 10 |
|
| 11 |
def load_user_file(source: str | Any) -> UserInput:
|
|
|
|
| 5 |
|
| 6 |
import yaml
|
| 7 |
|
| 8 |
+
from .user_input import UserInput
|
| 9 |
|
| 10 |
|
| 11 |
def load_user_file(source: str | Any) -> UserInput:
|
tests/test_conversation.py
CHANGED
|
@@ -4,19 +4,36 @@ from unittest.mock import MagicMock, patch
|
|
| 4 |
from sentinel.conversation import ConversationManager
|
| 5 |
from sentinel.models import (
|
| 6 |
ConversationResponse,
|
| 7 |
-
Demographics,
|
| 8 |
InitialAssessment,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
Lifestyle,
|
| 10 |
PersonalMedicalHistory,
|
|
|
|
| 11 |
UserInput,
|
| 12 |
)
|
| 13 |
|
| 14 |
|
| 15 |
def sample_user() -> UserInput:
|
| 16 |
return UserInput(
|
| 17 |
-
demographics=Demographics(
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
family_history=[],
|
| 21 |
)
|
| 22 |
|
|
@@ -25,11 +42,10 @@ def sample_user() -> UserInput:
|
|
| 25 |
@patch("sentinel.llm_service.create_conversational_chain")
|
| 26 |
def test_conversation_flow(mock_create_conversational_chain, mock_create_initial_chain):
|
| 27 |
structured = MagicMock()
|
| 28 |
-
structured.prompt.format.return_value = "full prompt"
|
| 29 |
freeform = MagicMock()
|
| 30 |
structured.invoke.return_value = {
|
| 31 |
"overall_summary": "ok",
|
| 32 |
-
"
|
| 33 |
"dx_recommendations": [],
|
| 34 |
}
|
| 35 |
freeform.invoke.return_value = "hi"
|
|
@@ -37,15 +53,21 @@ def test_conversation_flow(mock_create_conversational_chain, mock_create_initial
|
|
| 37 |
mock_create_conversational_chain.return_value = freeform
|
| 38 |
|
| 39 |
conv = ConversationManager(structured, freeform)
|
| 40 |
-
|
|
|
|
| 41 |
assert isinstance(result, InitialAssessment)
|
| 42 |
assert result.overall_summary == "ok"
|
| 43 |
-
assert
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
answer = conv.follow_up("question")
|
| 46 |
assert isinstance(answer, ConversationResponse)
|
| 47 |
assert answer.response == "hi"
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
]
|
|
|
|
| 4 |
from sentinel.conversation import ConversationManager
|
| 5 |
from sentinel.models import (
|
| 6 |
ConversationResponse,
|
|
|
|
| 7 |
InitialAssessment,
|
| 8 |
+
)
|
| 9 |
+
from sentinel.user_input import (
|
| 10 |
+
Anthropometrics,
|
| 11 |
+
Demographics,
|
| 12 |
Lifestyle,
|
| 13 |
PersonalMedicalHistory,
|
| 14 |
+
SmokingHistory,
|
| 15 |
UserInput,
|
| 16 |
)
|
| 17 |
|
| 18 |
|
| 19 |
def sample_user() -> UserInput:
    """Build the minimal user profile shared by the conversation tests.

    Returns:
        A 30-year-old male never-smoker with empty medical and family history.
    """
    demographics = Demographics(
        age_years=30,
        sex="male",
        anthropometrics=Anthropometrics(height_cm=175, weight_kg=70),
    )
    lifestyle = Lifestyle(
        smoking=SmokingHistory(status="never", cigarettes_per_day=0, years_smoked=0),
    )
    medical_history = PersonalMedicalHistory(chronic_conditions=[], previous_cancers=[])

    return UserInput(
        demographics=demographics,
        lifestyle=lifestyle,
        personal_medical_history=medical_history,
        family_history=[],
    )
|
| 39 |
|
|
|
|
| 42 |
@patch("sentinel.llm_service.create_conversational_chain")
|
| 43 |
def test_conversation_flow(mock_create_conversational_chain, mock_create_initial_chain):
|
| 44 |
structured = MagicMock()
|
|
|
|
| 45 |
freeform = MagicMock()
|
| 46 |
structured.invoke.return_value = {
|
| 47 |
"overall_summary": "ok",
|
| 48 |
+
"llm_risk_interpretations": [],
|
| 49 |
"dx_recommendations": [],
|
| 50 |
}
|
| 51 |
freeform.invoke.return_value = "hi"
|
|
|
|
| 53 |
mock_create_conversational_chain.return_value = freeform
|
| 54 |
|
| 55 |
conv = ConversationManager(structured, freeform)
|
| 56 |
+
user = sample_user()
|
| 57 |
+
result = conv.initial_assessment(user)
|
| 58 |
assert isinstance(result, InitialAssessment)
|
| 59 |
assert result.overall_summary == "ok"
|
| 60 |
+
assert result.calculated_risk_scores == {}
|
| 61 |
+
|
| 62 |
+
# Verify history contains initial assessment message
|
| 63 |
+
assert len(conv.history) == 1
|
| 64 |
+
assert conv.history[0][0].startswith("Initial assessment for user profile:")
|
| 65 |
+
assert conv.history[0][1] == result.model_dump_json()
|
| 66 |
|
| 67 |
answer = conv.follow_up("question")
|
| 68 |
assert isinstance(answer, ConversationResponse)
|
| 69 |
assert answer.response == "hi"
|
| 70 |
+
|
| 71 |
+
# Verify follow-up added to history
|
| 72 |
+
assert len(conv.history) == 2
|
| 73 |
+
assert conv.history[1] == ("question", "hi")
|
tests/test_demo.py
CHANGED
|
@@ -7,20 +7,31 @@ import yaml
|
|
| 7 |
|
| 8 |
from sentinel.models import (
|
| 9 |
CancerRiskAssessment,
|
| 10 |
-
ClinicalObservation,
|
| 11 |
ContributingFactor,
|
| 12 |
ContributionStrength,
|
| 13 |
-
Demographics,
|
| 14 |
DxRecommendation,
|
| 15 |
-
FamilyMemberCancer,
|
| 16 |
InitialAssessment,
|
| 17 |
-
Lifestyle,
|
| 18 |
-
PersonalMedicalHistory,
|
| 19 |
RiskFactor,
|
| 20 |
RiskFactorCategory,
|
| 21 |
-
UserInput,
|
| 22 |
)
|
| 23 |
from sentinel.reporting import generate_excel_report, generate_pdf_report
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
from sentinel.utils import load_user_file
|
| 25 |
|
| 26 |
|
|
@@ -32,8 +43,15 @@ def test_load_user_file_yaml(tmp_path):
|
|
| 32 |
"""
|
| 33 |
|
| 34 |
data = {
|
| 35 |
-
"demographics": {
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"personal_medical_history": {},
|
| 38 |
"family_history": [],
|
| 39 |
}
|
|
@@ -42,9 +60,9 @@ def test_load_user_file_yaml(tmp_path):
|
|
| 42 |
|
| 43 |
user = load_user_file(str(path))
|
| 44 |
assert isinstance(user, UserInput)
|
| 45 |
-
assert user.demographics.
|
| 46 |
-
assert user.lifestyle.
|
| 47 |
-
assert user.
|
| 48 |
|
| 49 |
|
| 50 |
@pytest.mark.parametrize("save_files", [True, False])
|
|
@@ -57,41 +75,31 @@ def test_generate_reports(tmp_path, save_files):
|
|
| 57 |
"""
|
| 58 |
# 1. Create mock UserInput data with all fields
|
| 59 |
user = UserInput(
|
| 60 |
-
demographics=Demographics(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
lifestyle=Lifestyle(
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
),
|
| 68 |
personal_medical_history=PersonalMedicalHistory(
|
| 69 |
-
previous_cancers=[
|
| 70 |
-
known_genetic_mutations=["BRCA2"],
|
| 71 |
-
chronic_illnesses=["IBS"],
|
| 72 |
),
|
| 73 |
family_history=[
|
| 74 |
FamilyMemberCancer(
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
)
|
| 77 |
],
|
| 78 |
-
clinical_observations=[
|
| 79 |
-
ClinicalObservation(
|
| 80 |
-
test_name="Blood Pressure",
|
| 81 |
-
value="120/80",
|
| 82 |
-
unit="mmHg",
|
| 83 |
-
reference_range="<130/85",
|
| 84 |
-
date="2023-05-10",
|
| 85 |
-
),
|
| 86 |
-
ClinicalObservation(
|
| 87 |
-
test_name="Cholesterol",
|
| 88 |
-
value="190",
|
| 89 |
-
unit="mg/dL",
|
| 90 |
-
reference_range="<200",
|
| 91 |
-
date="2023-05-10",
|
| 92 |
-
),
|
| 93 |
-
],
|
| 94 |
-
current_concerns_or_symptoms="Occasional headaches.",
|
| 95 |
)
|
| 96 |
|
| 97 |
# 2. Create mock InitialAssessment data
|
|
@@ -122,7 +130,7 @@ def test_generate_reports(tmp_path, save_files):
|
|
| 122 |
category=RiskFactorCategory.LIFESTYLE,
|
| 123 |
),
|
| 124 |
],
|
| 125 |
-
|
| 126 |
CancerRiskAssessment(
|
| 127 |
cancer_type="Breast Cancer",
|
| 128 |
risk_level=4,
|
|
|
|
| 7 |
|
| 8 |
from sentinel.models import (
|
| 9 |
CancerRiskAssessment,
|
|
|
|
| 10 |
ContributingFactor,
|
| 11 |
ContributionStrength,
|
|
|
|
| 12 |
DxRecommendation,
|
|
|
|
| 13 |
InitialAssessment,
|
|
|
|
|
|
|
| 14 |
RiskFactor,
|
| 15 |
RiskFactorCategory,
|
|
|
|
| 16 |
)
|
| 17 |
from sentinel.reporting import generate_excel_report, generate_pdf_report
|
| 18 |
+
from sentinel.user_input import (
|
| 19 |
+
AlcoholConsumption,
|
| 20 |
+
Anthropometrics,
|
| 21 |
+
CancerType,
|
| 22 |
+
Demographics,
|
| 23 |
+
Ethnicity,
|
| 24 |
+
FamilyMemberCancer,
|
| 25 |
+
FamilyRelation,
|
| 26 |
+
FamilySide,
|
| 27 |
+
Lifestyle,
|
| 28 |
+
PersonalMedicalHistory,
|
| 29 |
+
RelationshipDegree,
|
| 30 |
+
Sex,
|
| 31 |
+
SmokingHistory,
|
| 32 |
+
SmokingStatus,
|
| 33 |
+
UserInput,
|
| 34 |
+
)
|
| 35 |
from sentinel.utils import load_user_file
|
| 36 |
|
| 37 |
|
|
|
|
| 43 |
"""
|
| 44 |
|
| 45 |
data = {
|
| 46 |
+
"demographics": {
|
| 47 |
+
"age_years": 30,
|
| 48 |
+
"sex": "male",
|
| 49 |
+
"anthropometrics": {"height_cm": 175, "weight_kg": 70},
|
| 50 |
+
},
|
| 51 |
+
"lifestyle": {
|
| 52 |
+
"smoking": {"status": "never"},
|
| 53 |
+
"alcohol_consumption": "none",
|
| 54 |
+
},
|
| 55 |
"personal_medical_history": {},
|
| 56 |
"family_history": [],
|
| 57 |
}
|
|
|
|
| 60 |
|
| 61 |
user = load_user_file(str(path))
|
| 62 |
assert isinstance(user, UserInput)
|
| 63 |
+
assert user.demographics.age_years == 30
|
| 64 |
+
assert user.lifestyle.smoking.status == SmokingStatus.NEVER
|
| 65 |
+
assert user.symptoms == []
|
| 66 |
|
| 67 |
|
| 68 |
@pytest.mark.parametrize("save_files", [True, False])
|
|
|
|
| 75 |
"""
|
| 76 |
# 1. Create mock UserInput data with all fields
|
| 77 |
user = UserInput(
|
| 78 |
+
demographics=Demographics(
|
| 79 |
+
age_years=45,
|
| 80 |
+
sex=Sex.FEMALE,
|
| 81 |
+
ethnicity=Ethnicity.WHITE,
|
| 82 |
+
anthropometrics=Anthropometrics(height_cm=165, weight_kg=70),
|
| 83 |
+
),
|
| 84 |
lifestyle=Lifestyle(
|
| 85 |
+
smoking=SmokingHistory(
|
| 86 |
+
status=SmokingStatus.FORMER,
|
| 87 |
+
pack_years=10,
|
| 88 |
+
),
|
| 89 |
+
alcohol_consumption=AlcoholConsumption.LIGHT,
|
| 90 |
),
|
| 91 |
personal_medical_history=PersonalMedicalHistory(
|
| 92 |
+
previous_cancers=[CancerType.MELANOMA],
|
|
|
|
|
|
|
| 93 |
),
|
| 94 |
family_history=[
|
| 95 |
FamilyMemberCancer(
|
| 96 |
+
relation=FamilyRelation.MOTHER,
|
| 97 |
+
cancer_type=CancerType.BREAST,
|
| 98 |
+
age_at_diagnosis=50,
|
| 99 |
+
degree=RelationshipDegree.FIRST,
|
| 100 |
+
side=FamilySide.MATERNAL,
|
| 101 |
)
|
| 102 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
)
|
| 104 |
|
| 105 |
# 2. Create mock InitialAssessment data
|
|
|
|
| 130 |
category=RiskFactorCategory.LIFESTYLE,
|
| 131 |
),
|
| 132 |
],
|
| 133 |
+
llm_risk_interpretations=[
|
| 134 |
CancerRiskAssessment(
|
| 135 |
cancer_type="Breast Cancer",
|
| 136 |
risk_level=4,
|
tests/test_integration_canrisk_api.py
CHANGED
|
@@ -15,15 +15,30 @@ from pathlib import Path
|
|
| 15 |
import pytest
|
| 16 |
|
| 17 |
from sentinel.api_clients.canrisk import BOADICEAInput, CanRiskClient
|
| 18 |
-
from sentinel.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
Demographics,
|
|
|
|
| 20 |
FamilyMemberCancer,
|
|
|
|
|
|
|
| 21 |
FemaleSpecific,
|
|
|
|
|
|
|
|
|
|
| 22 |
Lifestyle,
|
|
|
|
|
|
|
| 23 |
PersonalMedicalHistory,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
UserInput,
|
| 25 |
)
|
| 26 |
-
from sentinel.risk_models.boadicea import BOADICEARiskModel
|
| 27 |
|
| 28 |
CREDENTIALS_AVAILABLE = bool(
|
| 29 |
os.getenv("CANRISK_USERNAME") and os.getenv("CANRISK_PASSWORD")
|
|
@@ -48,28 +63,37 @@ class Scenario:
|
|
| 48 |
def _high_risk_user() -> UserInput:
|
| 49 |
return UserInput(
|
| 50 |
demographics=Demographics(
|
| 51 |
-
|
| 52 |
-
sex=
|
| 53 |
-
ethnicity=
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
| 56 |
),
|
| 57 |
-
lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="none"),
|
| 58 |
personal_medical_history=PersonalMedicalHistory(
|
| 59 |
-
|
| 60 |
),
|
| 61 |
female_specific=FemaleSpecific(
|
| 62 |
-
|
| 63 |
-
age_at_first_live_birth=28,
|
| 64 |
-
|
| 65 |
-
hormone_therapy_use="N",
|
| 66 |
),
|
| 67 |
family_history=[
|
| 68 |
FamilyMemberCancer(
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
),
|
| 71 |
FamilyMemberCancer(
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
),
|
| 74 |
],
|
| 75 |
)
|
|
@@ -78,28 +102,37 @@ def _high_risk_user() -> UserInput:
|
|
| 78 |
def _moderate_risk_user() -> UserInput:
|
| 79 |
return UserInput(
|
| 80 |
demographics=Demographics(
|
| 81 |
-
|
| 82 |
-
sex=
|
| 83 |
-
ethnicity=
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
| 86 |
),
|
| 87 |
-
lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="light"),
|
| 88 |
personal_medical_history=PersonalMedicalHistory(
|
| 89 |
-
|
| 90 |
),
|
| 91 |
female_specific=FemaleSpecific(
|
| 92 |
-
|
| 93 |
-
age_at_first_live_birth=30,
|
| 94 |
-
|
| 95 |
-
hormone_therapy_use="former",
|
| 96 |
),
|
| 97 |
family_history=[
|
| 98 |
FamilyMemberCancer(
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
),
|
| 101 |
FamilyMemberCancer(
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
),
|
| 104 |
],
|
| 105 |
)
|
|
@@ -108,24 +141,28 @@ def _moderate_risk_user() -> UserInput:
|
|
| 108 |
def _average_risk_user() -> UserInput:
|
| 109 |
return UserInput(
|
| 110 |
demographics=Demographics(
|
| 111 |
-
|
| 112 |
-
sex=
|
| 113 |
-
ethnicity=
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
| 116 |
),
|
| 117 |
-
lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="moderate"),
|
| 118 |
personal_medical_history=PersonalMedicalHistory(),
|
| 119 |
female_specific=FemaleSpecific(
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
),
|
| 124 |
family_history=[
|
| 125 |
FamilyMemberCancer(
|
| 126 |
-
|
| 127 |
-
cancer_type=
|
| 128 |
age_at_diagnosis=67,
|
|
|
|
|
|
|
| 129 |
),
|
| 130 |
],
|
| 131 |
)
|
|
|
|
| 15 |
import pytest
|
| 16 |
|
| 17 |
from sentinel.api_clients.canrisk import BOADICEAInput, CanRiskClient
|
| 18 |
+
from sentinel.risk_models.boadicea import BOADICEARiskModel
|
| 19 |
+
from sentinel.user_input import (
|
| 20 |
+
AlcoholConsumption,
|
| 21 |
+
Anthropometrics,
|
| 22 |
+
CancerType,
|
| 23 |
Demographics,
|
| 24 |
+
Ethnicity,
|
| 25 |
FamilyMemberCancer,
|
| 26 |
+
FamilyRelation,
|
| 27 |
+
FamilySide,
|
| 28 |
FemaleSpecific,
|
| 29 |
+
GeneticMutation,
|
| 30 |
+
HormoneUse,
|
| 31 |
+
HormoneUseHistory,
|
| 32 |
Lifestyle,
|
| 33 |
+
MenstrualHistory,
|
| 34 |
+
ParityHistory,
|
| 35 |
PersonalMedicalHistory,
|
| 36 |
+
RelationshipDegree,
|
| 37 |
+
Sex,
|
| 38 |
+
SmokingHistory,
|
| 39 |
+
SmokingStatus,
|
| 40 |
UserInput,
|
| 41 |
)
|
|
|
|
| 42 |
|
| 43 |
CREDENTIALS_AVAILABLE = bool(
|
| 44 |
os.getenv("CANRISK_USERNAME") and os.getenv("CANRISK_PASSWORD")
|
|
|
|
| 63 |
def _high_risk_user() -> UserInput:
|
| 64 |
return UserInput(
|
| 65 |
demographics=Demographics(
|
| 66 |
+
age_years=42,
|
| 67 |
+
sex=Sex.FEMALE,
|
| 68 |
+
ethnicity=Ethnicity.ASHKENAZI_JEWISH,
|
| 69 |
+
anthropometrics=Anthropometrics(height_cm=165, weight_kg=65.0),
|
| 70 |
+
),
|
| 71 |
+
lifestyle=Lifestyle(
|
| 72 |
+
smoking=SmokingHistory(status=SmokingStatus.NEVER),
|
| 73 |
+
alcohol_consumption=AlcoholConsumption.NONE,
|
| 74 |
),
|
|
|
|
| 75 |
personal_medical_history=PersonalMedicalHistory(
|
| 76 |
+
genetic_mutations=[GeneticMutation.BRCA1, GeneticMutation.BRCA2],
|
| 77 |
),
|
| 78 |
female_specific=FemaleSpecific(
|
| 79 |
+
menstrual=MenstrualHistory(age_at_menarche=13),
|
| 80 |
+
parity=ParityHistory(age_at_first_live_birth=28, num_live_births=1),
|
| 81 |
+
hormone_use=HormoneUseHistory(estrogen_use=HormoneUse.NEVER),
|
|
|
|
| 82 |
),
|
| 83 |
family_history=[
|
| 84 |
FamilyMemberCancer(
|
| 85 |
+
relation=FamilyRelation.MOTHER,
|
| 86 |
+
cancer_type=CancerType.BREAST,
|
| 87 |
+
age_at_diagnosis=52,
|
| 88 |
+
degree=RelationshipDegree.FIRST,
|
| 89 |
+
side=FamilySide.MATERNAL,
|
| 90 |
),
|
| 91 |
FamilyMemberCancer(
|
| 92 |
+
relation=FamilyRelation.SISTER,
|
| 93 |
+
cancer_type=CancerType.OVARIAN,
|
| 94 |
+
age_at_diagnosis=48,
|
| 95 |
+
degree=RelationshipDegree.FIRST,
|
| 96 |
+
side=FamilySide.UNKNOWN,
|
| 97 |
),
|
| 98 |
],
|
| 99 |
)
|
|
|
|
| 102 |
def _moderate_risk_user() -> UserInput:
|
| 103 |
return UserInput(
|
| 104 |
demographics=Demographics(
|
| 105 |
+
age_years=50,
|
| 106 |
+
sex=Sex.FEMALE,
|
| 107 |
+
ethnicity=Ethnicity.HISPANIC,
|
| 108 |
+
anthropometrics=Anthropometrics(height_cm=160, weight_kg=70.0),
|
| 109 |
+
),
|
| 110 |
+
lifestyle=Lifestyle(
|
| 111 |
+
smoking=SmokingHistory(status=SmokingStatus.NEVER),
|
| 112 |
+
alcohol_consumption=AlcoholConsumption.LIGHT,
|
| 113 |
),
|
|
|
|
| 114 |
personal_medical_history=PersonalMedicalHistory(
|
| 115 |
+
genetic_mutations=[GeneticMutation.BRCA1],
|
| 116 |
),
|
| 117 |
female_specific=FemaleSpecific(
|
| 118 |
+
menstrual=MenstrualHistory(age_at_menarche=12),
|
| 119 |
+
parity=ParityHistory(age_at_first_live_birth=30, num_live_births=2),
|
| 120 |
+
hormone_use=HormoneUseHistory(estrogen_use=HormoneUse.FORMER),
|
|
|
|
| 121 |
),
|
| 122 |
family_history=[
|
| 123 |
FamilyMemberCancer(
|
| 124 |
+
relation=FamilyRelation.MOTHER,
|
| 125 |
+
cancer_type=CancerType.BREAST,
|
| 126 |
+
age_at_diagnosis=60,
|
| 127 |
+
degree=RelationshipDegree.FIRST,
|
| 128 |
+
side=FamilySide.MATERNAL,
|
| 129 |
),
|
| 130 |
FamilyMemberCancer(
|
| 131 |
+
relation=FamilyRelation.MATERNAL_AUNT,
|
| 132 |
+
cancer_type=CancerType.BREAST,
|
| 133 |
+
age_at_diagnosis=55,
|
| 134 |
+
degree=RelationshipDegree.SECOND,
|
| 135 |
+
side=FamilySide.MATERNAL,
|
| 136 |
),
|
| 137 |
],
|
| 138 |
)
|
|
|
|
| 141 |
def _average_risk_user() -> UserInput:
|
| 142 |
return UserInput(
|
| 143 |
demographics=Demographics(
|
| 144 |
+
age_years=38,
|
| 145 |
+
sex=Sex.FEMALE,
|
| 146 |
+
ethnicity=Ethnicity.WHITE,
|
| 147 |
+
anthropometrics=Anthropometrics(height_cm=168, weight_kg=62.0),
|
| 148 |
+
),
|
| 149 |
+
lifestyle=Lifestyle(
|
| 150 |
+
smoking=SmokingHistory(status=SmokingStatus.NEVER),
|
| 151 |
+
alcohol_consumption=AlcoholConsumption.MODERATE,
|
| 152 |
),
|
|
|
|
| 153 |
personal_medical_history=PersonalMedicalHistory(),
|
| 154 |
female_specific=FemaleSpecific(
|
| 155 |
+
menstrual=MenstrualHistory(age_at_menarche=12),
|
| 156 |
+
parity=ParityHistory(num_live_births=0),
|
| 157 |
+
hormone_use=HormoneUseHistory(estrogen_use=HormoneUse.NEVER),
|
| 158 |
),
|
| 159 |
family_history=[
|
| 160 |
FamilyMemberCancer(
|
| 161 |
+
relation=FamilyRelation.PATERNAL_GRANDMOTHER,
|
| 162 |
+
cancer_type=CancerType.BREAST,
|
| 163 |
age_at_diagnosis=67,
|
| 164 |
+
degree=RelationshipDegree.SECOND,
|
| 165 |
+
side=FamilySide.PATERNAL,
|
| 166 |
),
|
| 167 |
],
|
| 168 |
)
|
tests/test_main.py
CHANGED
|
@@ -31,18 +31,30 @@ def test_root():
|
|
| 31 |
@patch("apps.api.main.SentinelFactory")
|
| 32 |
def test_assess_local(mock_factory):
|
| 33 |
payload = {
|
| 34 |
-
"demographics": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
"lifestyle": {
|
| 36 |
-
"
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
"alcohol_consumption": "moderate",
|
| 39 |
},
|
| 40 |
"family_history": [
|
| 41 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
],
|
| 43 |
"personal_medical_history": {
|
| 44 |
"previous_cancers": ["melanoma"],
|
| 45 |
-
"chronic_illnesses": [],
|
| 46 |
},
|
| 47 |
}
|
| 48 |
expected = {
|
|
@@ -51,6 +63,7 @@ def test_assess_local(mock_factory):
|
|
| 51 |
"response": None,
|
| 52 |
"overall_summary": "ok",
|
| 53 |
"overall_risk_score": None,
|
|
|
|
| 54 |
"identified_risk_factors": [],
|
| 55 |
"risk_assessments": [],
|
| 56 |
"dx_recommendations": [],
|
|
@@ -73,10 +86,17 @@ def test_assess_local(mock_factory):
|
|
| 73 |
@patch("apps.api.main.SentinelFactory")
|
| 74 |
def test_assess_bad_provider(mock_factory):
|
| 75 |
payload = {
|
| 76 |
-
"demographics": {
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
"family_history": [],
|
| 79 |
-
"personal_medical_history": {
|
| 80 |
}
|
| 81 |
mock_factory.side_effect = ValueError("bad")
|
| 82 |
response = client.post("/assess/invalid", json={"user_input": payload})
|
|
@@ -86,18 +106,22 @@ def test_assess_bad_provider(mock_factory):
|
|
| 86 |
@patch("apps.api.main.SentinelFactory")
|
| 87 |
def test_assess_with_observations(mock_factory):
|
| 88 |
payload = {
|
| 89 |
-
"demographics": {
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
"family_history": [],
|
| 93 |
-
"
|
| 94 |
-
{
|
| 95 |
-
"
|
| 96 |
-
"value": "5",
|
| 97 |
-
"unit": "ng/mL",
|
| 98 |
-
"reference_range": "<4",
|
| 99 |
}
|
| 100 |
-
|
| 101 |
}
|
| 102 |
expected = {
|
| 103 |
"thinking": None,
|
|
@@ -105,6 +129,7 @@ def test_assess_with_observations(mock_factory):
|
|
| 105 |
"response": None,
|
| 106 |
"overall_summary": "ok",
|
| 107 |
"overall_risk_score": None,
|
|
|
|
| 108 |
"identified_risk_factors": [],
|
| 109 |
"risk_assessments": [],
|
| 110 |
"dx_recommendations": [],
|
|
|
|
| 31 |
@patch("apps.api.main.SentinelFactory")
|
| 32 |
def test_assess_local(mock_factory):
|
| 33 |
payload = {
|
| 34 |
+
"demographics": {
|
| 35 |
+
"age_years": 55,
|
| 36 |
+
"sex": "male",
|
| 37 |
+
"ethnicity": "white",
|
| 38 |
+
"anthropometrics": {"height_cm": 175, "weight_kg": 80},
|
| 39 |
+
},
|
| 40 |
"lifestyle": {
|
| 41 |
+
"smoking": {
|
| 42 |
+
"status": "former",
|
| 43 |
+
"pack_years": 10,
|
| 44 |
+
},
|
| 45 |
"alcohol_consumption": "moderate",
|
| 46 |
},
|
| 47 |
"family_history": [
|
| 48 |
+
{
|
| 49 |
+
"relation": "father",
|
| 50 |
+
"cancer_type": "lung_cancer",
|
| 51 |
+
"age_at_diagnosis": 60,
|
| 52 |
+
"degree": "1",
|
| 53 |
+
"side": "paternal",
|
| 54 |
+
}
|
| 55 |
],
|
| 56 |
"personal_medical_history": {
|
| 57 |
"previous_cancers": ["melanoma"],
|
|
|
|
| 58 |
},
|
| 59 |
}
|
| 60 |
expected = {
|
|
|
|
| 63 |
"response": None,
|
| 64 |
"overall_summary": "ok",
|
| 65 |
"overall_risk_score": None,
|
| 66 |
+
"calculated_risk_scores": {},
|
| 67 |
"identified_risk_factors": [],
|
| 68 |
"risk_assessments": [],
|
| 69 |
"dx_recommendations": [],
|
|
|
|
| 86 |
@patch("apps.api.main.SentinelFactory")
|
| 87 |
def test_assess_bad_provider(mock_factory):
|
| 88 |
payload = {
|
| 89 |
+
"demographics": {
|
| 90 |
+
"age_years": 30,
|
| 91 |
+
"sex": "male",
|
| 92 |
+
"anthropometrics": {"height_cm": 175, "weight_kg": 70},
|
| 93 |
+
},
|
| 94 |
+
"lifestyle": {
|
| 95 |
+
"smoking": {"status": "never"},
|
| 96 |
+
"alcohol_consumption": "none",
|
| 97 |
+
},
|
| 98 |
"family_history": [],
|
| 99 |
+
"personal_medical_history": {},
|
| 100 |
}
|
| 101 |
mock_factory.side_effect = ValueError("bad")
|
| 102 |
response = client.post("/assess/invalid", json={"user_input": payload})
|
|
|
|
| 106 |
@patch("apps.api.main.SentinelFactory")
|
| 107 |
def test_assess_with_observations(mock_factory):
|
| 108 |
payload = {
|
| 109 |
+
"demographics": {
|
| 110 |
+
"age_years": 60,
|
| 111 |
+
"sex": "male",
|
| 112 |
+
"anthropometrics": {"height_cm": 175, "weight_kg": 75},
|
| 113 |
+
},
|
| 114 |
+
"lifestyle": {
|
| 115 |
+
"smoking": {"status": "never"},
|
| 116 |
+
"alcohol_consumption": "none",
|
| 117 |
+
},
|
| 118 |
+
"personal_medical_history": {},
|
| 119 |
"family_history": [],
|
| 120 |
+
"clinical_tests": {
|
| 121 |
+
"psa": {
|
| 122 |
+
"value_ng_ml": 5.0,
|
|
|
|
|
|
|
|
|
|
| 123 |
}
|
| 124 |
+
},
|
| 125 |
}
|
| 126 |
expected = {
|
| 127 |
"thinking": None,
|
|
|
|
| 129 |
"response": None,
|
| 130 |
"overall_summary": "ok",
|
| 131 |
"overall_risk_score": None,
|
| 132 |
+
"calculated_risk_scores": {},
|
| 133 |
"identified_risk_factors": [],
|
| 134 |
"risk_assessments": [],
|
| 135 |
"dx_recommendations": [],
|
tests/test_risk_aggregation.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for risk aggregation utilities."""
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from sentinel.models import RiskScore
|
| 6 |
+
from sentinel.risk_aggregation import (
|
| 7 |
+
format_scores_for_llm,
|
| 8 |
+
format_scores_for_pdf,
|
| 9 |
+
group_scores_by_cancer_type,
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def test_group_scores_by_cancer_type():
|
| 14 |
+
"""Test grouping risk scores by cancer type."""
|
| 15 |
+
scores = [
|
| 16 |
+
RiskScore(
|
| 17 |
+
name="Gail Model",
|
| 18 |
+
score="5%",
|
| 19 |
+
cancer_type="Breast Cancer",
|
| 20 |
+
description="5-year risk",
|
| 21 |
+
),
|
| 22 |
+
RiskScore(
|
| 23 |
+
name="Claus Model",
|
| 24 |
+
score="3%",
|
| 25 |
+
cancer_type="Breast Cancer",
|
| 26 |
+
description="Lifetime risk",
|
| 27 |
+
),
|
| 28 |
+
RiskScore(
|
| 29 |
+
name="PLCOm2012",
|
| 30 |
+
score="2%",
|
| 31 |
+
cancer_type="Lung Cancer",
|
| 32 |
+
description="6-year risk",
|
| 33 |
+
),
|
| 34 |
+
]
|
| 35 |
+
|
| 36 |
+
grouped = group_scores_by_cancer_type(scores)
|
| 37 |
+
|
| 38 |
+
assert len(grouped) == 2
|
| 39 |
+
assert "Breast Cancer" in grouped
|
| 40 |
+
assert "Lung Cancer" in grouped
|
| 41 |
+
assert len(grouped["Breast Cancer"]) == 2
|
| 42 |
+
assert len(grouped["Lung Cancer"]) == 1
|
| 43 |
+
assert grouped["Breast Cancer"][0].name == "Gail Model"
|
| 44 |
+
assert grouped["Breast Cancer"][1].name == "Claus Model"
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def test_group_scores_empty():
|
| 48 |
+
"""Test grouping with empty list."""
|
| 49 |
+
grouped = group_scores_by_cancer_type([])
|
| 50 |
+
assert grouped == {}
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def test_group_scores_no_cancer_type():
|
| 54 |
+
"""Test grouping with scores that have no cancer type."""
|
| 55 |
+
|
| 56 |
+
scores = [
|
| 57 |
+
RiskScore(name="Test Model", score="5%", cancer_type=None),
|
| 58 |
+
]
|
| 59 |
+
|
| 60 |
+
with pytest.raises(ValueError, match=r"Test Model.*missing cancer_type"):
|
| 61 |
+
group_scores_by_cancer_type(scores)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def test_format_scores_for_llm():
|
| 65 |
+
"""Test formatting scores for LLM context."""
|
| 66 |
+
scores = [
|
| 67 |
+
RiskScore(
|
| 68 |
+
name="Gail Model",
|
| 69 |
+
score="5%",
|
| 70 |
+
cancer_type="Breast Cancer",
|
| 71 |
+
description="5-year risk",
|
| 72 |
+
interpretation="Low to moderate risk",
|
| 73 |
+
references=["Gail et al., 1989"],
|
| 74 |
+
),
|
| 75 |
+
]
|
| 76 |
+
|
| 77 |
+
grouped = group_scores_by_cancer_type(scores)
|
| 78 |
+
formatted = format_scores_for_llm(grouped)
|
| 79 |
+
|
| 80 |
+
assert "# Calculated Risk Scores (Ground Truth)" in formatted
|
| 81 |
+
assert "Breast Cancer" in formatted
|
| 82 |
+
assert "Gail Model" in formatted
|
| 83 |
+
assert "5%" in formatted
|
| 84 |
+
assert "5-year risk" in formatted
|
| 85 |
+
assert "Low to moderate risk" in formatted
|
| 86 |
+
assert "Gail et al., 1989" in formatted
|
| 87 |
+
assert "DO NOT generate your own risk levels" in formatted
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def test_format_scores_for_llm_empty():
|
| 91 |
+
"""Test formatting empty scores for LLM."""
|
| 92 |
+
formatted = format_scores_for_llm({})
|
| 93 |
+
assert formatted == "No risk scores calculated."
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def test_format_scores_for_pdf():
|
| 97 |
+
"""Test formatting scores for PDF presentation."""
|
| 98 |
+
scores = [
|
| 99 |
+
RiskScore(
|
| 100 |
+
name="Gail Model",
|
| 101 |
+
score="5%",
|
| 102 |
+
cancer_type="Breast Cancer",
|
| 103 |
+
),
|
| 104 |
+
RiskScore(
|
| 105 |
+
name="PLCOm2012",
|
| 106 |
+
score="2%",
|
| 107 |
+
cancer_type="Lung Cancer",
|
| 108 |
+
),
|
| 109 |
+
]
|
| 110 |
+
|
| 111 |
+
grouped = group_scores_by_cancer_type(scores)
|
| 112 |
+
formatted = format_scores_for_pdf(grouped)
|
| 113 |
+
|
| 114 |
+
assert len(formatted) == 2
|
| 115 |
+
assert formatted[0][0] == "Breast Cancer"
|
| 116 |
+
assert formatted[1][0] == "Lung Cancer"
|
| 117 |
+
assert len(formatted[0][1]) == 1
|
| 118 |
+
assert len(formatted[1][1]) == 1
|