jeuko committed on
Commit
cc034ee
·
verified ·
1 Parent(s): 8018595

Sync from GitHub (main)

Browse files
apps/api/main.py CHANGED
@@ -6,7 +6,8 @@ from fastapi import FastAPI, HTTPException
6
 
7
  from sentinel.config import AppConfig, ModelConfig, ResourcePaths
8
  from sentinel.factory import SentinelFactory
9
- from sentinel.models import InitialAssessment, UserInput
 
10
 
11
  app = FastAPI(
12
  title="Cancer Risk Assessment Assistant",
 
6
 
7
  from sentinel.config import AppConfig, ModelConfig, ResourcePaths
8
  from sentinel.factory import SentinelFactory
9
+ from sentinel.models import InitialAssessment
10
+ from sentinel.user_input import UserInput
11
 
12
  app = FastAPI(
13
  title="Cancer Risk Assessment Assistant",
apps/cli/main.py CHANGED
@@ -12,16 +12,18 @@ from sentinel.config import AppConfig, ModelConfig, ResourcePaths
12
  from sentinel.factory import SentinelFactory
13
  from sentinel.models import (
14
  ConversationResponse,
 
 
 
 
 
15
  Demographics,
16
  FamilyMemberCancer,
17
  FemaleSpecific,
18
- InitialAssessment,
19
  Lifestyle,
20
  PersonalMedicalHistory,
21
  UserInput,
22
  )
23
- from sentinel.reporting import generate_excel_report, generate_pdf_report
24
- from sentinel.risk_models import RISK_MODELS
25
  from sentinel.utils import load_user_file
26
 
27
 
@@ -461,17 +463,35 @@ def main(cfg: DictConfig) -> None:
461
  print(f"\n{Colors.OKCYAN}🔄 Running risk scoring tools...{Colors.ENDC}")
462
  risks_scores = []
463
  for model in RISK_MODELS:
464
- risk_score = model().run(user)
465
- risks_scores.append(risk_score)
 
 
 
 
 
 
 
 
 
466
 
467
- user.risks_scores = risks_scores
468
  for risk_score in risks_scores:
469
- print(f"{Colors.OKCYAN}🔄 {risk_score.name}: {risk_score.score}{Colors.ENDC}")
 
 
 
 
 
 
 
 
 
 
470
 
471
  print(f"\n{Colors.OKGREEN}🔄 Analyzing your information...{Colors.ENDC}")
472
  response = None
473
  try:
474
- response = conversation.initial_assessment(user)
475
  format_risk_assessment(response, dev_mode)
476
  except Exception as e:
477
  print(f"{Colors.FAIL}❌ Error generating assessment: {e}{Colors.ENDC}")
 
12
  from sentinel.factory import SentinelFactory
13
  from sentinel.models import (
14
  ConversationResponse,
15
+ InitialAssessment,
16
+ )
17
+ from sentinel.reporting import generate_excel_report, generate_pdf_report
18
+ from sentinel.risk_models import RISK_MODELS
19
+ from sentinel.user_input import (
20
  Demographics,
21
  FamilyMemberCancer,
22
  FemaleSpecific,
 
23
  Lifestyle,
24
  PersonalMedicalHistory,
25
  UserInput,
26
  )
 
 
27
  from sentinel.utils import load_user_file
28
 
29
 
 
463
  print(f"\n{Colors.OKCYAN}🔄 Running risk scoring tools...{Colors.ENDC}")
464
  risks_scores = []
465
  for model in RISK_MODELS:
466
+ try:
467
+ risk_score = model().run(user)
468
+ # Handle models that return multiple scores (e.g., QCancer)
469
+ if isinstance(risk_score, list):
470
+ risks_scores.extend(risk_score)
471
+ else:
472
+ risks_scores.append(risk_score)
473
+ except ValueError as e:
474
+ # Skip models that aren't applicable or have validation errors
475
+ print(f"{Colors.WARNING}⚠️ Skipping {model().name}: {e!s}{Colors.ENDC}")
476
+ continue
477
 
 
478
  for risk_score in risks_scores:
479
+ # Format output based on whether cancer type is specified
480
+ if risk_score.cancer_type and risk_score.cancer_type not in [
481
+ "multiple",
482
+ "Multiple Cancer Sites",
483
+ ]:
484
+ display = (
485
+ f"{risk_score.name} ({risk_score.cancer_type}): {risk_score.score}"
486
+ )
487
+ else:
488
+ display = f"{risk_score.name}: {risk_score.score}"
489
+ print(f"{Colors.OKCYAN}🔄 {display}{Colors.ENDC}")
490
 
491
  print(f"\n{Colors.OKGREEN}🔄 Analyzing your information...{Colors.ENDC}")
492
  response = None
493
  try:
494
+ response = conversation.initial_assessment(user, risk_scores=risks_scores)
495
  format_risk_assessment(response, dev_mode)
496
  except Exception as e:
497
  print(f"{Colors.FAIL}❌ Error generating assessment: {e}{Colors.ENDC}")
apps/streamlit_ui/page_versions/profile/v2.py CHANGED
@@ -232,7 +232,11 @@ def render():
232
  risks_scores = []
233
  for model in RISK_MODELS:
234
  risk_score = model().run(updated_profile)
235
- risks_scores.append(risk_score)
 
 
 
 
236
 
237
  # Attach the scores to the object before saving
238
  updated_profile.risks_scores = risks_scores
 
232
  risks_scores = []
233
  for model in RISK_MODELS:
234
  risk_score = model().run(updated_profile)
235
+ # Handle models that return multiple scores (e.g., QCancer)
236
+ if isinstance(risk_score, list):
237
+ risks_scores.extend(risk_score)
238
+ else:
239
+ risks_scores.append(risk_score)
240
 
241
  # Attach the scores to the object before saving
242
  updated_profile.risks_scores = risks_scores
apps/streamlit_ui/pages/1_Profile.py CHANGED
@@ -257,7 +257,11 @@ with st.expander("Create New Profile Manually"):
257
  risks_scores = []
258
  for model in RISK_MODELS:
259
  risk_score = model().run(new_profile)
260
- risks_scores.append(risk_score)
 
 
 
 
261
 
262
  new_profile.risks_scores = risks_scores
263
 
 
257
  risks_scores = []
258
  for model in RISK_MODELS:
259
  risk_score = model().run(new_profile)
260
+ # Handle models that return multiple scores (e.g., QCancer)
261
+ if isinstance(risk_score, list):
262
+ risks_scores.extend(risk_score)
263
+ else:
264
+ risks_scores.append(risk_score)
265
 
266
  new_profile.risks_scores = risks_scores
267
 
apps/streamlit_ui/pages/3_Assessment.py CHANGED
@@ -198,7 +198,31 @@ if assessment:
198
  with st.expander("Overall Summary"):
199
  st.markdown(assessment.overall_summary, unsafe_allow_html=True)
200
 
201
- with st.expander("Risk Assessments"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  for ra in sorted_risk_assessments:
203
  st.markdown(f"**{ra.cancer_type}** - {ra.risk_level or 'N/A'}/5")
204
  st.write(ra.explanation)
 
198
  with st.expander("Overall Summary"):
199
  st.markdown(assessment.overall_summary, unsafe_allow_html=True)
200
 
201
+ with st.expander("Calculated Risk Scores (Ground Truth)"):
202
+ if assessment.calculated_risk_scores:
203
+ st.info(
204
+ "These scores have been calculated using validated clinical risk models "
205
+ "and represent the authoritative risk assessment."
206
+ )
207
+ for cancer_type, scores in sorted(
208
+ assessment.calculated_risk_scores.items()
209
+ ):
210
+ st.markdown(f"### {cancer_type}")
211
+ for score in scores:
212
+ st.markdown(f"**{score.name}**: {score.score}")
213
+ if score.description:
214
+ st.write(f"*{score.description}*")
215
+ if score.interpretation:
216
+ st.write(score.interpretation)
217
+ if score.references:
218
+ with st.expander("References"):
219
+ for ref in score.references:
220
+ st.write(f"- {ref}")
221
+ st.divider()
222
+ else:
223
+ st.write("No risk scores calculated.")
224
+
225
+ with st.expander("AI-Generated Risk Interpretations"):
226
  for ra in sorted_risk_assessments:
227
  st.markdown(f"**{ra.cancer_type}** - {ra.risk_level or 'N/A'}/5")
228
  st.write(ra.explanation)
configs/output_format/assessment.yaml CHANGED
@@ -1,7 +1,7 @@
1
  format_instructions: |
2
  CRITICAL:
3
  - Return ONLY valid JSON. Do not include any explanatory text, disclaimers, or additional content before or after the JSON.
4
- - Provide a risk assessment for each cancer type that has a computed risk score in the `RISK SCORES` section of `USER INFORMATION`.
5
  - Provide a diagnostic recommendation for EVERY diagnostic protocol provided in the `DIAGNOSTIC PROTOCOLS` (i.e. {diagnostic_protocols}).
6
  - The ONLY allowed values for the "category" field in "identified_risk_factors" and "contributing_factors" objects are: {allowed_categories}. You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category.
7
  - The ONLY allowed values for the "strength" field in "contributing_factors" objects are: {allowed_strengths}.
@@ -13,15 +13,15 @@ format_instructions: |
13
  "identified_risk_factors": [
14
  {{
15
  "description": "string - A human-readable description of the risk factor identified from the user's profile.",
16
- "category": "string - One of the predefined categories (Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, Other). You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category. ",
17
  }}
18
- ]
19
- "risk_assessments": [
20
  {{
21
- "cancer_type": "string - Type of cancer",
22
- "risk_level": "number - A score from 1 (lowest risk), 2 (low risk - proactive screening not needed but user should be aware of symptoms), 3 (moderate risk - some screening recommended), 4 (high risk - screening important), 5 (very high risk - screening critical, short-term action required)",
23
- "explanation": "string - Reasoning behind the assessment. Always relate the explanation to information provided in the `User Information` and `Clinical Observations` as much as possible.",
24
- "recommended_steps": ["string"] or null - Optional steps to mitigate risk. This field is only required if the risk level is 3 or higher, otherwise leave this field blank.
25
  "contributing_factors": [
26
  {{
27
  "description": "string - A human-readable description of the risk factor",
@@ -47,6 +47,8 @@ format_instructions: |
47
 
48
  IMPORTANT:
49
  - The `reasoning` field is mandatory for your internal monologue. You must put any and all reasoning you were asked to do in here. This is your internal monologue, and should be as detailed as possible.
 
 
50
  - Do not add disclaimers; they are handled separately.
51
  - Use null for optional fields that don't apply.
52
  - Return ONLY the JSON object, nothing else.
 
1
  format_instructions: |
2
  CRITICAL:
3
  - Return ONLY valid JSON. Do not include any explanatory text, disclaimers, or additional content before or after the JSON.
4
+ - The `RISK SCORES (GROUND TRUTH)` section contains validated risk scores. You MUST provide interpretations and explanations for these scores, NOT generate new risk levels.
5
  - Provide a diagnostic recommendation for EVERY diagnostic protocol provided in the `DIAGNOSTIC PROTOCOLS` (i.e. {diagnostic_protocols}).
6
  - The ONLY allowed values for the "category" field in "identified_risk_factors" and "contributing_factors" objects are: {allowed_categories}. You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category.
7
  - The ONLY allowed values for the "strength" field in "contributing_factors" objects are: {allowed_strengths}.
 
13
  "identified_risk_factors": [
14
  {{
15
  "description": "string - A human-readable description of the risk factor identified from the user's profile.",
16
+ "category": "string - One of the predefined categories (Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, Other). You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category. "
17
  }}
18
+ ],
19
+ "llm_risk_interpretations": [
20
  {{
21
+ "cancer_type": "string - Type of cancer from RISK SCORES section",
22
+ "risk_level": "number or null - Optional qualitative score (1-5) that should align with the calculated risk scores. Use null if you cannot confidently map the score to a 1-5 scale.",
23
+ "explanation": "string - Explain what the calculated risk score means for the patient. Identify key contributing factors from their profile. Always reference the actual score from RISK SCORES section.",
24
+ "recommended_steps": "array of strings or null - Optional steps to mitigate risk, particularly for higher calculated scores.",
25
  "contributing_factors": [
26
  {{
27
  "description": "string - A human-readable description of the risk factor",
 
47
 
48
  IMPORTANT:
49
  - The `reasoning` field is mandatory for your internal monologue. You must put any and all reasoning you were asked to do in here. This is your internal monologue, and should be as detailed as possible.
50
+ - Do NOT include a `calculated_risk_scores` field in your response - this is populated programmatically from the RISK SCORES section.
51
+ - Focus your `llm_risk_interpretations` on explaining the CALCULATED scores, not generating new risk assessments.
52
  - Do not add disclaimers; they are handled separately.
53
  - Use null for optional fields that don't apply.
54
  - Return ONLY the JSON object, nothing else.
examples/benchmark/benchmark_female.yaml CHANGED
@@ -1,23 +1,39 @@
1
  demographics:
2
- age: 52
3
  sex: female
4
- ethnicity: "Asian"
 
 
 
 
5
 
6
  lifestyle:
7
  smoking:
8
  status: never
9
- alcohol:
10
- consumption_level: light
 
 
 
 
 
11
 
12
  family_history:
13
- - relative: mother
14
- cancer_type: breast
15
  age_at_diagnosis: 48
16
- - relative: aunt
17
- cancer_type: ovarian
 
 
18
  age_at_diagnosis: 55
 
 
19
 
20
- personal_medical_history: {}
 
 
 
21
 
22
  female_specific:
23
  menstrual:
@@ -25,21 +41,17 @@ female_specific:
25
  parity:
26
  num_live_births: 2
27
  age_at_first_live_birth: 28
 
 
28
 
29
- current_concerns_or_symptoms: "Small lump in left breast. Fatigue and irregular periods."
 
 
30
 
31
- lab_results:
32
- - test_name: "CA 15-3"
33
- value: "32"
34
- unit: "U/mL"
35
- date: "2025-09-20"
36
- - test_name: "Hemoglobin"
37
- value: "12.8"
38
- unit: "g/dL"
39
- date: "2025-09-20"
40
-
41
- clinical_observations:
42
- - test_name: "Mammogram"
43
- value: "BI-RADS 4"
44
- unit: "category"
45
- date: "2025-09-20"
 
1
  demographics:
2
+ age_years: 52
3
  sex: female
4
+ ethnicity: asian
5
+ education_level: 4
6
+ anthropometrics:
7
+ height_cm: 165
8
+ weight_kg: 65
9
 
10
  lifestyle:
11
  smoking:
12
  status: never
13
+ cigarettes_per_day: 0
14
+ years_smoked: 0
15
+ pack_years: 0
16
+ alcohol_consumption: light
17
+ multivitamin_use: true
18
+ moderate_physical_activity_hours_per_day: 0.5
19
+ red_meat_consumption_oz_per_day: 2.0
20
 
21
  family_history:
22
+ - relation: mother
23
+ cancer_type: breast_cancer
24
  age_at_diagnosis: 48
25
+ degree: "1"
26
+ side: unknown
27
+ - relation: maternal_aunt
28
+ cancer_type: ovarian_cancer
29
  age_at_diagnosis: 55
30
+ degree: "2"
31
+ side: maternal
32
 
33
+ personal_medical_history:
34
+ chronic_conditions: []
35
+ previous_cancers: []
36
+ nsaid_use: never
37
 
38
  female_specific:
39
  menstrual:
 
41
  parity:
42
  num_live_births: 2
43
  age_at_first_live_birth: 28
44
+ hormone_use:
45
+ estrogen_use: never
46
 
47
+ symptoms:
48
+ - symptom_type: breast_lump
49
+ - symptom_type: weight_loss
50
 
51
+ dermatologic:
52
+ region: central
53
+ complexion: medium
54
+ freckling: mild
55
+ female_tan: moderate
56
+ female_small_moles: five_to_eleven
57
+ solar_damage: false
 
 
 
 
 
 
 
 
examples/benchmark/benchmark_male.yaml CHANGED
@@ -1,42 +1,58 @@
1
  demographics:
2
- age: 58
3
  sex: male
4
- ethnicity: "Caucasian"
 
 
 
 
5
 
6
  lifestyle:
7
  smoking:
8
  status: former
9
  pack_years: 20
10
- alcohol:
11
- consumption_level: moderate
 
 
 
 
 
12
 
13
  family_history:
14
- - relative: father
15
- cancer_type: lung
16
  age_at_diagnosis: 67
17
- - relative: brother
18
- cancer_type: prostate
 
 
19
  age_at_diagnosis: 62
 
 
20
 
21
  personal_medical_history:
22
  chronic_conditions:
23
- - "Type 2 diabetes"
24
- - "Hypertension"
 
25
 
26
- current_concerns_or_symptoms: "Difficulty with urination and persistent cough."
 
 
 
 
 
 
27
 
28
- lab_results:
29
- - test_name: "PSA"
30
- value: "5.8"
31
- unit: "ng/mL"
32
- date: "2025-09-15"
33
- - test_name: "Hemoglobin A1c"
34
- value: "7.2"
35
- unit: "%"
36
- date: "2025-09-15"
37
 
38
- clinical_observations:
39
- - test_name: "Blood Pressure"
40
- value: "142/88"
41
- unit: "mmHg"
42
- date: "2025-09-15"
 
 
 
 
1
  demographics:
2
+ age_years: 58
3
  sex: male
4
+ ethnicity: white
5
+ education_level: 3
6
+ anthropometrics:
7
+ height_cm: 178
8
+ weight_kg: 92
9
 
10
  lifestyle:
11
  smoking:
12
  status: former
13
  pack_years: 20
14
+ cigarettes_per_day: 20
15
+ years_smoked: 20
16
+ years_since_quit: 5
17
+ alcohol_consumption: moderate
18
+ multivitamin_use: false
19
+ moderate_physical_activity_hours_per_day: 0.25
20
+ red_meat_consumption_oz_per_day: 4.0
21
 
22
  family_history:
23
+ - relation: father
24
+ cancer_type: lung_cancer
25
  age_at_diagnosis: 67
26
+ degree: "1"
27
+ side: unknown
28
+ - relation: brother
29
+ cancer_type: prostate_cancer
30
  age_at_diagnosis: 62
31
+ degree: "1"
32
+ side: unknown
33
 
34
  personal_medical_history:
35
  chronic_conditions:
36
+ - diabetes
37
+ previous_cancers: []
38
+ aspirin_use: never
39
 
40
+ clinical_tests:
41
+ psa:
42
+ value_ng_ml: 5.8
43
+ date: 2025-09-15
44
+ dre:
45
+ result: normal
46
+ date: 2025-09-15
47
 
48
+ symptoms:
49
+ - symptom_type: persistent_cough
 
 
 
 
 
 
 
50
 
51
+ dermatologic:
52
+ region: northern
53
+ complexion: light
54
+ freckling: moderate
55
+ male_sunburn: true
56
+ male_has_two_or_more_big_moles: true
57
+ male_small_moles: seven_to_sixteen
58
+ solar_damage: true
prompts/instruction/assessment.md CHANGED
@@ -2,18 +2,22 @@ You will provide a structured JSON output as specified in the `FORMAT INSTRUCTIO
2
 
3
  ## Your Task
4
 
5
- Review the pre-computed risk scores in `USER INFORMATION` and synthesize them into a clear, structured assessment:
6
 
7
- 1. **Analyze the risk scores**: Review each risk score provided in `RISK SCORES`. These scores have been calculated by validated risk models and represent the primary basis for the assessment.
8
 
9
- 2. **Review clinical observations**: If `CLINICAL OBSERVATIONS` is not empty, carefully consider each item by comparing the `value` to the `reference_range` to identify abnormalities.
10
 
11
- 3. **Apply diagnostic protocols**: For each relevant protocol in `DIAGNOSTIC PROTOCOLS`, determine the user's eligibility and recommended frequency based on their risk profile and demographic information.
12
 
13
- 4. **Generate clear explanations**: Transform the technical risk data into user-friendly explanations that are empathetic, actionable, and evidence-based.
14
 
15
- 5. **Critical review**: Before generating final output, verify that your recommendations are consistent with the risk scores and guidelines. Look for contradictions or omissions.
16
 
17
- 6. **Structure the output**: Generate the JSON response following the `FORMAT INSTRUCTIONS` exactly.
18
 
19
- Your role is to explain and contextualize the pre-computed risk assessments, NOT to recalculate or second-guess them.
 
 
 
 
 
2
 
3
  ## Your Task
4
 
5
+ The `RISK SCORES (GROUND TRUTH)` section contains deterministic risk scores calculated by validated risk models. **These scores are the authoritative source of truth and must not be contradicted or overridden.**
6
 
7
+ Your role is to:
8
 
9
+ 1. **Explain the risk scores**: For each cancer type with calculated risk scores, provide clear, empathetic explanations of what these scores mean for the patient. Explain the scores in plain language without generating your own risk levels.
10
 
11
+ 2. **Identify contributing factors**: Analyze the patient's profile in `USER INFORMATION` to highlight the key risk factors that contributed to elevated risk scores. Explain WHY specific scores are higher based on the patient's demographics, lifestyle, medical history, and family history.
12
 
13
+ 3. **Review clinical observations**: If clinical observations are present, identify any abnormalities by comparing values to reference ranges and explain how these relate to the calculated risk scores.
14
 
15
+ 4. **Apply diagnostic protocols**: For each relevant protocol in `DIAGNOSTIC PROTOCOLS`, determine the patient's eligibility and recommended testing frequency based on their risk profile and demographic information.
16
 
17
+ 5. **Provide actionable insights**: Offer evidence-based recommendations and lifestyle advice that patients can use to understand and potentially modify their risk factors.
18
 
19
+ 6. **Maintain consistency**: Ensure your explanations and recommendations align with the calculated risk scores and established guidelines. Do not contradict the quantitative scores.
20
+
21
+ 7. **Structure the output**: Generate the JSON response following the `FORMAT INSTRUCTIONS` exactly.
22
+
23
+ **Critical**: You are an interpreter and explainer of risk data, NOT a risk calculator. The validated risk models have already determined the risk levels - your job is to make them understandable and actionable for the patient.
scripts/generate_documentation.py CHANGED
@@ -15,7 +15,6 @@ from annotated_types import Ge, Gt, Le, Lt
15
  from fpdf import FPDF
16
  from pydantic import BaseModel
17
 
18
- from sentinel.models import UserInput
19
  from sentinel.risk_models.base import RiskModel
20
  from sentinel.risk_models.qcancer import (
21
  FEMALE_CANCER_TYPES as QC_FEMALE_CANCERS,
@@ -23,6 +22,7 @@ from sentinel.risk_models.qcancer import (
23
  from sentinel.risk_models.qcancer import (
24
  MALE_CANCER_TYPES as QC_MALE_CANCERS,
25
  )
 
26
 
27
  # Constants
28
  HERE = Path(__file__).resolve().parent
 
15
  from fpdf import FPDF
16
  from pydantic import BaseModel
17
 
 
18
  from sentinel.risk_models.base import RiskModel
19
  from sentinel.risk_models.qcancer import (
20
  FEMALE_CANCER_TYPES as QC_FEMALE_CANCERS,
 
22
  from sentinel.risk_models.qcancer import (
23
  MALE_CANCER_TYPES as QC_MALE_CANCERS,
24
  )
25
+ from sentinel.user_input import UserInput
26
 
27
  # Constants
28
  HERE = Path(__file__).resolve().parent
src/sentinel/api_clients/canrisk.py CHANGED
@@ -1513,7 +1513,7 @@ class CanRiskClient:
1513
  stripped = relative.strip()
1514
  if not stripped:
1515
  return "Unknown"
1516
- compact = stripped.title().replace(" ", "")
1517
  return compact[:20]
1518
 
1519
  @staticmethod
 
1513
  stripped = relative.strip()
1514
  if not stripped:
1515
  return "Unknown"
1516
+ compact = stripped.title().replace(" ", "").replace("_", "")
1517
  return compact[:20]
1518
 
1519
  @staticmethod
src/sentinel/conversation.py CHANGED
@@ -7,7 +7,9 @@ from langchain_core.messages import get_buffer_string
7
  from langchain_core.runnables.base import Runnable
8
 
9
  from .llm_service import extract_thinking
10
- from .models import ConversationResponse, InitialAssessment, UserInput
 
 
11
 
12
 
13
  @dataclass
@@ -37,25 +39,45 @@ class ConversationManager:
37
  pairs.append((human, ai))
38
  return pairs
39
 
40
- def initial_assessment(self, user: UserInput) -> InitialAssessment:
 
 
41
  """Run the structured assessment chain and record the exchange.
42
 
43
  Args:
44
  user: The user profile to assess.
 
45
 
46
  Returns:
47
  The structured InitialAssessment result.
48
  """
 
49
  self.user_json = user.model_dump_json()
50
- prompt = self.structured_chain.prompt.format(user_data=self.user_json)
51
- result = self.structured_chain.invoke({"user_data": self.user_json})
 
 
 
 
 
 
 
 
 
 
 
52
  if isinstance(result, InitialAssessment):
53
  data = result
54
  else:
55
  data = InitialAssessment.model_validate(result)
56
 
 
 
 
57
  # Add to history as a new interaction
58
- self.chat_history.add_user_message(prompt)
 
 
59
  self.chat_history.add_ai_message(data.model_dump_json())
60
  return data
61
 
 
7
  from langchain_core.runnables.base import Runnable
8
 
9
  from .llm_service import extract_thinking
10
+ from .models import ConversationResponse, InitialAssessment
11
+ from .risk_aggregation import format_scores_for_llm, group_scores_by_cancer_type
12
+ from .user_input import UserInput
13
 
14
 
15
  @dataclass
 
39
  pairs.append((human, ai))
40
  return pairs
41
 
42
+ def initial_assessment(
43
+ self, user: UserInput, risk_scores: list | None = None
44
+ ) -> InitialAssessment:
45
  """Run the structured assessment chain and record the exchange.
46
 
47
  Args:
48
  user: The user profile to assess.
49
+ risk_scores: Optional list of RiskScore objects. If omitted, falls back to `user.risk_scores`, or to an empty list when the user object has no such attribute.
50
 
51
  Returns:
52
  The structured InitialAssessment result.
53
  """
54
+
55
  self.user_json = user.model_dump_json()
56
+
57
+ # Extract and group risk scores
58
+ if risk_scores is None:
59
+ # Try to get from user if it has risk_scores attribute
60
+ risk_scores = getattr(user, "risk_scores", [])
61
+ grouped_scores = group_scores_by_cancer_type(risk_scores)
62
+ formatted_scores = format_scores_for_llm(grouped_scores)
63
+
64
+ # Invoke LLM with scores as separate context
65
+ result = self.structured_chain.invoke(
66
+ {"user_data": self.user_json, "risk_scores": formatted_scores}
67
+ )
68
+
69
  if isinstance(result, InitialAssessment):
70
  data = result
71
  else:
72
  data = InitialAssessment.model_validate(result)
73
 
74
+ # Attach the ground truth calculated scores
75
+ data.calculated_risk_scores = grouped_scores
76
+
77
  # Add to history as a new interaction
78
+ self.chat_history.add_user_message(
79
+ f"Initial assessment for user profile: {self.user_json}"
80
+ )
81
  self.chat_history.add_ai_message(data.model_dump_json())
82
  return data
83
 
src/sentinel/models.py CHANGED
@@ -1,7 +1,7 @@
1
  """Pydantic models and enums used across the Sentinel application."""
2
 
3
  import re
4
- from collections.abc import Iterable, Sequence
5
  from enum import Enum, IntEnum
6
  from typing import Any, Literal
7
 
@@ -1593,213 +1593,6 @@ class RiskScore(SentinelBaseModel):
1593
  )
1594
 
1595
 
1596
- # ---------------------------------------------------------------------------
1597
- # Canonical user input
1598
- # ---------------------------------------------------------------------------
1599
-
1600
-
1601
- class UserInput(SentinelBaseModel):
1602
- """Top-level container for all input required by assessments."""
1603
-
1604
- schema_version: str = Field(default="2025.10")
1605
- demographics: Demographics
1606
- lifestyle: Lifestyle
1607
- family_history: list[FamilyMemberCancer] = Field(default_factory=list)
1608
- personal_medical_history: PersonalMedicalHistory
1609
- female_specific: FemaleSpecific | None = None
1610
- current_concerns_or_symptoms: str | None = None
1611
- symptoms: list[SymptomEntry] = Field(default_factory=list)
1612
- clinical_observations: list[ClinicalObservation] = Field(default_factory=list)
1613
- lab_results: list[LabResult] = Field(default_factory=list)
1614
- screening_history: list[ScreeningEvent] = Field(default_factory=list)
1615
- medications: list[MedicationRecord] = Field(default_factory=list)
1616
- risk_scores: list[RiskScore] = Field(default_factory=list, alias="risks_scores")
1617
- notes: str | None = None
1618
- dermatologic: DermatologicProfile | None = None
1619
-
1620
- @model_validator(mode="before")
1621
- def _legacy(cls, values: Any) -> Any:
1622
- if not isinstance(values, dict):
1623
- return values
1624
- data = dict(values)
1625
- for field_name in (
1626
- "clinical_observations",
1627
- "lab_results",
1628
- "screening_history",
1629
- "medications",
1630
- "family_history",
1631
- "symptoms",
1632
- "risk_scores",
1633
- "risks_scores",
1634
- ):
1635
- if field_name in data and data[field_name] is None:
1636
- data[field_name] = []
1637
- if "risks_scores" in data and "risk_scores" not in data:
1638
- data["risk_scores"] = data.pop("risks_scores")
1639
- return data
1640
-
1641
- @property
1642
- def risks_scores(self) -> list[RiskScore]:
1643
- """Get risk scores list.
1644
-
1645
- Returns:
1646
- List of risk scores.
1647
- """
1648
- return self.risk_scores
1649
-
1650
- @risks_scores.setter
1651
- def risks_scores(self, value: Iterable[RiskScore]) -> None:
1652
- """Set risk scores list.
1653
-
1654
- Args:
1655
- value: Risk scores to set.
1656
- """
1657
- self.risk_scores = list(value)
1658
-
1659
- @property
1660
- def reproductive_history(self) -> FemaleSpecific | None:
1661
- """Get reproductive history.
1662
-
1663
- Returns:
1664
- Reproductive history or None.
1665
- """
1666
- return self.female_specific
1667
-
1668
- @reproductive_history.setter
1669
- def reproductive_history(self, value: FemaleSpecific | None) -> None:
1670
- """Set reproductive history.
1671
-
1672
- Args:
1673
- value: Reproductive history to set.
1674
- """
1675
- self.female_specific = value
1676
-
1677
- @property
1678
- def bmi(self) -> float | None:
1679
- """Get BMI value.
1680
-
1681
- Returns:
1682
- BMI value or None.
1683
- """
1684
- return self.demographics.bmi
1685
-
1686
- @property
1687
- def smoking_history(self) -> SmokingHistory:
1688
- """Get smoking history.
1689
-
1690
- Returns:
1691
- Smoking history.
1692
- """
1693
- return self.lifestyle.smoking
1694
-
1695
- @property
1696
- def is_current_or_former_smoker(self) -> bool:
1697
- """Check if user is current or former smoker.
1698
-
1699
- Returns:
1700
- True if current or former smoker, False otherwise.
1701
- """
1702
- return self.lifestyle.smoking.status in {
1703
- SmokingStatus.CURRENT,
1704
- SmokingStatus.FORMER,
1705
- }
1706
-
1707
- def _build_observation_index(self) -> dict[str, ClinicalObservation]:
1708
- """Build index of clinical observations by normalized name.
1709
-
1710
- Returns:
1711
- Dictionary mapping normalized names to observations.
1712
- """
1713
- index: dict[str, ClinicalObservation] = {}
1714
- for obs in (*self.clinical_observations, *self.lab_results):
1715
- key = obs.normalized_name
1716
- if key and key not in index:
1717
- index[key] = obs
1718
- return index
1719
-
1720
- def get_observation(self, names: Sequence[str]) -> ClinicalObservation | None:
1721
- """Get clinical observation by name.
1722
-
1723
- Args:
1724
- names: Sequence of observation names to search for.
1725
-
1726
- Returns:
1727
- Clinical observation or None.
1728
- """
1729
- index = self._build_observation_index()
1730
- for name in names:
1731
- key = _normalize_key(name)
1732
- if key in index:
1733
- return index[key]
1734
- return None
1735
-
1736
- def get_observation_value(self, names: Sequence[str]) -> str | None:
1737
- """Get clinical observation value by name.
1738
-
1739
- Args:
1740
- names: Sequence of observation names to search for.
1741
-
1742
- Returns:
1743
- Observation value or None.
1744
- """
1745
- observation = self.get_observation(names)
1746
- return observation.value if observation else None
1747
-
1748
- def get_numeric_observation(self, names: Sequence[str]) -> float | None:
1749
- """Get clinical observation numeric value by name.
1750
-
1751
- Args:
1752
- names: Sequence of observation names to search for.
1753
-
1754
- Returns:
1755
- Numeric observation value or None.
1756
- """
1757
- observation = self.get_observation(names)
1758
- return observation.numeric_value if observation else None
1759
-
1760
- def has_family_history(
1761
- self, relations: Iterable[FamilyRelation], cancer_keywords: Iterable[str]
1762
- ) -> bool:
1763
- """Check if user has family history of specific cancer types.
1764
-
1765
- Args:
1766
- relations: Family relations to check.
1767
- cancer_keywords: Cancer type keywords to search for.
1768
-
1769
- Returns:
1770
- True if family history found, False otherwise.
1771
- """
1772
- relation_set = {FamilyRelation.normalize(rel) for rel in relations}
1773
- keywords = {kw.lower() for kw in cancer_keywords}
1774
- for record in self.family_history:
1775
- if relation_set and record.relation not in relation_set:
1776
- continue
1777
- if any(
1778
- keyword in (record.cancer_type or "").lower() for keyword in keywords
1779
- ):
1780
- return True
1781
- return False
1782
-
1783
- def first_degree_cancer_count(self, cancer_keywords: Iterable[str]) -> int:
1784
- """Count first-degree relatives with specific cancer types.
1785
-
1786
- Args:
1787
- cancer_keywords: Cancer type keywords to search for.
1788
-
1789
- Returns:
1790
- Count of first-degree relatives with matching cancer types.
1791
- """
1792
- keywords = {kw.lower() for kw in cancer_keywords}
1793
- return sum(
1794
- 1
1795
- for record in self.family_history
1796
- if record.is_first_degree
1797
- and any(
1798
- keyword in (record.cancer_type or "").lower() for keyword in keywords
1799
- )
1800
- )
1801
-
1802
-
1803
  # ---------------------------------------------------------------------------
1804
  # Assessment artefacts
1805
  # ---------------------------------------------------------------------------
@@ -1872,24 +1665,38 @@ class InitialAssessment(SentinelBaseModel):
1872
  overall_summary: str | None = Field(
1873
  default=None, description="A high-level summary of the user's cancer risk."
1874
  )
1875
- overall_risk_score: int | None = Field(
1876
  default=None,
1877
  description="A holistic score from 0 to 100 representing the user's overall cancer risk.",
1878
  ge=0,
1879
  le=100,
1880
  )
 
 
 
 
1881
  identified_risk_factors: list[RiskFactor] = Field(
1882
  default_factory=list,
1883
  description="A comprehensive list of all distinct risk factors identified from the user's profile.",
1884
  )
1885
- risk_assessments: list[CancerRiskAssessment] = Field(
1886
  default_factory=list,
1887
- description="Detailed risk assessments for specific cancers",
 
1888
  )
1889
  dx_recommendations: list[DxRecommendation] = Field(
1890
  default_factory=list, description="Recommended diagnostic tests and protocols"
1891
  )
1892
 
 
 
 
 
 
 
 
 
 
1893
 
1894
  class ConversationResponse(SentinelBaseModel):
1895
  """Structured response for conversational follow-ups."""
 
1
  """Pydantic models and enums used across the Sentinel application."""
2
 
3
  import re
4
+ from collections.abc import Iterable
5
  from enum import Enum, IntEnum
6
  from typing import Any, Literal
7
 
 
1593
  )
1594
 
1595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1596
  # ---------------------------------------------------------------------------
1597
  # Assessment artefacts
1598
  # ---------------------------------------------------------------------------
 
1665
  overall_summary: str | None = Field(
1666
  default=None, description="A high-level summary of the user's cancer risk."
1667
  )
1668
+ overall_risk_score: float | None = Field(
1669
  default=None,
1670
  description="A holistic score from 0 to 100 representing the user's overall cancer risk.",
1671
  ge=0,
1672
  le=100,
1673
  )
1674
+ calculated_risk_scores: dict[str, list[RiskScore]] = Field(
1675
+ default_factory=dict,
1676
+ description="Deterministic risk scores grouped by cancer type (ground truth)",
1677
+ )
1678
  identified_risk_factors: list[RiskFactor] = Field(
1679
  default_factory=list,
1680
  description="A comprehensive list of all distinct risk factors identified from the user's profile.",
1681
  )
1682
+ llm_risk_interpretations: list[CancerRiskAssessment] = Field(
1683
  default_factory=list,
1684
+ description="LLM explanations and interpretations of calculated risk scores",
1685
+ alias="risk_assessments",
1686
  )
1687
  dx_recommendations: list[DxRecommendation] = Field(
1688
  default_factory=list, description="Recommended diagnostic tests and protocols"
1689
  )
1690
 
1691
+ @property
1692
+ def risk_assessments(self) -> list[CancerRiskAssessment]:
1693
+ """Get LLM risk interpretations (legacy compatibility).
1694
+
1695
+ Returns:
1696
+ List of cancer risk assessments.
1697
+ """
1698
+ return self.llm_risk_interpretations
1699
+
1700
 
1701
  class ConversationResponse(SentinelBaseModel):
1702
  """Structured response for conversational follow-ups."""
src/sentinel/prompting.py CHANGED
@@ -54,6 +54,7 @@ class PromptBuilder:
54
  "# PERSONA\n\n{persona}\n\n"
55
  "# CANCER MODULES\n\n{cancer_modules}\n\n"
56
  "# DIAGNOSTIC PROTOCOLS\n\n{protocols}\n\n"
 
57
  "# USER INFORMATION\n\n{user_data}\n\n"
58
  "# INSTRUCTIONS\n\n{instruction}\n\n"
59
  "# OUTPUT FORMAT INSTRUCTIONS (FOR INITIAL RESPONSE ONLY)\n\n{format_instructions}"
 
54
  "# PERSONA\n\n{persona}\n\n"
55
  "# CANCER MODULES\n\n{cancer_modules}\n\n"
56
  "# DIAGNOSTIC PROTOCOLS\n\n{protocols}\n\n"
57
+ "# RISK SCORES (GROUND TRUTH)\n\n{risk_scores}\n\n"
58
  "# USER INFORMATION\n\n{user_data}\n\n"
59
  "# INSTRUCTIONS\n\n{instruction}\n\n"
60
  "# OUTPUT FORMAT INSTRUCTIONS (FOR INITIAL RESPONSE ONLY)\n\n{format_instructions}"
src/sentinel/reporting.py CHANGED
@@ -2,6 +2,7 @@
2
 
3
  import json
4
  import math
 
5
  from datetime import datetime
6
 
7
  import markdown2
@@ -35,8 +36,8 @@ from .models import (
35
  ContributionStrength,
36
  InitialAssessment,
37
  RiskFactorCategory,
38
- UserInput,
39
  )
 
40
 
41
  # --- PDF Report Formatting Globals ---
42
  # Fonts
@@ -178,6 +179,7 @@ def generate_excel_report(
178
  wb = Workbook()
179
 
180
  _create_summary_sheet(wb, assessment, user_input)
 
181
  _create_data_sheet(wb, "User Input Data", user_input.model_dump(mode="json"))
182
  _create_data_sheet(wb, "Raw LLM Output", assessment.model_dump(mode="json"))
183
 
@@ -223,7 +225,7 @@ def _create_summary_sheet(
223
  ws.cell(row=current_row, column=1, value="Demographics").font = bold_font
224
  current_row += 1
225
  demo_info = {
226
- "Age": user_input.demographics.age,
227
  "Sex": user_input.demographics.sex,
228
  "Ethnicity": user_input.demographics.ethnicity,
229
  }
@@ -238,10 +240,9 @@ def _create_summary_sheet(
238
  ws.cell(row=current_row, column=1, value="Lifestyle").font = bold_font
239
  current_row += 1
240
  lifestyle_info = {
241
- "Smoking Status": user_input.lifestyle.smoking_status,
242
- "Pack Years": user_input.lifestyle.smoking_pack_years,
243
  "Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
244
- "Dietary Habits": user_input.lifestyle.dietary_habits,
245
  "Physical Activity": user_input.lifestyle.physical_activity_level,
246
  }
247
  for key, val in lifestyle_info.items():
@@ -253,16 +254,16 @@ def _create_summary_sheet(
253
 
254
  # Personal Medical History
255
  if user_input.personal_medical_history and (
256
- user_input.personal_medical_history.known_genetic_mutations
257
  or user_input.personal_medical_history.previous_cancers
258
- or user_input.personal_medical_history.chronic_illnesses
259
  ):
260
  ws.cell(
261
  row=current_row, column=1, value="Personal Medical History"
262
  ).font = bold_font
263
  current_row += 1
264
  pmh_texts = []
265
- if user_input.personal_medical_history.known_genetic_mutations:
266
  ws.cell(
267
  row=current_row, column=1, value="Known Genetic Mutations"
268
  ).font = bold_font
@@ -270,7 +271,8 @@ def _create_summary_sheet(
270
  row=current_row,
271
  column=2,
272
  value=", ".join(
273
- user_input.personal_medical_history.known_genetic_mutations
 
274
  ),
275
  ).alignment = wrap_alignment
276
  current_row += 1
@@ -281,17 +283,22 @@ def _create_summary_sheet(
281
  ws.cell(
282
  row=current_row,
283
  column=2,
284
- value=", ".join(user_input.personal_medical_history.previous_cancers),
 
 
285
  ).alignment = wrap_alignment
286
  current_row += 1
287
- if user_input.personal_medical_history.chronic_illnesses:
288
  ws.cell(
289
- row=current_row, column=1, value="Chronic Illnesses"
290
  ).font = bold_font
291
  ws.cell(
292
  row=current_row,
293
  column=2,
294
- value=", ".join(user_input.personal_medical_history.chronic_illnesses),
 
 
 
295
  ).alignment = wrap_alignment
296
  current_row += 1
297
  current_row += 1
@@ -301,7 +308,7 @@ def _create_summary_sheet(
301
  ws.cell(row=current_row, column=1, value="Family History").font = bold_font
302
  current_row += 1
303
  family_texts = [
304
- f"{mem.relative} ({mem.cancer_type} at age {mem.age_at_diagnosis or 'N/A'})"
305
  for mem in user_input.family_history
306
  ]
307
  ws.cell(
@@ -313,12 +320,16 @@ def _create_summary_sheet(
313
  if user_input.female_specific:
314
  ws.cell(row=current_row, column=1, value="Female-Specific").font = bold_font
315
  current_row += 1
 
316
  female_specific_info = {
317
- "Age at first period": user_input.female_specific.age_at_first_period,
318
- "Age at menopause": user_input.female_specific.age_at_menopause,
319
- "Number of live births": user_input.female_specific.num_live_births,
320
- "Age at first live birth": user_input.female_specific.age_at_first_live_birth,
321
- "Hormone therapy use": user_input.female_specific.hormone_therapy_use,
 
 
 
322
  }
323
  for key, val in female_specific_info.items():
324
  ws.cell(row=current_row, column=1, value=key).font = bold_font
@@ -328,35 +339,19 @@ def _create_summary_sheet(
328
  current_row += 1
329
  current_row += 1
330
 
331
- # Current Concerns
332
- if user_input.current_concerns_or_symptoms:
333
- ws.cell(row=current_row, column=1, value="Current Concerns").font = bold_font
334
  current_row += 1
 
335
  ws.cell(
336
- row=current_row, column=2, value=user_input.current_concerns_or_symptoms
337
  ).alignment = wrap_alignment
338
  current_row += 2
339
 
340
- # Clinical Observations
341
- if user_input.clinical_observations:
342
- ws.merge_cells(f"A{current_row}:F{current_row}")
343
- ws.cell(
344
- row=current_row, column=1, value="Clinical Observations"
345
- ).font = bold_font
346
- current_row += 1
347
- headers = ["Test Name", "Value", "Unit", "Reference Range", "Date"]
348
- for col_idx, header in enumerate(headers, 1):
349
- cell = ws.cell(row=current_row, column=col_idx, value=header)
350
- cell.font = header_font
351
- cell.fill = header_fill
352
- for obs in user_input.clinical_observations:
353
- current_row += 1
354
- ws.cell(row=current_row, column=1, value=obs.test_name)
355
- ws.cell(row=current_row, column=2, value=obs.value)
356
- ws.cell(row=current_row, column=3, value=obs.unit)
357
- ws.cell(row=current_row, column=4, value=obs.reference_range)
358
- ws.cell(row=current_row, column=5, value=obs.date)
359
- current_row += 1
360
 
361
  ws.merge_cells(
362
  start_row=current_row, start_column=1, end_row=current_row, end_column=6
@@ -457,6 +452,73 @@ def _create_summary_sheet(
457
  ws.column_dimensions["F"].width = 30
458
 
459
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  def _create_data_sheet(wb: Workbook, title: str, data: dict) -> None:
461
  ws = wb.create_sheet(title)
462
  pretty_json = json.dumps(data, indent=2)
@@ -651,7 +713,7 @@ def generate_pdf_report(
651
  add_section(
652
  "Demographics",
653
  {
654
- "Age": user_input.demographics.age,
655
  "Sex": user_input.demographics.sex,
656
  "Ethnicity": user_input.demographics.ethnicity or "N/A",
657
  },
@@ -661,10 +723,9 @@ def generate_pdf_report(
661
  add_section(
662
  "Lifestyle",
663
  {
664
- "Smoking Status": user_input.lifestyle.smoking_status,
665
- "Pack Years": user_input.lifestyle.smoking_pack_years or "N/A",
666
  "Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
667
- "Dietary Habits": user_input.lifestyle.dietary_habits or "N/A",
668
  "Physical Activity": user_input.lifestyle.physical_activity_level or "N/A",
669
  },
670
  )
@@ -672,21 +733,27 @@ def generate_pdf_report(
672
  # --- Personal Medical History ---
673
  pmh = user_input.personal_medical_history
674
  if pmh and (
675
- pmh.known_genetic_mutations or pmh.previous_cancers or pmh.chronic_illnesses
676
  ):
677
  pmh_data = {}
678
- if pmh.known_genetic_mutations:
679
- pmh_data["Known Genetic Mutations"] = ", ".join(pmh.known_genetic_mutations)
 
 
680
  if pmh.previous_cancers:
681
- pmh_data["Previous Cancers"] = ", ".join(pmh.previous_cancers)
682
- if pmh.chronic_illnesses:
683
- pmh_data["Chronic Illnesses"] = ", ".join(pmh.chronic_illnesses)
 
 
 
 
684
  add_section("Personal Medical History", pmh_data)
685
 
686
  # --- Family History ---
687
  if user_input.family_history:
688
  family_texts = [
689
- f"{mem.relative} - {mem.cancer_type} (Age: {mem.age_at_diagnosis or 'N/A'})"
690
  for mem in user_input.family_history
691
  ]
692
  add_list_section("Family History", family_texts)
@@ -695,80 +762,50 @@ def generate_pdf_report(
695
  fs = user_input.female_specific
696
  if fs:
697
  fs_data = {}
698
- if fs.age_at_first_period is not None:
699
- fs_data["Age at first period"] = fs.age_at_first_period
700
- if fs.age_at_menopause is not None:
701
- fs_data["Age at menopause"] = fs.age_at_menopause
702
- if fs.num_live_births is not None:
703
- fs_data["Number of live births"] = fs.num_live_births
704
- if fs.age_at_first_live_birth is not None:
705
- fs_data["Age at first live birth"] = fs.age_at_first_live_birth
706
- if fs.hormone_therapy_use:
707
- fs_data["Hormone therapy"] = fs.hormone_therapy_use
708
- add_section("Female-Specific", fs_data)
709
-
710
- # --- Current Concerns ---
711
- if user_input.current_concerns_or_symptoms:
712
- add_list_section("Current Concerns", [user_input.current_concerns_or_symptoms])
713
 
714
  story.append(Spacer(1, SPACER_NORMAL))
715
 
716
- # --- Clinical Observations Table ---
717
- if user_input.clinical_observations:
718
- story.append(Paragraph("Clinical Observations", subheading_style))
719
- obs_data = [
720
- [
721
- Paragraph(h, table_header_style)
722
- for h in ["Test", "Value", "Unit", "Range", "Date"]
723
- ]
724
- ]
725
- obs_style_cmds = [
726
- (
727
- "BACKGROUND",
728
- (0, 0),
729
- (-1, 0),
730
- colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
731
- ),
732
- ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
733
- ("GRID", (0, 0), (-1, -1), 1, colors.black),
734
- ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
735
- ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
736
- ("TOPPADDING", (0, 0), (-1, -1), 4),
737
- ]
738
- for obs in user_input.clinical_observations:
739
- obs_data.append(
740
- [
741
- Paragraph(obs.test_name, table_body_style),
742
- Paragraph(obs.value, table_body_style),
743
- Paragraph(obs.unit, table_body_style),
744
- Paragraph(obs.reference_range or "N/A", table_body_style),
745
- Paragraph(obs.date or "N/A", table_body_style),
746
- ]
747
- )
748
- obs_widths = [1.75 * inch, 0.75 * inch, 0.75 * inch, 1.75 * inch, 1.5 * inch]
749
- scaled_widths = [w * (CONTENT_WIDTH / sum(obs_widths)) for w in obs_widths]
750
- obs_table = Table(
751
- obs_data, colWidths=scaled_widths, style=obs_style_cmds, splitByRow=1
752
- )
753
- story.append(obs_table)
754
- story.append(Spacer(1, SPACER_NORMAL))
755
 
756
- # --- Risk Scores Table ---
757
- if user_input.risks_scores:
758
- story.append(Paragraph("Risk Scores", subheading_style))
759
- obs_data = [
 
 
 
 
 
 
 
 
 
 
 
 
 
760
  [
761
  Paragraph(h, table_header_style)
762
- for h in [
763
- "Model",
764
- "Score",
765
- "Cancer Type",
766
- "Description",
767
- "Interpretation",
768
- ]
769
  ]
770
  ]
771
- obs_style_cmds = [
772
  (
773
  "BACKGROUND",
774
  (0, 0),
@@ -781,28 +818,29 @@ def generate_pdf_report(
781
  ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
782
  ("TOPPADDING", (0, 0), (-1, -1), 4),
783
  ]
784
- for risk_score in user_input.risks_scores:
785
- obs_data.append(
786
- [
787
- Paragraph(risk_score.name, table_body_style),
788
- Paragraph(risk_score.score, table_body_style),
789
- Paragraph(risk_score.cancer_type or "N/A", table_body_style),
790
- Paragraph(risk_score.description or "N/A", table_body_style),
791
- Paragraph(risk_score.interpretation or "N/A", table_body_style),
792
- ]
793
- )
794
- obs_widths = [1.75 * inch, 0.75 * inch, 0.75 * inch, 1.75 * inch, 1.5 * inch]
795
- scaled_widths = [w * (CONTENT_WIDTH / sum(obs_widths)) for w in obs_widths]
796
- obs_table = Table(
797
- obs_data, colWidths=scaled_widths, style=obs_style_cmds, splitByRow=1
 
 
 
 
 
798
  )
799
- story.append(obs_table)
800
  story.append(Spacer(1, SPACER_NORMAL))
801
 
802
- story.append(PageBreak())
803
- story.append(Paragraph("Assessment", heading_style))
804
- story.append(Spacer(1, SPACER_NORMAL))
805
-
806
  # --- New 3-Column Summary Section ---
807
  headers = [
808
  Paragraph("<b>Overall Risk Score</b>", summary_header_style),
@@ -910,13 +948,12 @@ def generate_pdf_report(
910
 
911
  story.append(Spacer(1, SPACER_NORMAL))
912
 
913
- story.append(Paragraph("Detailed Risk Assessments", subheading_style))
914
  story.append(Spacer(1, SPACER_SMALL))
915
  risk_intro_text = """
916
- The following table outlines your personalized cancer risk assessment. The risk level is graded
917
- on a scale from 1 (lowest risk) to 5 (highest risk) based on the information provided.
918
- Additional detail on the contributing risk factors and possible recommendation are then
919
- provided for any and all higher risk cancers (scoring 3-5).
920
  """
921
  story.append(Paragraph(risk_intro_text, styles["BodyText"]))
922
  story.append(Spacer(1, SPACER_SMALL))
@@ -1497,8 +1534,6 @@ def _calculate_risk_points(
1497
  Returns:
1498
  Mapping of RiskFactorCategory to integer points.
1499
  """
1500
- from collections import defaultdict
1501
-
1502
  risk_points_by_category = defaultdict(int)
1503
  strength_to_points = {
1504
  ContributionStrength.MAJOR: 5,
@@ -1708,8 +1743,6 @@ def _create_risk_factor_table(
1708
  Returns:
1709
  A ReportLab Table or Paragraph to insert in the story.
1710
  """
1711
- from collections import defaultdict
1712
-
1713
  if not assessment.identified_risk_factors:
1714
  return Paragraph("No specific risk factors identified.", panel_body_style)
1715
 
 
2
 
3
  import json
4
  import math
5
+ from collections import defaultdict
6
  from datetime import datetime
7
 
8
  import markdown2
 
36
  ContributionStrength,
37
  InitialAssessment,
38
  RiskFactorCategory,
 
39
  )
40
+ from .user_input import UserInput
41
 
42
  # --- PDF Report Formatting Globals ---
43
  # Fonts
 
179
  wb = Workbook()
180
 
181
  _create_summary_sheet(wb, assessment, user_input)
182
+ _create_risk_scores_sheet(wb, assessment)
183
  _create_data_sheet(wb, "User Input Data", user_input.model_dump(mode="json"))
184
  _create_data_sheet(wb, "Raw LLM Output", assessment.model_dump(mode="json"))
185
 
 
225
  ws.cell(row=current_row, column=1, value="Demographics").font = bold_font
226
  current_row += 1
227
  demo_info = {
228
+ "Age": user_input.demographics.age_years,
229
  "Sex": user_input.demographics.sex,
230
  "Ethnicity": user_input.demographics.ethnicity,
231
  }
 
240
  ws.cell(row=current_row, column=1, value="Lifestyle").font = bold_font
241
  current_row += 1
242
  lifestyle_info = {
243
+ "Smoking Status": user_input.lifestyle.smoking.status,
244
+ "Pack Years": user_input.lifestyle.smoking.pack_years,
245
  "Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
 
246
  "Physical Activity": user_input.lifestyle.physical_activity_level,
247
  }
248
  for key, val in lifestyle_info.items():
 
254
 
255
  # Personal Medical History
256
  if user_input.personal_medical_history and (
257
+ user_input.personal_medical_history.genetic_mutations
258
  or user_input.personal_medical_history.previous_cancers
259
+ or user_input.personal_medical_history.chronic_conditions
260
  ):
261
  ws.cell(
262
  row=current_row, column=1, value="Personal Medical History"
263
  ).font = bold_font
264
  current_row += 1
265
  pmh_texts = []
266
+ if user_input.personal_medical_history.genetic_mutations:
267
  ws.cell(
268
  row=current_row, column=1, value="Known Genetic Mutations"
269
  ).font = bold_font
 
271
  row=current_row,
272
  column=2,
273
  value=", ".join(
274
+ str(m)
275
+ for m in user_input.personal_medical_history.genetic_mutations
276
  ),
277
  ).alignment = wrap_alignment
278
  current_row += 1
 
283
  ws.cell(
284
  row=current_row,
285
  column=2,
286
+ value=", ".join(
287
+ str(c) for c in user_input.personal_medical_history.previous_cancers
288
+ ),
289
  ).alignment = wrap_alignment
290
  current_row += 1
291
+ if user_input.personal_medical_history.chronic_conditions:
292
  ws.cell(
293
+ row=current_row, column=1, value="Chronic Conditions"
294
  ).font = bold_font
295
  ws.cell(
296
  row=current_row,
297
  column=2,
298
+ value=", ".join(
299
+ str(c)
300
+ for c in user_input.personal_medical_history.chronic_conditions
301
+ ),
302
  ).alignment = wrap_alignment
303
  current_row += 1
304
  current_row += 1
 
308
  ws.cell(row=current_row, column=1, value="Family History").font = bold_font
309
  current_row += 1
310
  family_texts = [
311
+ f"{mem.relation} ({mem.cancer_type} at age {mem.age_at_diagnosis or 'N/A'})"
312
  for mem in user_input.family_history
313
  ]
314
  ws.cell(
 
320
  if user_input.female_specific:
321
  ws.cell(row=current_row, column=1, value="Female-Specific").font = bold_font
322
  current_row += 1
323
+ fs = user_input.female_specific
324
  female_specific_info = {
325
+ "Age at first period": fs.menstrual.age_at_menarche
326
+ if fs.menstrual
327
+ else None,
328
+ "Age at menopause": fs.menstrual.age_at_menopause if fs.menstrual else None,
329
+ "Number of live births": fs.parity.num_live_births if fs.parity else None,
330
+ "Age at first live birth": fs.parity.age_at_first_live_birth
331
+ if fs.parity
332
+ else None,
333
  }
334
  for key, val in female_specific_info.items():
335
  ws.cell(row=current_row, column=1, value=key).font = bold_font
 
339
  current_row += 1
340
  current_row += 1
341
 
342
+ # Current Symptoms
343
+ if user_input.symptoms:
344
+ ws.cell(row=current_row, column=1, value="Current Symptoms").font = bold_font
345
  current_row += 1
346
+ symptom_texts = [str(s.symptom_type) for s in user_input.symptoms]
347
  ws.cell(
348
+ row=current_row, column=2, value=", ".join(symptom_texts)
349
  ).alignment = wrap_alignment
350
  current_row += 2
351
 
352
+ # Note: clinical_observations doesn't exist in user_input.UserInput (strict schema)
353
+ # The strict schema uses clinical_tests instead (PSA, DRE, etc.)
354
+ # Skipping this section as it requires restructuring
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
 
356
  ws.merge_cells(
357
  start_row=current_row, start_column=1, end_row=current_row, end_column=6
 
452
  ws.column_dimensions["F"].width = 30
453
 
454
 
455
def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> None:
    """Create a worksheet with calculated risk scores.

    The sheet has a merged title row, a merged subtitle row and, starting at
    row 4, one row per risk score grouped by cancer type. When the assessment
    carries no calculated scores, a single placeholder message is written
    instead of the table.

    Args:
        wb: An openpyxl workbook.
        assessment: The structured initial assessment containing calculated scores.
    """
    ws = wb.create_sheet("Risk Model Scores")

    title_font = Font(bold=True, size=16, name="Calibri")
    header_font = Font(bold=True, color=HEX_COLORS["header_font"], name="Calibri")
    header_fill = PatternFill(start_color=HEX_COLORS["header_fill"], fill_type="solid")
    wrap_alignment = Alignment(wrap_text=True, vertical="top")

    ws.merge_cells("A1:E1")
    ws["A1"] = "Calculated Risk Scores (Ground Truth)"
    ws["A1"].font = title_font
    ws["A1"].alignment = Alignment(horizontal="center")

    ws.merge_cells("A2:E2")
    ws["A2"] = "Scores calculated using validated clinical risk models"
    ws["A2"].alignment = Alignment(horizontal="center")

    # Set column widths before the early return below, so the merged title
    # rows and the placeholder message get the same layout as a populated
    # sheet instead of falling back to default-width columns.
    column_widths = {"A": 20, "B": 25, "C": 15, "D": 50, "E": 40}
    for column_letter, width in column_widths.items():
        ws.column_dimensions[column_letter].width = width

    current_row = 4

    if not assessment.calculated_risk_scores:
        ws.cell(row=current_row, column=1, value="No risk scores calculated")
        return

    # Create headers
    headers = ["Cancer Type", "Model Name", "Score", "Interpretation", "References"]
    for col_idx, header in enumerate(headers, 1):
        cell = ws.cell(row=current_row, column=col_idx, value=header)
        cell.font = header_font
        cell.fill = header_fill

    current_row += 1

    # Add risk scores grouped by cancer type
    for cancer_type, scores in sorted(assessment.calculated_risk_scores.items()):
        for i, score in enumerate(scores):
            # Show cancer type only on first row for each cancer
            if i == 0:
                ws.cell(row=current_row, column=1, value=cancer_type)

            ws.cell(row=current_row, column=2, value=score.name)
            ws.cell(row=current_row, column=3, value=score.score or "N/A")

            interp_cell = ws.cell(
                row=current_row, column=4, value=score.interpretation or "N/A"
            )
            interp_cell.alignment = wrap_alignment

            refs = "; ".join(score.references) if score.references else "N/A"
            refs_cell = ws.cell(row=current_row, column=5, value=refs)
            refs_cell.alignment = wrap_alignment

            current_row += 1
520
+
521
+
522
  def _create_data_sheet(wb: Workbook, title: str, data: dict) -> None:
523
  ws = wb.create_sheet(title)
524
  pretty_json = json.dumps(data, indent=2)
 
713
  add_section(
714
  "Demographics",
715
  {
716
+ "Age": user_input.demographics.age_years,
717
  "Sex": user_input.demographics.sex,
718
  "Ethnicity": user_input.demographics.ethnicity or "N/A",
719
  },
 
723
  add_section(
724
  "Lifestyle",
725
  {
726
+ "Smoking Status": user_input.lifestyle.smoking.status,
727
+ "Pack Years": user_input.lifestyle.smoking.pack_years or "N/A",
728
  "Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
 
729
  "Physical Activity": user_input.lifestyle.physical_activity_level or "N/A",
730
  },
731
  )
 
733
  # --- Personal Medical History ---
734
  pmh = user_input.personal_medical_history
735
  if pmh and (
736
+ pmh.genetic_mutations or pmh.previous_cancers or pmh.chronic_conditions
737
  ):
738
  pmh_data = {}
739
+ if pmh.genetic_mutations:
740
+ pmh_data["Known Genetic Mutations"] = ", ".join(
741
+ str(m) for m in pmh.genetic_mutations
742
+ )
743
  if pmh.previous_cancers:
744
+ pmh_data["Previous Cancers"] = ", ".join(
745
+ str(c) for c in pmh.previous_cancers
746
+ )
747
+ if pmh.chronic_conditions:
748
+ pmh_data["Chronic Conditions"] = ", ".join(
749
+ str(c) for c in pmh.chronic_conditions
750
+ )
751
  add_section("Personal Medical History", pmh_data)
752
 
753
  # --- Family History ---
754
  if user_input.family_history:
755
  family_texts = [
756
+ f"{mem.relation} - {mem.cancer_type} (Age: {mem.age_at_diagnosis or 'N/A'})"
757
  for mem in user_input.family_history
758
  ]
759
  add_list_section("Family History", family_texts)
 
762
  fs = user_input.female_specific
763
  if fs:
764
  fs_data = {}
765
+ if fs.menstrual and fs.menstrual.age_at_menarche is not None:
766
+ fs_data["Age at first period"] = fs.menstrual.age_at_menarche
767
+ if fs.menstrual and fs.menstrual.age_at_menopause is not None:
768
+ fs_data["Age at menopause"] = fs.menstrual.age_at_menopause
769
+ if fs.parity and fs.parity.num_live_births is not None:
770
+ fs_data["Number of live births"] = fs.parity.num_live_births
771
+ if fs.parity and fs.parity.age_at_first_live_birth is not None:
772
+ fs_data["Age at first live birth"] = fs.parity.age_at_first_live_birth
773
+ if fs_data: # Only add section if we have data
774
+ add_section("Female-Specific", fs_data)
775
+
776
+ # --- Current Symptoms ---
777
+ if user_input.symptoms:
778
+ symptom_texts = [str(s.symptom_type) for s in user_input.symptoms]
779
+ add_list_section("Current Symptoms", symptom_texts)
780
 
781
  story.append(Spacer(1, SPACER_NORMAL))
782
 
783
+ # Note: clinical_observations doesn't exist in user_input.UserInput (strict schema)
784
+ # The strict schema uses clinical_tests instead - skipping this section
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
785
 
786
+ story.append(PageBreak())
787
+ story.append(Paragraph("Assessment", heading_style))
788
+ story.append(Spacer(1, SPACER_NORMAL))
789
+
790
+ # --- NEW: Calculated Risk Scores Section ---
791
+ if assessment.calculated_risk_scores:
792
+ story.append(Paragraph("Calculated Risk Scores", subheading_style))
793
+ story.append(Spacer(1, SPACER_SMALL))
794
+ risk_scores_intro = """
795
+ The following risk scores have been calculated using validated clinical risk models.
796
+ Each score represents a quantitative assessment based on your specific profile.
797
+ """
798
+ story.append(Paragraph(risk_scores_intro, styles["BodyText"]))
799
+ story.append(Spacer(1, SPACER_SMALL))
800
+
801
+ # Create table for calculated risk scores
802
+ score_data = [
803
  [
804
  Paragraph(h, table_header_style)
805
+ for h in ["Cancer Type", "Model", "Score", "Interpretation"]
 
 
 
 
 
 
806
  ]
807
  ]
808
+ score_style_cmds = [
809
  (
810
  "BACKGROUND",
811
  (0, 0),
 
818
  ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
819
  ("TOPPADDING", (0, 0), (-1, -1), 4),
820
  ]
821
+
822
+ # Sort by cancer type and add rows
823
+ for cancer_type, scores in sorted(assessment.calculated_risk_scores.items()):
824
+ for i, score in enumerate(scores):
825
+ # Only show cancer type on first row for each cancer
826
+ cancer_cell = Paragraph(cancer_type, table_body_style) if i == 0 else ""
827
+ score_data.append(
828
+ [
829
+ cancer_cell,
830
+ Paragraph(score.name, table_body_style),
831
+ Paragraph(score.score or "N/A", table_body_style),
832
+ Paragraph(score.interpretation or "N/A", table_body_style),
833
+ ]
834
+ )
835
+
836
+ score_widths = [1.5 * inch, 1.5 * inch, 1.0 * inch, 2.5 * inch]
837
+ scaled_widths = [w * (CONTENT_WIDTH / sum(score_widths)) for w in score_widths]
838
+ scores_table = Table(
839
+ score_data, colWidths=scaled_widths, style=score_style_cmds, splitByRow=1
840
  )
841
+ story.append(scores_table)
842
  story.append(Spacer(1, SPACER_NORMAL))
843
 
 
 
 
 
844
  # --- New 3-Column Summary Section ---
845
  headers = [
846
  Paragraph("<b>Overall Risk Score</b>", summary_header_style),
 
948
 
949
  story.append(Spacer(1, SPACER_NORMAL))
950
 
951
+ story.append(Paragraph("AI-Generated Risk Interpretations", subheading_style))
952
  story.append(Spacer(1, SPACER_SMALL))
953
  risk_intro_text = """
954
+ The following interpretations provide context and explanation for the calculated risk scores above.
955
+ These AI-generated insights identify key contributing factors and provide actionable recommendations.
956
+ For cancers with higher risk levels (3-5), additional details on risk factors and recommendations are provided.
 
957
  """
958
  story.append(Paragraph(risk_intro_text, styles["BodyText"]))
959
  story.append(Spacer(1, SPACER_SMALL))
 
1534
  Returns:
1535
  Mapping of RiskFactorCategory to integer points.
1536
  """
 
 
1537
  risk_points_by_category = defaultdict(int)
1538
  strength_to_points = {
1539
  ContributionStrength.MAJOR: 5,
 
1743
  Returns:
1744
  A ReportLab Table or Paragraph to insert in the story.
1745
  """
 
 
1746
  if not assessment.identified_risk_factors:
1747
  return Paragraph("No specific risk factors identified.", panel_body_style)
1748
 
src/sentinel/risk_aggregation.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Risk score aggregation and formatting utilities."""
2
+
3
+ from collections import defaultdict
4
+
5
+ from .models import RiskScore
6
+
7
+
8
def group_scores_by_cancer_type(scores: list[RiskScore]) -> dict[str, list[RiskScore]]:
    """Group risk scores by cancer type.

    Cancer type labels are stripped of surrounding whitespace before they are
    validated and used as grouping keys, so ``" Breast "`` and ``"Breast"``
    end up in the same group.

    Args:
        scores: List of risk scores from various models.

    Returns:
        Dictionary mapping cancer type to list of risk scores. Keys are in
        sorted order; scores keep their input order within each cancer type.

    Raises:
        ValueError: If any score has a missing, empty or whitespace-only
            cancer_type.
    """
    grouped: dict[str, list[RiskScore]] = defaultdict(list)

    for score in scores:
        # Normalize first, then validate: a whitespace-only label must be
        # rejected rather than silently grouped under an empty-string key.
        cancer_type = (score.cancer_type or "").strip()
        if not cancer_type:
            raise ValueError(
                f"Risk score '{score.name}' is missing cancer_type. "
                "All risk scores must have a cancer_type specified."
            )
        grouped[cancer_type].append(score)

    # Convert defaultdict to regular dict and sort by cancer type
    return dict(sorted(grouped.items()))
34
+
35
+
36
def format_scores_for_llm(grouped_scores: dict[str, list[RiskScore]]) -> str:
    """Format grouped risk scores for LLM context.

    Produces a Markdown-style text block: a heading, one ``##`` section per
    cancer type, one ``###`` entry per model with its score and any available
    description, interpretation and references, followed by a fixed set of
    instructions for the LLM.

    Args:
        grouped_scores: Dictionary mapping cancer type to list of risk scores.

    Returns:
        Formatted string representation of all risk scores, or the sentence
        "No risk scores calculated." when ``grouped_scores`` is empty.
    """
    if not grouped_scores:
        return "No risk scores calculated."

    lines = [
        "# Calculated Risk Scores (Ground Truth)\n",
        "The following risk scores have been calculated using validated models:\n",
    ]

    for cancer_type, scores in grouped_scores.items():
        lines.append(f"\n## {cancer_type}\n")

        for score in scores:
            # A missing score is shown as "N/A" (as the Excel and PDF reports
            # do) so the prompt never contains the literal text "None".
            score_text = score.score if score.score not in (None, "") else "N/A"

            lines.append(f"### {score.name}")
            lines.append(f"- **Score**: {score_text}")

            if score.description:
                lines.append(f"- **Description**: {score.description}")

            if score.interpretation:
                lines.append(f"- **Interpretation**: {score.interpretation}")

            if score.references:
                refs = "; ".join(score.references)
                lines.append(f"- **References**: {refs}")

            lines.append("")  # Empty line between models

    lines.extend(
        [
            "\n---\n",
            "**Important**: These scores are the ground truth. Your task is to:",
            "1. Explain what these scores mean for the patient in clear language",
            "2. Identify and highlight key risk factors contributing to elevated scores",
            "3. Provide actionable context and insights based on these scores",
            "4. DO NOT generate your own risk levels - explain and contextualize the calculated ones\n",
        ]
    )

    return "\n".join(lines)
85
+
86
+
87
def format_scores_for_pdf(
    grouped_scores: dict[str, list[RiskScore]],
) -> list[tuple[str, list[RiskScore]]]:
    """Order grouped risk scores for PDF presentation.

    Args:
        grouped_scores: Dictionary mapping cancer type to list of risk scores.

    Returns:
        List of (cancer_type, scores) tuples sorted by cancer type.
    """
    # Dictionary keys are unique, so ordering the keys alone gives the same
    # result as sorting the (key, value) pairs.
    ordered_types = sorted(grouped_scores)
    return [(cancer_type, grouped_scores[cancer_type]) for cancer_type in ordered_types]
src/sentinel/risk_models/qcancer.py CHANGED
@@ -23,6 +23,7 @@ from typing import Annotated
23
 
24
  from pydantic import Field
25
 
 
26
  from sentinel.risk_models.base import RiskModel
27
  from sentinel.user_input import (
28
  AlcoholConsumption,
@@ -1780,6 +1781,107 @@ class QCancerRiskModel(RiskModel):
1780
  "Values sum to 100% and reflect relative likelihoods over the next 10 years; higher percentages warrant clinical review."
1781
  )
1782
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1783
  def references(self) -> list[str]:
1784
  return [
1785
  "Hippisley-Cox J, Coupland C. QCancer (10 year risk) BMJ. 2014;349:g4606.",
 
23
 
24
  from pydantic import Field
25
 
26
+ from sentinel.models import RiskScore
27
  from sentinel.risk_models.base import RiskModel
28
  from sentinel.user_input import (
29
  AlcoholConsumption,
 
1781
  "Values sum to 100% and reflect relative likelihoods over the next 10 years; higher percentages warrant clinical review."
1782
  )
1783
 
1784
def run(self, user: UserInput) -> list[RiskScore]:
    """Compute QCancer scores and return them as RiskScore objects.

    Overrides the base class to return multiple scores (one per cancer type).
    If the patient's sex does not start with "f" or "m", or if parameter
    extraction / probability computation raises ``ValueError``, a single
    "N/A" score carrying the reason is returned instead.

    Args:
        user: The user profile to score.

    Returns:
        List of RiskScore objects, one for each cancer type assessed, or a
        one-element list holding an "N/A" score.
    """
    sex = (user.demographics.sex or "").strip().lower()

    try:
        if sex.startswith("f"):
            params = self._extract_female_params(user)
            probabilities = compute_female_probabilities(**params)
            return self._create_individual_scores(probabilities, is_female=True)
        if sex.startswith("m"):
            params = self._extract_male_params(user)
            probabilities = compute_male_probabilities(**params)
            return self._create_individual_scores(probabilities, is_female=False)
    except ValueError as exc:
        # Surface the validation problem to the caller as an "N/A" score
        # rather than aborting the whole assessment.
        return [self._unavailable_score(str(exc))]

    # Sex missing or unrecognised: the model cannot be applied.
    return [
        self._unavailable_score("QCancer requires patient sex (male or female).")
    ]

def _unavailable_score(self, reason: str) -> RiskScore:
    """Build the placeholder "N/A" score used when QCancer cannot be computed.

    Args:
        reason: Human-readable explanation appended after "N/A: ".

    Returns:
        A RiskScore labelled "Multiple Cancer Sites" that carries the model's
        description, interpretation and references.
    """
    return RiskScore(
        name=self.name,
        score=f"N/A: {reason}",
        cancer_type="Multiple Cancer Sites",
        description=self.description(),
        interpretation=self.interpretation(),
        references=self.references(),
    )
1833
+
1834
def _create_individual_scores(
    self, risks: dict[str, float], is_female: bool
) -> list[RiskScore]:
    """Create individual RiskScore objects for each cancer type.

    The first entry is always the "No Cancer" score, read from the "none"
    key. The remaining entries follow the order of FEMALE_CANCER_TYPES or
    MALE_CANCER_TYPES. Any key missing from ``risks`` is reported as 0.0%.

    Args:
        risks: Dictionary of cancer names to probabilities.
        is_female: Whether results are for female patient.

    Returns:
        List of RiskScore objects.
    """
    # RiskScore is already imported at module level, so no function-local
    # import is needed here.
    order = FEMALE_CANCER_TYPES if is_female else MALE_CANCER_TYPES

    # Add "No Cancer" score first
    no_cancer_pct = risks.get("none", 0.0)
    scores = [
        RiskScore(
            name="QCancer",
            score=f"{no_cancer_pct:.1f}%",
            cancer_type="No Cancer",
            description="10-year probability of not developing cancer",
            interpretation="Baseline probability - higher values indicate lower overall cancer risk",
            references=self.references(),
        )
    ]

    # Add each cancer type
    for cancer_name in order:
        pct = risks.get(cancer_name, 0.0)
        # Turn an identifier such as "some_cancer" into "Some Cancer".
        display_name = cancer_name.replace("_", " ").title()

        scores.append(
            RiskScore(
                name="QCancer",
                score=f"{pct:.1f}%",
                cancer_type=display_name,
                description=f"10-year probability of {display_name.lower()}",
                interpretation=(
                    "Percentages reflect relative likelihood over next 10 years. "
                    "Values >1% warrant clinical review."
                ),
                references=self.references(),
            )
        )

    return scores
1884
+
1885
  def references(self) -> list[str]:
1886
  return [
1887
  "Hippisley-Cox J, Coupland C. QCancer (10 year risk) BMJ. 2014;349:g4606.",
src/sentinel/utils.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Literal
5
 
6
  import yaml
7
 
8
- from .models import UserInput
9
 
10
 
11
  def load_user_file(source: str | Any) -> UserInput:
 
5
 
6
  import yaml
7
 
8
+ from .user_input import UserInput
9
 
10
 
11
  def load_user_file(source: str | Any) -> UserInput:
tests/test_conversation.py CHANGED
@@ -4,19 +4,36 @@ from unittest.mock import MagicMock, patch
4
  from sentinel.conversation import ConversationManager
5
  from sentinel.models import (
6
  ConversationResponse,
7
- Demographics,
8
  InitialAssessment,
 
 
 
 
9
  Lifestyle,
10
  PersonalMedicalHistory,
 
11
  UserInput,
12
  )
13
 
14
 
15
  def sample_user() -> UserInput:
16
  return UserInput(
17
- demographics=Demographics(age=30, sex="male"),
18
- lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="none"),
19
- personal_medical_history=PersonalMedicalHistory(),
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  family_history=[],
21
  )
22
 
@@ -25,11 +42,10 @@ def sample_user() -> UserInput:
25
  @patch("sentinel.llm_service.create_conversational_chain")
26
  def test_conversation_flow(mock_create_conversational_chain, mock_create_initial_chain):
27
  structured = MagicMock()
28
- structured.prompt.format.return_value = "full prompt"
29
  freeform = MagicMock()
30
  structured.invoke.return_value = {
31
  "overall_summary": "ok",
32
- "risk_assessments": [],
33
  "dx_recommendations": [],
34
  }
35
  freeform.invoke.return_value = "hi"
@@ -37,15 +53,21 @@ def test_conversation_flow(mock_create_conversational_chain, mock_create_initial
37
  mock_create_conversational_chain.return_value = freeform
38
 
39
  conv = ConversationManager(structured, freeform)
40
- result = conv.initial_assessment(sample_user())
 
41
  assert isinstance(result, InitialAssessment)
42
  assert result.overall_summary == "ok"
43
- assert conv.history == [("full prompt", result.model_dump_json())]
 
 
 
 
 
44
 
45
  answer = conv.follow_up("question")
46
  assert isinstance(answer, ConversationResponse)
47
  assert answer.response == "hi"
48
- assert conv.history == [
49
- ("full prompt", result.model_dump_json()),
50
- ("question", "hi"),
51
- ]
 
4
  from sentinel.conversation import ConversationManager
5
  from sentinel.models import (
6
  ConversationResponse,
 
7
  InitialAssessment,
8
+ )
9
+ from sentinel.user_input import (
10
+ Anthropometrics,
11
+ Demographics,
12
  Lifestyle,
13
  PersonalMedicalHistory,
14
+ SmokingHistory,
15
  UserInput,
16
  )
17
 
18
 
19
  def sample_user() -> UserInput:
20
  return UserInput(
21
+ demographics=Demographics(
22
+ age_years=30,
23
+ sex="male",
24
+ anthropometrics=Anthropometrics(height_cm=175, weight_kg=70),
25
+ ),
26
+ lifestyle=Lifestyle(
27
+ smoking=SmokingHistory(
28
+ status="never",
29
+ cigarettes_per_day=0,
30
+ years_smoked=0,
31
+ ),
32
+ ),
33
+ personal_medical_history=PersonalMedicalHistory(
34
+ chronic_conditions=[],
35
+ previous_cancers=[],
36
+ ),
37
  family_history=[],
38
  )
39
 
 
42
  @patch("sentinel.llm_service.create_conversational_chain")
43
  def test_conversation_flow(mock_create_conversational_chain, mock_create_initial_chain):
44
  structured = MagicMock()
 
45
  freeform = MagicMock()
46
  structured.invoke.return_value = {
47
  "overall_summary": "ok",
48
+ "llm_risk_interpretations": [],
49
  "dx_recommendations": [],
50
  }
51
  freeform.invoke.return_value = "hi"
 
53
  mock_create_conversational_chain.return_value = freeform
54
 
55
  conv = ConversationManager(structured, freeform)
56
+ user = sample_user()
57
+ result = conv.initial_assessment(user)
58
  assert isinstance(result, InitialAssessment)
59
  assert result.overall_summary == "ok"
60
+ assert result.calculated_risk_scores == {}
61
+
62
+ # Verify history contains initial assessment message
63
+ assert len(conv.history) == 1
64
+ assert conv.history[0][0].startswith("Initial assessment for user profile:")
65
+ assert conv.history[0][1] == result.model_dump_json()
66
 
67
  answer = conv.follow_up("question")
68
  assert isinstance(answer, ConversationResponse)
69
  assert answer.response == "hi"
70
+
71
+ # Verify follow-up added to history
72
+ assert len(conv.history) == 2
73
+ assert conv.history[1] == ("question", "hi")
tests/test_demo.py CHANGED
@@ -7,20 +7,31 @@ import yaml
7
 
8
  from sentinel.models import (
9
  CancerRiskAssessment,
10
- ClinicalObservation,
11
  ContributingFactor,
12
  ContributionStrength,
13
- Demographics,
14
  DxRecommendation,
15
- FamilyMemberCancer,
16
  InitialAssessment,
17
- Lifestyle,
18
- PersonalMedicalHistory,
19
  RiskFactor,
20
  RiskFactorCategory,
21
- UserInput,
22
  )
23
  from sentinel.reporting import generate_excel_report, generate_pdf_report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  from sentinel.utils import load_user_file
25
 
26
 
@@ -32,8 +43,15 @@ def test_load_user_file_yaml(tmp_path):
32
  """
33
 
34
  data = {
35
- "demographics": {"age": 30, "sex": "male"},
36
- "lifestyle": {"smoking_status": "never", "alcohol_consumption": "none"},
 
 
 
 
 
 
 
37
  "personal_medical_history": {},
38
  "family_history": [],
39
  }
@@ -42,9 +60,9 @@ def test_load_user_file_yaml(tmp_path):
42
 
43
  user = load_user_file(str(path))
44
  assert isinstance(user, UserInput)
45
- assert user.demographics.age == 30
46
- assert user.lifestyle.smoking_status == "never"
47
- assert user.clinical_observations == []
48
 
49
 
50
  @pytest.mark.parametrize("save_files", [True, False])
@@ -57,41 +75,31 @@ def test_generate_reports(tmp_path, save_files):
57
  """
58
  # 1. Create mock UserInput data with all fields
59
  user = UserInput(
60
- demographics=Demographics(age=45, sex="Female", ethnicity="Caucasian"),
 
 
 
 
 
61
  lifestyle=Lifestyle(
62
- smoking_status="former",
63
- smoking_pack_years=10,
64
- alcohol_consumption="light",
65
- dietary_habits="Balanced",
66
- physical_activity_level="moderate",
67
  ),
68
  personal_medical_history=PersonalMedicalHistory(
69
- previous_cancers=["Skin Cancer"],
70
- known_genetic_mutations=["BRCA2"],
71
- chronic_illnesses=["IBS"],
72
  ),
73
  family_history=[
74
  FamilyMemberCancer(
75
- relative="Mother", cancer_type="Breast Cancer", age_at_diagnosis=50
 
 
 
 
76
  )
77
  ],
78
- clinical_observations=[
79
- ClinicalObservation(
80
- test_name="Blood Pressure",
81
- value="120/80",
82
- unit="mmHg",
83
- reference_range="<130/85",
84
- date="2023-05-10",
85
- ),
86
- ClinicalObservation(
87
- test_name="Cholesterol",
88
- value="190",
89
- unit="mg/dL",
90
- reference_range="<200",
91
- date="2023-05-10",
92
- ),
93
- ],
94
- current_concerns_or_symptoms="Occasional headaches.",
95
  )
96
 
97
  # 2. Create mock InitialAssessment data
@@ -122,7 +130,7 @@ def test_generate_reports(tmp_path, save_files):
122
  category=RiskFactorCategory.LIFESTYLE,
123
  ),
124
  ],
125
- risk_assessments=[
126
  CancerRiskAssessment(
127
  cancer_type="Breast Cancer",
128
  risk_level=4,
 
7
 
8
  from sentinel.models import (
9
  CancerRiskAssessment,
 
10
  ContributingFactor,
11
  ContributionStrength,
 
12
  DxRecommendation,
 
13
  InitialAssessment,
 
 
14
  RiskFactor,
15
  RiskFactorCategory,
 
16
  )
17
  from sentinel.reporting import generate_excel_report, generate_pdf_report
18
+ from sentinel.user_input import (
19
+ AlcoholConsumption,
20
+ Anthropometrics,
21
+ CancerType,
22
+ Demographics,
23
+ Ethnicity,
24
+ FamilyMemberCancer,
25
+ FamilyRelation,
26
+ FamilySide,
27
+ Lifestyle,
28
+ PersonalMedicalHistory,
29
+ RelationshipDegree,
30
+ Sex,
31
+ SmokingHistory,
32
+ SmokingStatus,
33
+ UserInput,
34
+ )
35
  from sentinel.utils import load_user_file
36
 
37
 
 
43
  """
44
 
45
  data = {
46
+ "demographics": {
47
+ "age_years": 30,
48
+ "sex": "male",
49
+ "anthropometrics": {"height_cm": 175, "weight_kg": 70},
50
+ },
51
+ "lifestyle": {
52
+ "smoking": {"status": "never"},
53
+ "alcohol_consumption": "none",
54
+ },
55
  "personal_medical_history": {},
56
  "family_history": [],
57
  }
 
60
 
61
  user = load_user_file(str(path))
62
  assert isinstance(user, UserInput)
63
+ assert user.demographics.age_years == 30
64
+ assert user.lifestyle.smoking.status == SmokingStatus.NEVER
65
+ assert user.symptoms == []
66
 
67
 
68
  @pytest.mark.parametrize("save_files", [True, False])
 
75
  """
76
  # 1. Create mock UserInput data with all fields
77
  user = UserInput(
78
+ demographics=Demographics(
79
+ age_years=45,
80
+ sex=Sex.FEMALE,
81
+ ethnicity=Ethnicity.WHITE,
82
+ anthropometrics=Anthropometrics(height_cm=165, weight_kg=70),
83
+ ),
84
  lifestyle=Lifestyle(
85
+ smoking=SmokingHistory(
86
+ status=SmokingStatus.FORMER,
87
+ pack_years=10,
88
+ ),
89
+ alcohol_consumption=AlcoholConsumption.LIGHT,
90
  ),
91
  personal_medical_history=PersonalMedicalHistory(
92
+ previous_cancers=[CancerType.MELANOMA],
 
 
93
  ),
94
  family_history=[
95
  FamilyMemberCancer(
96
+ relation=FamilyRelation.MOTHER,
97
+ cancer_type=CancerType.BREAST,
98
+ age_at_diagnosis=50,
99
+ degree=RelationshipDegree.FIRST,
100
+ side=FamilySide.MATERNAL,
101
  )
102
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  )
104
 
105
  # 2. Create mock InitialAssessment data
 
130
  category=RiskFactorCategory.LIFESTYLE,
131
  ),
132
  ],
133
+ llm_risk_interpretations=[
134
  CancerRiskAssessment(
135
  cancer_type="Breast Cancer",
136
  risk_level=4,
tests/test_integration_canrisk_api.py CHANGED
@@ -15,15 +15,30 @@ from pathlib import Path
15
  import pytest
16
 
17
  from sentinel.api_clients.canrisk import BOADICEAInput, CanRiskClient
18
- from sentinel.models import (
 
 
 
 
19
  Demographics,
 
20
  FamilyMemberCancer,
 
 
21
  FemaleSpecific,
 
 
 
22
  Lifestyle,
 
 
23
  PersonalMedicalHistory,
 
 
 
 
24
  UserInput,
25
  )
26
- from sentinel.risk_models.boadicea import BOADICEARiskModel
27
 
28
  CREDENTIALS_AVAILABLE = bool(
29
  os.getenv("CANRISK_USERNAME") and os.getenv("CANRISK_PASSWORD")
@@ -48,28 +63,37 @@ class Scenario:
48
  def _high_risk_user() -> UserInput:
49
  return UserInput(
50
  demographics=Demographics(
51
- age=42,
52
- sex="female",
53
- ethnicity="Ashkenazi Jewish",
54
- height=1.65,
55
- weight=65.0,
 
 
 
56
  ),
57
- lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="none"),
58
  personal_medical_history=PersonalMedicalHistory(
59
- known_genetic_mutations=["BRCA1", "BRCA2"],
60
  ),
61
  female_specific=FemaleSpecific(
62
- age_at_first_period=13,
63
- age_at_first_live_birth=28,
64
- num_live_births=1,
65
- hormone_therapy_use="N",
66
  ),
67
  family_history=[
68
  FamilyMemberCancer(
69
- relative="mother", cancer_type="breast", age_at_diagnosis=52
 
 
 
 
70
  ),
71
  FamilyMemberCancer(
72
- relative="sister", cancer_type="ovarian", age_at_diagnosis=48
 
 
 
 
73
  ),
74
  ],
75
  )
@@ -78,28 +102,37 @@ def _high_risk_user() -> UserInput:
78
  def _moderate_risk_user() -> UserInput:
79
  return UserInput(
80
  demographics=Demographics(
81
- age=50,
82
- sex="female",
83
- ethnicity="Hispanic",
84
- height=1.60,
85
- weight=70.0,
 
 
 
86
  ),
87
- lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="light"),
88
  personal_medical_history=PersonalMedicalHistory(
89
- known_genetic_mutations=["BRCA1"],
90
  ),
91
  female_specific=FemaleSpecific(
92
- age_at_first_period=12,
93
- age_at_first_live_birth=30,
94
- num_live_births=2,
95
- hormone_therapy_use="former",
96
  ),
97
  family_history=[
98
  FamilyMemberCancer(
99
- relative="mother", cancer_type="breast", age_at_diagnosis=60
 
 
 
 
100
  ),
101
  FamilyMemberCancer(
102
- relative="maternal aunt", cancer_type="breast", age_at_diagnosis=55
 
 
 
 
103
  ),
104
  ],
105
  )
@@ -108,24 +141,28 @@ def _moderate_risk_user() -> UserInput:
108
  def _average_risk_user() -> UserInput:
109
  return UserInput(
110
  demographics=Demographics(
111
- age=38,
112
- sex="female",
113
- ethnicity="White",
114
- height=1.68,
115
- weight=62.0,
 
 
 
116
  ),
117
- lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="moderate"),
118
  personal_medical_history=PersonalMedicalHistory(),
119
  female_specific=FemaleSpecific(
120
- age_at_first_period=12,
121
- hormone_therapy_use="never",
122
- num_live_births=0,
123
  ),
124
  family_history=[
125
  FamilyMemberCancer(
126
- relative="paternal grandmother",
127
- cancer_type="breast",
128
  age_at_diagnosis=67,
 
 
129
  ),
130
  ],
131
  )
 
15
  import pytest
16
 
17
  from sentinel.api_clients.canrisk import BOADICEAInput, CanRiskClient
18
+ from sentinel.risk_models.boadicea import BOADICEARiskModel
19
+ from sentinel.user_input import (
20
+ AlcoholConsumption,
21
+ Anthropometrics,
22
+ CancerType,
23
  Demographics,
24
+ Ethnicity,
25
  FamilyMemberCancer,
26
+ FamilyRelation,
27
+ FamilySide,
28
  FemaleSpecific,
29
+ GeneticMutation,
30
+ HormoneUse,
31
+ HormoneUseHistory,
32
  Lifestyle,
33
+ MenstrualHistory,
34
+ ParityHistory,
35
  PersonalMedicalHistory,
36
+ RelationshipDegree,
37
+ Sex,
38
+ SmokingHistory,
39
+ SmokingStatus,
40
  UserInput,
41
  )
 
42
 
43
  CREDENTIALS_AVAILABLE = bool(
44
  os.getenv("CANRISK_USERNAME") and os.getenv("CANRISK_PASSWORD")
 
63
  def _high_risk_user() -> UserInput:
64
  return UserInput(
65
  demographics=Demographics(
66
+ age_years=42,
67
+ sex=Sex.FEMALE,
68
+ ethnicity=Ethnicity.ASHKENAZI_JEWISH,
69
+ anthropometrics=Anthropometrics(height_cm=165, weight_kg=65.0),
70
+ ),
71
+ lifestyle=Lifestyle(
72
+ smoking=SmokingHistory(status=SmokingStatus.NEVER),
73
+ alcohol_consumption=AlcoholConsumption.NONE,
74
  ),
 
75
  personal_medical_history=PersonalMedicalHistory(
76
+ genetic_mutations=[GeneticMutation.BRCA1, GeneticMutation.BRCA2],
77
  ),
78
  female_specific=FemaleSpecific(
79
+ menstrual=MenstrualHistory(age_at_menarche=13),
80
+ parity=ParityHistory(age_at_first_live_birth=28, num_live_births=1),
81
+ hormone_use=HormoneUseHistory(estrogen_use=HormoneUse.NEVER),
 
82
  ),
83
  family_history=[
84
  FamilyMemberCancer(
85
+ relation=FamilyRelation.MOTHER,
86
+ cancer_type=CancerType.BREAST,
87
+ age_at_diagnosis=52,
88
+ degree=RelationshipDegree.FIRST,
89
+ side=FamilySide.MATERNAL,
90
  ),
91
  FamilyMemberCancer(
92
+ relation=FamilyRelation.SISTER,
93
+ cancer_type=CancerType.OVARIAN,
94
+ age_at_diagnosis=48,
95
+ degree=RelationshipDegree.FIRST,
96
+ side=FamilySide.UNKNOWN,
97
  ),
98
  ],
99
  )
 
102
  def _moderate_risk_user() -> UserInput:
103
  return UserInput(
104
  demographics=Demographics(
105
+ age_years=50,
106
+ sex=Sex.FEMALE,
107
+ ethnicity=Ethnicity.HISPANIC,
108
+ anthropometrics=Anthropometrics(height_cm=160, weight_kg=70.0),
109
+ ),
110
+ lifestyle=Lifestyle(
111
+ smoking=SmokingHistory(status=SmokingStatus.NEVER),
112
+ alcohol_consumption=AlcoholConsumption.LIGHT,
113
  ),
 
114
  personal_medical_history=PersonalMedicalHistory(
115
+ genetic_mutations=[GeneticMutation.BRCA1],
116
  ),
117
  female_specific=FemaleSpecific(
118
+ menstrual=MenstrualHistory(age_at_menarche=12),
119
+ parity=ParityHistory(age_at_first_live_birth=30, num_live_births=2),
120
+ hormone_use=HormoneUseHistory(estrogen_use=HormoneUse.FORMER),
 
121
  ),
122
  family_history=[
123
  FamilyMemberCancer(
124
+ relation=FamilyRelation.MOTHER,
125
+ cancer_type=CancerType.BREAST,
126
+ age_at_diagnosis=60,
127
+ degree=RelationshipDegree.FIRST,
128
+ side=FamilySide.MATERNAL,
129
  ),
130
  FamilyMemberCancer(
131
+ relation=FamilyRelation.MATERNAL_AUNT,
132
+ cancer_type=CancerType.BREAST,
133
+ age_at_diagnosis=55,
134
+ degree=RelationshipDegree.SECOND,
135
+ side=FamilySide.MATERNAL,
136
  ),
137
  ],
138
  )
 
141
  def _average_risk_user() -> UserInput:
142
  return UserInput(
143
  demographics=Demographics(
144
+ age_years=38,
145
+ sex=Sex.FEMALE,
146
+ ethnicity=Ethnicity.WHITE,
147
+ anthropometrics=Anthropometrics(height_cm=168, weight_kg=62.0),
148
+ ),
149
+ lifestyle=Lifestyle(
150
+ smoking=SmokingHistory(status=SmokingStatus.NEVER),
151
+ alcohol_consumption=AlcoholConsumption.MODERATE,
152
  ),
 
153
  personal_medical_history=PersonalMedicalHistory(),
154
  female_specific=FemaleSpecific(
155
+ menstrual=MenstrualHistory(age_at_menarche=12),
156
+ parity=ParityHistory(num_live_births=0),
157
+ hormone_use=HormoneUseHistory(estrogen_use=HormoneUse.NEVER),
158
  ),
159
  family_history=[
160
  FamilyMemberCancer(
161
+ relation=FamilyRelation.PATERNAL_GRANDMOTHER,
162
+ cancer_type=CancerType.BREAST,
163
  age_at_diagnosis=67,
164
+ degree=RelationshipDegree.SECOND,
165
+ side=FamilySide.PATERNAL,
166
  ),
167
  ],
168
  )
tests/test_main.py CHANGED
@@ -31,18 +31,30 @@ def test_root():
31
  @patch("apps.api.main.SentinelFactory")
32
  def test_assess_local(mock_factory):
33
  payload = {
34
- "demographics": {"age": 55, "sex": "male", "ethnicity": "Caucasian"},
 
 
 
 
 
35
  "lifestyle": {
36
- "smoking_status": "former",
37
- "smoking_pack_years": 10,
 
 
38
  "alcohol_consumption": "moderate",
39
  },
40
  "family_history": [
41
- {"relative": "father", "cancer_type": "lung", "age_at_diagnosis": 60}
 
 
 
 
 
 
42
  ],
43
  "personal_medical_history": {
44
  "previous_cancers": ["melanoma"],
45
- "chronic_illnesses": [],
46
  },
47
  }
48
  expected = {
@@ -51,6 +63,7 @@ def test_assess_local(mock_factory):
51
  "response": None,
52
  "overall_summary": "ok",
53
  "overall_risk_score": None,
 
54
  "identified_risk_factors": [],
55
  "risk_assessments": [],
56
  "dx_recommendations": [],
@@ -73,10 +86,17 @@ def test_assess_local(mock_factory):
73
  @patch("apps.api.main.SentinelFactory")
74
  def test_assess_bad_provider(mock_factory):
75
  payload = {
76
- "demographics": {"age": 30, "sex": "male"},
77
- "lifestyle": {"smoking_status": "never", "alcohol_consumption": "none"},
 
 
 
 
 
 
 
78
  "family_history": [],
79
- "personal_medical_history": {"previous_cancers": [], "chronic_illnesses": []},
80
  }
81
  mock_factory.side_effect = ValueError("bad")
82
  response = client.post("/assess/invalid", json={"user_input": payload})
@@ -86,18 +106,22 @@ def test_assess_bad_provider(mock_factory):
86
  @patch("apps.api.main.SentinelFactory")
87
  def test_assess_with_observations(mock_factory):
88
  payload = {
89
- "demographics": {"age": 60, "sex": "male"},
90
- "lifestyle": {"smoking_status": "never", "alcohol_consumption": "none"},
91
- "personal_medical_history": {"previous_cancers": [], "chronic_illnesses": []},
 
 
 
 
 
 
 
92
  "family_history": [],
93
- "clinical_observations": [
94
- {
95
- "test_name": "PSA",
96
- "value": "5",
97
- "unit": "ng/mL",
98
- "reference_range": "<4",
99
  }
100
- ],
101
  }
102
  expected = {
103
  "thinking": None,
@@ -105,6 +129,7 @@ def test_assess_with_observations(mock_factory):
105
  "response": None,
106
  "overall_summary": "ok",
107
  "overall_risk_score": None,
 
108
  "identified_risk_factors": [],
109
  "risk_assessments": [],
110
  "dx_recommendations": [],
 
31
  @patch("apps.api.main.SentinelFactory")
32
  def test_assess_local(mock_factory):
33
  payload = {
34
+ "demographics": {
35
+ "age_years": 55,
36
+ "sex": "male",
37
+ "ethnicity": "white",
38
+ "anthropometrics": {"height_cm": 175, "weight_kg": 80},
39
+ },
40
  "lifestyle": {
41
+ "smoking": {
42
+ "status": "former",
43
+ "pack_years": 10,
44
+ },
45
  "alcohol_consumption": "moderate",
46
  },
47
  "family_history": [
48
+ {
49
+ "relation": "father",
50
+ "cancer_type": "lung_cancer",
51
+ "age_at_diagnosis": 60,
52
+ "degree": "1",
53
+ "side": "paternal",
54
+ }
55
  ],
56
  "personal_medical_history": {
57
  "previous_cancers": ["melanoma"],
 
58
  },
59
  }
60
  expected = {
 
63
  "response": None,
64
  "overall_summary": "ok",
65
  "overall_risk_score": None,
66
+ "calculated_risk_scores": {},
67
  "identified_risk_factors": [],
68
  "risk_assessments": [],
69
  "dx_recommendations": [],
 
86
  @patch("apps.api.main.SentinelFactory")
87
  def test_assess_bad_provider(mock_factory):
88
  payload = {
89
+ "demographics": {
90
+ "age_years": 30,
91
+ "sex": "male",
92
+ "anthropometrics": {"height_cm": 175, "weight_kg": 70},
93
+ },
94
+ "lifestyle": {
95
+ "smoking": {"status": "never"},
96
+ "alcohol_consumption": "none",
97
+ },
98
  "family_history": [],
99
+ "personal_medical_history": {},
100
  }
101
  mock_factory.side_effect = ValueError("bad")
102
  response = client.post("/assess/invalid", json={"user_input": payload})
 
106
  @patch("apps.api.main.SentinelFactory")
107
  def test_assess_with_observations(mock_factory):
108
  payload = {
109
+ "demographics": {
110
+ "age_years": 60,
111
+ "sex": "male",
112
+ "anthropometrics": {"height_cm": 175, "weight_kg": 75},
113
+ },
114
+ "lifestyle": {
115
+ "smoking": {"status": "never"},
116
+ "alcohol_consumption": "none",
117
+ },
118
+ "personal_medical_history": {},
119
  "family_history": [],
120
+ "clinical_tests": {
121
+ "psa": {
122
+ "value_ng_ml": 5.0,
 
 
 
123
  }
124
+ },
125
  }
126
  expected = {
127
  "thinking": None,
 
129
  "response": None,
130
  "overall_summary": "ok",
131
  "overall_risk_score": None,
132
+ "calculated_risk_scores": {},
133
  "identified_risk_factors": [],
134
  "risk_assessments": [],
135
  "dx_recommendations": [],
tests/test_risk_aggregation.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for risk aggregation utilities."""
2
+
3
+ import pytest
4
+
5
+ from sentinel.models import RiskScore
6
+ from sentinel.risk_aggregation import (
7
+ format_scores_for_llm,
8
+ format_scores_for_pdf,
9
+ group_scores_by_cancer_type,
10
+ )
11
+
12
+
13
def test_group_scores_by_cancer_type():
    """Scores sharing a cancer type end up in one group, in input order."""
    gail = RiskScore(
        name="Gail Model",
        score="5%",
        cancer_type="Breast Cancer",
        description="5-year risk",
    )
    claus = RiskScore(
        name="Claus Model",
        score="3%",
        cancer_type="Breast Cancer",
        description="Lifetime risk",
    )
    plco = RiskScore(
        name="PLCOm2012",
        score="2%",
        cancer_type="Lung Cancer",
        description="6-year risk",
    )

    grouped = group_scores_by_cancer_type([gail, claus, plco])

    # Exactly the two expected cancer types are present.
    assert set(grouped) == {"Breast Cancer", "Lung Cancer"}
    # Both breast models are kept, in the order they were supplied.
    breast_names = [entry.name for entry in grouped["Breast Cancer"]]
    assert breast_names == ["Gail Model", "Claus Model"]
    assert len(grouped["Lung Cancer"]) == 1
45
+
46
+
47
def test_group_scores_empty():
    """An empty score list groups to an empty dictionary."""
    assert group_scores_by_cancer_type([]) == {}
51
+
52
+
53
def test_group_scores_no_cancer_type():
    """A score without a cancer type is rejected with a ValueError."""
    untyped_score = RiskScore(name="Test Model", score="5%", cancer_type=None)

    # The error message is expected to name the offending model.
    with pytest.raises(ValueError, match=r"Test Model.*missing cancer_type"):
        group_scores_by_cancer_type([untyped_score])
62
+
63
+
64
def test_format_scores_for_llm():
    """The LLM context contains the header, every score field and the guardrail."""
    gail = RiskScore(
        name="Gail Model",
        score="5%",
        cancer_type="Breast Cancer",
        description="5-year risk",
        interpretation="Low to moderate risk",
        references=["Gail et al., 1989"],
    )

    formatted = format_scores_for_llm(group_scores_by_cancer_type([gail]))

    expected_fragments = (
        "# Calculated Risk Scores (Ground Truth)",
        "Breast Cancer",
        "Gail Model",
        "5%",
        "5-year risk",
        "Low to moderate risk",
        "Gail et al., 1989",
        "DO NOT generate your own risk levels",
    )
    for fragment in expected_fragments:
        assert fragment in formatted
88
+
89
+
90
def test_format_scores_for_llm_empty():
    """With no grouped scores the formatter returns its fixed placeholder text."""
    assert format_scores_for_llm({}) == "No risk scores calculated."
94
+
95
+
96
def test_format_scores_for_pdf():
    """Test that PDF formatting returns groups sorted by cancer type.

    The scores are supplied in reverse alphabetical order of cancer type, so
    the ordering assertions only pass if ``format_scores_for_pdf`` really
    sorts its input rather than echoing the order it was given.
    """
    scores = [
        RiskScore(
            name="PLCOm2012",
            score="2%",
            cancer_type="Lung Cancer",
        ),
        RiskScore(
            name="Gail Model",
            score="5%",
            cancer_type="Breast Cancer",
        ),
    ]

    grouped = group_scores_by_cancer_type(scores)
    formatted = format_scores_for_pdf(grouped)

    assert len(formatted) == 2
    assert formatted[0][0] == "Breast Cancer"
    assert formatted[1][0] == "Lung Cancer"
    assert len(formatted[0][1]) == 1
    assert len(formatted[1][1]) == 1
    # Each group must still carry the score that belongs to it.
    assert formatted[0][1][0].name == "Gail Model"
    assert formatted[1][1][0].name == "PLCOm2012"