Spaces:

InstaDeepAI
/

sentinel

Runtime error

App Files Community

jeuko committed on Oct 20

Commit

cc034ee

verified ·

1 Parent(s): 8018595

Sync from GitHub (main)

Browse files

Files changed (23) hide show

apps/api/main.py +2 -1
apps/cli/main.py +28 -8
apps/streamlit_ui/page_versions/profile/v2.py +5 -1
apps/streamlit_ui/pages/1_Profile.py +5 -1
apps/streamlit_ui/pages/3_Assessment.py +25 -1
configs/output_format/assessment.yaml +10 -8
examples/benchmark/benchmark_female.yaml +37 -25
examples/benchmark/benchmark_male.yaml +41 -25
prompts/instruction/assessment.md +12 -8
scripts/generate_documentation.py +1 -1
src/sentinel/api_clients/canrisk.py +1 -1
src/sentinel/conversation.py +27 -5
src/sentinel/models.py +18 -211
src/sentinel/prompting.py +1 -0
src/sentinel/reporting.py +181 -148
src/sentinel/risk_aggregation.py +98 -0
src/sentinel/risk_models/qcancer.py +102 -0
src/sentinel/utils.py +1 -1
tests/test_conversation.py +34 -12
tests/test_demo.py +47 -39
tests/test_integration_canrisk_api.py +76 -39
tests/test_main.py +43 -18
tests/test_risk_aggregation.py +118 -0

apps/api/main.py CHANGED Viewed

@@ -6,7 +6,8 @@ from fastapi import FastAPI, HTTPException
 from sentinel.config import AppConfig, ModelConfig, ResourcePaths
 from sentinel.factory import SentinelFactory
-from sentinel.models import InitialAssessment, UserInput
 app = FastAPI(
     title="Cancer Risk Assessment Assistant",

 from sentinel.config import AppConfig, ModelConfig, ResourcePaths
 from sentinel.factory import SentinelFactory
+from sentinel.models import InitialAssessment
+from sentinel.user_input import UserInput
 app = FastAPI(
     title="Cancer Risk Assessment Assistant",

apps/cli/main.py CHANGED Viewed

@@ -12,16 +12,18 @@ from sentinel.config import AppConfig, ModelConfig, ResourcePaths
 from sentinel.factory import SentinelFactory
 from sentinel.models import (
     ConversationResponse,
     Demographics,
     FamilyMemberCancer,
     FemaleSpecific,
-    InitialAssessment,
     Lifestyle,
     PersonalMedicalHistory,
     UserInput,
 )
-from sentinel.reporting import generate_excel_report, generate_pdf_report
-from sentinel.risk_models import RISK_MODELS
 from sentinel.utils import load_user_file
@@ -461,17 +463,35 @@ def main(cfg: DictConfig) -> None:
     print(f"\n{Colors.OKCYAN}🔄 Running risk scoring tools...{Colors.ENDC}")
     risks_scores = []
     for model in RISK_MODELS:
-        risk_score = model().run(user)
-        risks_scores.append(risk_score)
-    user.risks_scores = risks_scores
     for risk_score in risks_scores:
-        print(f"{Colors.OKCYAN}🔄 {risk_score.name}: {risk_score.score}{Colors.ENDC}")
     print(f"\n{Colors.OKGREEN}🔄 Analyzing your information...{Colors.ENDC}")
     response = None
     try:
-        response = conversation.initial_assessment(user)
         format_risk_assessment(response, dev_mode)
     except Exception as e:
         print(f"{Colors.FAIL}❌ Error generating assessment: {e}{Colors.ENDC}")

 from sentinel.factory import SentinelFactory
 from sentinel.models import (
     ConversationResponse,
+    InitialAssessment,
+)
+from sentinel.reporting import generate_excel_report, generate_pdf_report
+from sentinel.risk_models import RISK_MODELS
+from sentinel.user_input import (
     Demographics,
     FamilyMemberCancer,
     FemaleSpecific,
     Lifestyle,
     PersonalMedicalHistory,
     UserInput,
 )
 from sentinel.utils import load_user_file
     print(f"\n{Colors.OKCYAN}🔄 Running risk scoring tools...{Colors.ENDC}")
     risks_scores = []
     for model in RISK_MODELS:
+        try:
+            risk_score = model().run(user)
+            # Handle models that return multiple scores (e.g., QCancer)
+            if isinstance(risk_score, list):
+                risks_scores.extend(risk_score)
+            else:
+                risks_scores.append(risk_score)
+        except ValueError as e:
+            # Skip models that aren't applicable or have validation errors
+            print(f"{Colors.WARNING}⚠️  Skipping {model().name}: {e!s}{Colors.ENDC}")
+            continue
     for risk_score in risks_scores:
+        # Format output based on whether cancer type is specified
+        if risk_score.cancer_type and risk_score.cancer_type not in [
+            "multiple",
+            "Multiple Cancer Sites",
+        ]:
+            display = (
+                f"{risk_score.name} ({risk_score.cancer_type}): {risk_score.score}"
+            )
+        else:
+            display = f"{risk_score.name}: {risk_score.score}"
+        print(f"{Colors.OKCYAN}🔄 {display}{Colors.ENDC}")
     print(f"\n{Colors.OKGREEN}🔄 Analyzing your information...{Colors.ENDC}")
     response = None
     try:
+        response = conversation.initial_assessment(user, risk_scores=risks_scores)
         format_risk_assessment(response, dev_mode)
     except Exception as e:
         print(f"{Colors.FAIL}❌ Error generating assessment: {e}{Colors.ENDC}")

apps/streamlit_ui/page_versions/profile/v2.py CHANGED Viewed

@@ -232,7 +232,11 @@ def render():
                         risks_scores = []
                         for model in RISK_MODELS:
                             risk_score = model().run(updated_profile)
-                            risks_scores.append(risk_score)
                         # Attach the scores to the object before saving
                         updated_profile.risks_scores = risks_scores

                         risks_scores = []
                         for model in RISK_MODELS:
                             risk_score = model().run(updated_profile)
+                            # Handle models that return multiple scores (e.g., QCancer)
+                            if isinstance(risk_score, list):
+                                risks_scores.extend(risk_score)
+                            else:
+                                risks_scores.append(risk_score)
                         # Attach the scores to the object before saving
                         updated_profile.risks_scores = risks_scores

apps/streamlit_ui/pages/1_Profile.py CHANGED Viewed

@@ -257,7 +257,11 @@ with st.expander("Create New Profile Manually"):
                 risks_scores = []
                 for model in RISK_MODELS:
                     risk_score = model().run(new_profile)
-                    risks_scores.append(risk_score)
                 new_profile.risks_scores = risks_scores

                 risks_scores = []
                 for model in RISK_MODELS:
                     risk_score = model().run(new_profile)
+                    # Handle models that return multiple scores (e.g., QCancer)
+                    if isinstance(risk_score, list):
+                        risks_scores.extend(risk_score)
+                    else:
+                        risks_scores.append(risk_score)
                 new_profile.risks_scores = risks_scores

apps/streamlit_ui/pages/3_Assessment.py CHANGED Viewed

@@ -198,7 +198,31 @@ if assessment:
     with st.expander("Overall Summary"):
         st.markdown(assessment.overall_summary, unsafe_allow_html=True)
-    with st.expander("Risk Assessments"):
         for ra in sorted_risk_assessments:
             st.markdown(f"**{ra.cancer_type}** - {ra.risk_level or 'N/A'}/5")
             st.write(ra.explanation)

     with st.expander("Overall Summary"):
         st.markdown(assessment.overall_summary, unsafe_allow_html=True)
+    with st.expander("Calculated Risk Scores (Ground Truth)"):
+        if assessment.calculated_risk_scores:
+            st.info(
+                "These scores have been calculated using validated clinical risk models "
+                "and represent the authoritative risk assessment."
+            )
+            for cancer_type, scores in sorted(
+                assessment.calculated_risk_scores.items()
+            ):
+                st.markdown(f"### {cancer_type}")
+                for score in scores:
+                    st.markdown(f"**{score.name}**: {score.score}")
+                    if score.description:
+                        st.write(f"*{score.description}*")
+                    if score.interpretation:
+                        st.write(score.interpretation)
+                    if score.references:
+                        with st.expander("References"):
+                            for ref in score.references:
+                                st.write(f"- {ref}")
+                    st.divider()
+        else:
+            st.write("No risk scores calculated.")
+    with st.expander("AI-Generated Risk Interpretations"):
         for ra in sorted_risk_assessments:
             st.markdown(f"**{ra.cancer_type}** - {ra.risk_level or 'N/A'}/5")
             st.write(ra.explanation)

configs/output_format/assessment.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 format_instructions: |
   CRITICAL:
   - Return ONLY valid JSON. Do not include any explanatory text, disclaimers, or additional content before or after the JSON.
-  - Provide a risk assessment for each cancer type that has a computed risk score in the `RISK SCORES` section of `USER INFORMATION`.
   - Provide a diagnostic recommendation for EVERY diagnostic protocol provided in the `DIAGNOSTIC PROTOCOLS` (i.e. {diagnostic_protocols}).
   - The ONLY allowed values for the "category" field in "identified_risk_factors" and "contributing_factors" objects are: {allowed_categories}.  You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category.
   - The ONLY allowed values for the "strength" field in "contributing_factors" objects are: {allowed_strengths}.
@@ -13,15 +13,15 @@ format_instructions: |
     "identified_risk_factors": [
       {{
         "description": "string - A human-readable description of the risk factor identified from the user's profile.",
-        "category": "string - One of the predefined categories (Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, Other).  You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category. ",
       }}
-    ]
-    "risk_assessments": [
       {{
-        "cancer_type": "string - Type of cancer",
-        "risk_level": "number - A score from 1 (lowest risk), 2 (low risk - proactive screening not needed but user should be aware of symptoms), 3 (moderate risk - some screening recommended), 4 (high risk - screening important), 5 (very high risk - screening critical, short-term action required)",
-        "explanation": "string - Reasoning behind the assessment.  Always relate the explanation to information provided in the `User Information` and `Clinical Observations` as much as possible.",
-        "recommended_steps": ["string"] or null - Optional steps to mitigate risk.  This field is only required if the risk level is 3 or higher, otherwise leave this field blank.
         "contributing_factors": [
           {{
             "description": "string - A human-readable description of the risk factor",
@@ -47,6 +47,8 @@ format_instructions: |
   IMPORTANT:
   - The `reasoning` field is mandatory for your internal monologue.  You must put any and all reasoning you were asked to do in here.  This is your internal monologue, and should be as detailed as possible.
   - Do not add disclaimers; they are handled separately.
   - Use null for optional fields that don't apply.
   - Return ONLY the JSON object, nothing else.

 format_instructions: |
   CRITICAL:
   - Return ONLY valid JSON. Do not include any explanatory text, disclaimers, or additional content before or after the JSON.
+  - The `RISK SCORES (GROUND TRUTH)` section contains validated risk scores. You MUST provide interpretations and explanations for these scores, NOT generate new risk levels.
   - Provide a diagnostic recommendation for EVERY diagnostic protocol provided in the `DIAGNOSTIC PROTOCOLS` (i.e. {diagnostic_protocols}).
   - The ONLY allowed values for the "category" field in "identified_risk_factors" and "contributing_factors" objects are: {allowed_categories}.  You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category.
   - The ONLY allowed values for the "strength" field in "contributing_factors" objects are: {allowed_strengths}.
     "identified_risk_factors": [
       {{
         "description": "string - A human-readable description of the risk factor identified from the user's profile.",
+        "category": "string - One of the predefined categories (Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, Other).  You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category. "
       }}
+    ],
+    "llm_risk_interpretations": [
       {{
+        "cancer_type": "string - Type of cancer from RISK SCORES section",
+        "risk_level": "number or null - Optional qualitative score (1-5) that should align with the calculated risk scores. Use null if you cannot confidently map the score to a 1-5 scale.",
+        "explanation": "string - Explain what the calculated risk score means for the patient. Identify key contributing factors from their profile. Always reference the actual score from RISK SCORES section.",
+        "recommended_steps": ["string"] or null - Optional steps to mitigate risk, particularly for higher calculated scores.
         "contributing_factors": [
           {{
             "description": "string - A human-readable description of the risk factor",
   IMPORTANT:
   - The `reasoning` field is mandatory for your internal monologue.  You must put any and all reasoning you were asked to do in here.  This is your internal monologue, and should be as detailed as possible.
+  - Do NOT include a `calculated_risk_scores` field in your response - this is populated programmatically from the RISK SCORES section.
+  - Focus your `llm_risk_interpretations` on explaining the CALCULATED scores, not generating new risk assessments.
   - Do not add disclaimers; they are handled separately.
   - Use null for optional fields that don't apply.
   - Return ONLY the JSON object, nothing else.

examples/benchmark/benchmark_female.yaml CHANGED Viewed

@@ -1,23 +1,39 @@
 demographics:
-  age: 52
   sex: female
-  ethnicity: "Asian"
 lifestyle:
   smoking:
     status: never
-  alcohol:
-    consumption_level: light
 family_history:
-  - relative: mother
-    cancer_type: breast
     age_at_diagnosis: 48
-  - relative: aunt
-    cancer_type: ovarian
     age_at_diagnosis: 55
-personal_medical_history: {}
 female_specific:
   menstrual:
@@ -25,21 +41,17 @@ female_specific:
   parity:
     num_live_births: 2
     age_at_first_live_birth: 28
-current_concerns_or_symptoms: "Small lump in left breast. Fatigue and irregular periods."
-lab_results:
-  - test_name: "CA 15-3"
-    value: "32"
-    unit: "U/mL"
-    date: "2025-09-20"
-  - test_name: "Hemoglobin"
-    value: "12.8"
-    unit: "g/dL"
-    date: "2025-09-20"
-clinical_observations:
-  - test_name: "Mammogram"
-    value: "BI-RADS 4"
-    unit: "category"
-    date: "2025-09-20"

 demographics:
+  age_years: 52
   sex: female
+  ethnicity: asian
+  education_level: 4
+  anthropometrics:
+    height_cm: 165
+    weight_kg: 65
 lifestyle:
   smoking:
     status: never
+    cigarettes_per_day: 0
+    years_smoked: 0
+    pack_years: 0
+  alcohol_consumption: light
+  multivitamin_use: true
+  moderate_physical_activity_hours_per_day: 0.5
+  red_meat_consumption_oz_per_day: 2.0
 family_history:
+  - relation: mother
+    cancer_type: breast_cancer
     age_at_diagnosis: 48
+    degree: "1"
+    side: unknown
+  - relation: maternal_aunt
+    cancer_type: ovarian_cancer
     age_at_diagnosis: 55
+    degree: "2"
+    side: maternal
+personal_medical_history:
+  chronic_conditions: []
+  previous_cancers: []
+  nsaid_use: never
 female_specific:
   menstrual:
   parity:
     num_live_births: 2
     age_at_first_live_birth: 28
+  hormone_use:
+    estrogen_use: never
+symptoms:
+  - symptom_type: breast_lump
+  - symptom_type: weight_loss
+dermatologic:
+  region: central
+  complexion: medium
+  freckling: mild
+  female_tan: moderate
+  female_small_moles: five_to_eleven
+  solar_damage: false

examples/benchmark/benchmark_male.yaml CHANGED Viewed

@@ -1,42 +1,58 @@
 demographics:
-  age: 58
   sex: male
-  ethnicity: "Caucasian"
 lifestyle:
   smoking:
     status: former
     pack_years: 20
-  alcohol:
-    consumption_level: moderate
 family_history:
-  - relative: father
-    cancer_type: lung
     age_at_diagnosis: 67
-  - relative: brother
-    cancer_type: prostate
     age_at_diagnosis: 62
 personal_medical_history:
   chronic_conditions:
-    - "Type 2 diabetes"
-    - "Hypertension"
-current_concerns_or_symptoms: "Difficulty with urination and persistent cough."
-lab_results:
-  - test_name: "PSA"
-    value: "5.8"
-    unit: "ng/mL"
-    date: "2025-09-15"
-  - test_name: "Hemoglobin A1c"
-    value: "7.2"
-    unit: "%"
-    date: "2025-09-15"
-clinical_observations:
-  - test_name: "Blood Pressure"
-    value: "142/88"
-    unit: "mmHg"
-    date: "2025-09-15"

 demographics:
+  age_years: 58
   sex: male
+  ethnicity: white
+  education_level: 3
+  anthropometrics:
+    height_cm: 178
+    weight_kg: 92
 lifestyle:
   smoking:
     status: former
     pack_years: 20
+    cigarettes_per_day: 20
+    years_smoked: 20
+    years_since_quit: 5
+  alcohol_consumption: moderate
+  multivitamin_use: false
+  moderate_physical_activity_hours_per_day: 0.25
+  red_meat_consumption_oz_per_day: 4.0
 family_history:
+  - relation: father
+    cancer_type: lung_cancer
     age_at_diagnosis: 67
+    degree: "1"
+    side: unknown
+  - relation: brother
+    cancer_type: prostate_cancer
     age_at_diagnosis: 62
+    degree: "1"
+    side: unknown
 personal_medical_history:
   chronic_conditions:
+    - diabetes
+  previous_cancers: []
+  aspirin_use: never
+clinical_tests:
+  psa:
+    value_ng_ml: 5.8
+    date: 2025-09-15
+  dre:
+    result: normal
+    date: 2025-09-15
+symptoms:
+  - symptom_type: persistent_cough
+dermatologic:
+  region: northern
+  complexion: light
+  freckling: moderate
+  male_sunburn: true
+  male_has_two_or_more_big_moles: true
+  male_small_moles: seven_to_sixteen
+  solar_damage: true

prompts/instruction/assessment.md CHANGED Viewed

@@ -2,18 +2,22 @@ You will provide a structured JSON output as specified in the `FORMAT INSTRUCTIO
 ## Your Task
-Review the pre-computed risk scores in `USER INFORMATION` and synthesize them into a clear, structured assessment:
-1. **Analyze the risk scores**: Review each risk score provided in `RISK SCORES`. These scores have been calculated by validated risk models and represent the primary basis for the assessment.
-2. **Review clinical observations**: If `CLINICAL OBSERVATIONS` is not empty, carefully consider each item by comparing the `value` to the `reference_range` to identify abnormalities.
-3. **Apply diagnostic protocols**: For each relevant protocol in `DIAGNOSTIC PROTOCOLS`, determine the user's eligibility and recommended frequency based on their risk profile and demographic information.
-4. **Generate clear explanations**: Transform the technical risk data into user-friendly explanations that are empathetic, actionable, and evidence-based.
-5. **Critical review**: Before generating final output, verify that your recommendations are consistent with the risk scores and guidelines. Look for contradictions or omissions.
-6. **Structure the output**: Generate the JSON response following the `FORMAT INSTRUCTIONS` exactly.
-Your role is to explain and contextualize the pre-computed risk assessments, NOT to recalculate or second-guess them.

 ## Your Task
+The `RISK SCORES (GROUND TRUTH)` section contains deterministic risk scores calculated by validated risk models. **These scores are the authoritative source of truth and must not be contradicted or overridden.**
+Your role is to:
+1. **Explain the risk scores**: For each cancer type with calculated risk scores, provide clear, empathetic explanations of what these scores mean for the patient. Explain the scores in plain language without generating your own risk levels.
+2. **Identify contributing factors**: Analyze the patient's profile in `USER INFORMATION` to highlight the key risk factors that contributed to elevated risk scores. Explain WHY specific scores are higher based on the patient's demographics, lifestyle, medical history, and family history.
+3. **Review clinical observations**: If clinical observations are present, identify any abnormalities by comparing values to reference ranges and explain how these relate to the calculated risk scores.
+4. **Apply diagnostic protocols**: For each relevant protocol in `DIAGNOSTIC PROTOCOLS`, determine the patient's eligibility and recommended testing frequency based on their risk profile and demographic information.
+5. **Provide actionable insights**: Offer evidence-based recommendations and lifestyle advice that patients can use to understand and potentially modify their risk factors.
+6. **Maintain consistency**: Ensure your explanations and recommendations align with the calculated risk scores and established guidelines. Do not contradict the quantitative scores.
+7. **Structure the output**: Generate the JSON response following the `FORMAT INSTRUCTIONS` exactly.
+**Critical**: You are an interpreter and explainer of risk data, NOT a risk calculator. The validated risk models have already determined the risk levels - your job is to make them understandable and actionable for the patient.

scripts/generate_documentation.py CHANGED Viewed

@@ -15,7 +15,6 @@ from annotated_types import Ge, Gt, Le, Lt
 from fpdf import FPDF
 from pydantic import BaseModel
-from sentinel.models import UserInput
 from sentinel.risk_models.base import RiskModel
 from sentinel.risk_models.qcancer import (
     FEMALE_CANCER_TYPES as QC_FEMALE_CANCERS,
@@ -23,6 +22,7 @@ from sentinel.risk_models.qcancer import (
 from sentinel.risk_models.qcancer import (
     MALE_CANCER_TYPES as QC_MALE_CANCERS,
 )
 # Constants
 HERE = Path(__file__).resolve().parent

 from fpdf import FPDF
 from pydantic import BaseModel
 from sentinel.risk_models.base import RiskModel
 from sentinel.risk_models.qcancer import (
     FEMALE_CANCER_TYPES as QC_FEMALE_CANCERS,
 from sentinel.risk_models.qcancer import (
     MALE_CANCER_TYPES as QC_MALE_CANCERS,
 )
+from sentinel.user_input import UserInput
 # Constants
 HERE = Path(__file__).resolve().parent

src/sentinel/api_clients/canrisk.py CHANGED Viewed

@@ -1513,7 +1513,7 @@ class CanRiskClient:
         stripped = relative.strip()
         if not stripped:
             return "Unknown"
-        compact = stripped.title().replace(" ", "")
         return compact[:20]
     @staticmethod

         stripped = relative.strip()
         if not stripped:
             return "Unknown"
+        compact = stripped.title().replace(" ", "").replace("_", "")
         return compact[:20]
     @staticmethod

src/sentinel/conversation.py CHANGED Viewed

@@ -7,7 +7,9 @@ from langchain_core.messages import get_buffer_string
 from langchain_core.runnables.base import Runnable
 from .llm_service import extract_thinking
-from .models import ConversationResponse, InitialAssessment, UserInput
 @dataclass
@@ -37,25 +39,45 @@ class ConversationManager:
             pairs.append((human, ai))
         return pairs
-    def initial_assessment(self, user: UserInput) -> InitialAssessment:
         """Run the structured assessment chain and record the exchange.
         Args:
             user: The user profile to assess.
         Returns:
             The structured InitialAssessment result.
         """
         self.user_json = user.model_dump_json()
-        prompt = self.structured_chain.prompt.format(user_data=self.user_json)
-        result = self.structured_chain.invoke({"user_data": self.user_json})
         if isinstance(result, InitialAssessment):
             data = result
         else:
             data = InitialAssessment.model_validate(result)
         # Add to history as a new interaction
-        self.chat_history.add_user_message(prompt)
         self.chat_history.add_ai_message(data.model_dump_json())
         return data

 from langchain_core.runnables.base import Runnable
 from .llm_service import extract_thinking
+from .models import ConversationResponse, InitialAssessment
+from .risk_aggregation import format_scores_for_llm, group_scores_by_cancer_type
+from .user_input import UserInput
 @dataclass
             pairs.append((human, ai))
         return pairs
+    def initial_assessment(
+        self, user: UserInput, risk_scores: list | None = None
+    ) -> InitialAssessment:
         """Run the structured assessment chain and record the exchange.
         Args:
             user: The user profile to assess.
+            risk_scores: Optional list of RiskScore objects. If not provided, will try to get from user.risk_scores.
         Returns:
             The structured InitialAssessment result.
         """
         self.user_json = user.model_dump_json()
+        # Extract and group risk scores
+        if risk_scores is None:
+            # Try to get from user if it has risk_scores attribute
+            risk_scores = getattr(user, "risk_scores", [])
+        grouped_scores = group_scores_by_cancer_type(risk_scores)
+        formatted_scores = format_scores_for_llm(grouped_scores)
+        # Invoke LLM with scores as separate context
+        result = self.structured_chain.invoke(
+            {"user_data": self.user_json, "risk_scores": formatted_scores}
+        )
         if isinstance(result, InitialAssessment):
             data = result
         else:
             data = InitialAssessment.model_validate(result)
+        # Attach the ground truth calculated scores
+        data.calculated_risk_scores = grouped_scores
         # Add to history as a new interaction
+        self.chat_history.add_user_message(
+            f"Initial assessment for user profile: {self.user_json}"
+        )
         self.chat_history.add_ai_message(data.model_dump_json())
         return data

src/sentinel/models.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """Pydantic models and enums used across the Sentinel application."""
 import re
-from collections.abc import Iterable, Sequence
 from enum import Enum, IntEnum
 from typing import Any, Literal
@@ -1593,213 +1593,6 @@ class RiskScore(SentinelBaseModel):
     )
-# ---------------------------------------------------------------------------
-# Canonical user input
-# ---------------------------------------------------------------------------
-class UserInput(SentinelBaseModel):
-    """Top-level container for all input required by assessments."""
-    schema_version: str = Field(default="2025.10")
-    demographics: Demographics
-    lifestyle: Lifestyle
-    family_history: list[FamilyMemberCancer] = Field(default_factory=list)
-    personal_medical_history: PersonalMedicalHistory
-    female_specific: FemaleSpecific | None = None
-    current_concerns_or_symptoms: str | None = None
-    symptoms: list[SymptomEntry] = Field(default_factory=list)
-    clinical_observations: list[ClinicalObservation] = Field(default_factory=list)
-    lab_results: list[LabResult] = Field(default_factory=list)
-    screening_history: list[ScreeningEvent] = Field(default_factory=list)
-    medications: list[MedicationRecord] = Field(default_factory=list)
-    risk_scores: list[RiskScore] = Field(default_factory=list, alias="risks_scores")
-    notes: str | None = None
-    dermatologic: DermatologicProfile | None = None
-    @model_validator(mode="before")
-    def _legacy(cls, values: Any) -> Any:
-        if not isinstance(values, dict):
-            return values
-        data = dict(values)
-        for field_name in (
-            "clinical_observations",
-            "lab_results",
-            "screening_history",
-            "medications",
-            "family_history",
-            "symptoms",
-            "risk_scores",
-            "risks_scores",
-        ):
-            if field_name in data and data[field_name] is None:
-                data[field_name] = []
-        if "risks_scores" in data and "risk_scores" not in data:
-            data["risk_scores"] = data.pop("risks_scores")
-        return data
-    @property
-    def risks_scores(self) -> list[RiskScore]:
-        """Get risk scores list.
-        Returns:
-            List of risk scores.
-        """
-        return self.risk_scores
-    @risks_scores.setter
-    def risks_scores(self, value: Iterable[RiskScore]) -> None:
-        """Set risk scores list.
-        Args:
-            value: Risk scores to set.
-        """
-        self.risk_scores = list(value)
-    @property
-    def reproductive_history(self) -> FemaleSpecific | None:
-        """Get reproductive history.
-        Returns:
-            Reproductive history or None.
-        """
-        return self.female_specific
-    @reproductive_history.setter
-    def reproductive_history(self, value: FemaleSpecific | None) -> None:
-        """Set reproductive history.
-        Args:
-            value: Reproductive history to set.
-        """
-        self.female_specific = value
-    @property
-    def bmi(self) -> float | None:
-        """Get BMI value.
-        Returns:
-            BMI value or None.
-        """
-        return self.demographics.bmi
-    @property
-    def smoking_history(self) -> SmokingHistory:
-        """Get smoking history.
-        Returns:
-            Smoking history.
-        """
-        return self.lifestyle.smoking
-    @property
-    def is_current_or_former_smoker(self) -> bool:
-        """Check if user is current or former smoker.
-        Returns:
-            True if current or former smoker, False otherwise.
-        """
-        return self.lifestyle.smoking.status in {
-            SmokingStatus.CURRENT,
-            SmokingStatus.FORMER,
-        }
-    def _build_observation_index(self) -> dict[str, ClinicalObservation]:
-        """Build index of clinical observations by normalized name.
-        Returns:
-            Dictionary mapping normalized names to observations.
-        """
-        index: dict[str, ClinicalObservation] = {}
-        for obs in (*self.clinical_observations, *self.lab_results):
-            key = obs.normalized_name
-            if key and key not in index:
-                index[key] = obs
-        return index
-    def get_observation(self, names: Sequence[str]) -> ClinicalObservation | None:
-        """Get clinical observation by name.
-        Args:
-            names: Sequence of observation names to search for.
-        Returns:
-            Clinical observation or None.
-        """
-        index = self._build_observation_index()
-        for name in names:
-            key = _normalize_key(name)
-            if key in index:
-                return index[key]
-        return None
-    def get_observation_value(self, names: Sequence[str]) -> str | None:
-        """Get clinical observation value by name.
-        Args:
-            names: Sequence of observation names to search for.
-        Returns:
-            Observation value or None.
-        """
-        observation = self.get_observation(names)
-        return observation.value if observation else None
-    def get_numeric_observation(self, names: Sequence[str]) -> float | None:
-        """Get clinical observation numeric value by name.
-        Args:
-            names: Sequence of observation names to search for.
-        Returns:
-            Numeric observation value or None.
-        """
-        observation = self.get_observation(names)
-        return observation.numeric_value if observation else None
-    def has_family_history(
-        self, relations: Iterable[FamilyRelation], cancer_keywords: Iterable[str]
-    ) -> bool:
-        """Check if user has family history of specific cancer types.
-        Args:
-            relations: Family relations to check.
-            cancer_keywords: Cancer type keywords to search for.
-        Returns:
-            True if family history found, False otherwise.
-        """
-        relation_set = {FamilyRelation.normalize(rel) for rel in relations}
-        keywords = {kw.lower() for kw in cancer_keywords}
-        for record in self.family_history:
-            if relation_set and record.relation not in relation_set:
-                continue
-            if any(
-                keyword in (record.cancer_type or "").lower() for keyword in keywords
-            ):
-                return True
-        return False
-    def first_degree_cancer_count(self, cancer_keywords: Iterable[str]) -> int:
-        """Count first-degree relatives with specific cancer types.
-        Args:
-            cancer_keywords: Cancer type keywords to search for.
-        Returns:
-            Count of first-degree relatives with matching cancer types.
-        """
-        keywords = {kw.lower() for kw in cancer_keywords}
-        return sum(
-            1
-            for record in self.family_history
-            if record.is_first_degree
-            and any(
-                keyword in (record.cancer_type or "").lower() for keyword in keywords
-            )
-        )
 # ---------------------------------------------------------------------------
 # Assessment artefacts
 # ---------------------------------------------------------------------------
@@ -1872,24 +1665,38 @@ class InitialAssessment(SentinelBaseModel):
     overall_summary: str | None = Field(
         default=None, description="A high-level summary of the user's cancer risk."
     )
-    overall_risk_score: int | None = Field(
         default=None,
         description="A holistic score from 0 to 100 representing the user's overall cancer risk.",
         ge=0,
         le=100,
     )
     identified_risk_factors: list[RiskFactor] = Field(
         default_factory=list,
         description="A comprehensive list of all distinct risk factors identified from the user's profile.",
     )
-    risk_assessments: list[CancerRiskAssessment] = Field(
         default_factory=list,
-        description="Detailed risk assessments for specific cancers",
     )
     dx_recommendations: list[DxRecommendation] = Field(
         default_factory=list, description="Recommended diagnostic tests and protocols"
     )
 class ConversationResponse(SentinelBaseModel):
     """Structured response for conversational follow-ups."""

 """Pydantic models and enums used across the Sentinel application."""
 import re
+from collections.abc import Iterable
 from enum import Enum, IntEnum
 from typing import Any, Literal
     )
 # ---------------------------------------------------------------------------
 # Assessment artefacts
 # ---------------------------------------------------------------------------
     overall_summary: str | None = Field(
         default=None, description="A high-level summary of the user's cancer risk."
     )
+    overall_risk_score: float | None = Field(
         default=None,
         description="A holistic score from 0 to 100 representing the user's overall cancer risk.",
         ge=0,
         le=100,
     )
+    calculated_risk_scores: dict[str, list[RiskScore]] = Field(
+        default_factory=dict,
+        description="Deterministic risk scores grouped by cancer type (ground truth)",
+    )
     identified_risk_factors: list[RiskFactor] = Field(
         default_factory=list,
         description="A comprehensive list of all distinct risk factors identified from the user's profile.",
     )
+    llm_risk_interpretations: list[CancerRiskAssessment] = Field(
         default_factory=list,
+        description="LLM explanations and interpretations of calculated risk scores",
+        alias="risk_assessments",
     )
     dx_recommendations: list[DxRecommendation] = Field(
         default_factory=list, description="Recommended diagnostic tests and protocols"
     )
+    @property
+    def risk_assessments(self) -> list[CancerRiskAssessment]:
+        """Get LLM risk interpretations (legacy compatibility).
+        Returns:
+            List of cancer risk assessments.
+        """
+        return self.llm_risk_interpretations
 class ConversationResponse(SentinelBaseModel):
     """Structured response for conversational follow-ups."""

src/sentinel/prompting.py CHANGED Viewed

@@ -54,6 +54,7 @@ class PromptBuilder:
             "# PERSONA\n\n{persona}\n\n"
             "# CANCER MODULES\n\n{cancer_modules}\n\n"
             "# DIAGNOSTIC PROTOCOLS\n\n{protocols}\n\n"
             "# USER INFORMATION\n\n{user_data}\n\n"
             "# INSTRUCTIONS\n\n{instruction}\n\n"
             "# OUTPUT FORMAT INSTRUCTIONS (FOR INITIAL RESPONSE ONLY)\n\n{format_instructions}"

             "# PERSONA\n\n{persona}\n\n"
             "# CANCER MODULES\n\n{cancer_modules}\n\n"
             "# DIAGNOSTIC PROTOCOLS\n\n{protocols}\n\n"
+            "# RISK SCORES (GROUND TRUTH)\n\n{risk_scores}\n\n"
             "# USER INFORMATION\n\n{user_data}\n\n"
             "# INSTRUCTIONS\n\n{instruction}\n\n"
             "# OUTPUT FORMAT INSTRUCTIONS (FOR INITIAL RESPONSE ONLY)\n\n{format_instructions}"

src/sentinel/reporting.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import json
 import math
 from datetime import datetime
 import markdown2
@@ -35,8 +36,8 @@ from .models import (
     ContributionStrength,
     InitialAssessment,
     RiskFactorCategory,
-    UserInput,
 )
 # --- PDF Report Formatting Globals ---
 # Fonts
@@ -178,6 +179,7 @@ def generate_excel_report(
     wb = Workbook()
     _create_summary_sheet(wb, assessment, user_input)
     _create_data_sheet(wb, "User Input Data", user_input.model_dump(mode="json"))
     _create_data_sheet(wb, "Raw LLM Output", assessment.model_dump(mode="json"))
@@ -223,7 +225,7 @@ def _create_summary_sheet(
     ws.cell(row=current_row, column=1, value="Demographics").font = bold_font
     current_row += 1
     demo_info = {
-        "Age": user_input.demographics.age,
         "Sex": user_input.demographics.sex,
         "Ethnicity": user_input.demographics.ethnicity,
     }
@@ -238,10 +240,9 @@ def _create_summary_sheet(
     ws.cell(row=current_row, column=1, value="Lifestyle").font = bold_font
     current_row += 1
     lifestyle_info = {
-        "Smoking Status": user_input.lifestyle.smoking_status,
-        "Pack Years": user_input.lifestyle.smoking_pack_years,
         "Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
-        "Dietary Habits": user_input.lifestyle.dietary_habits,
         "Physical Activity": user_input.lifestyle.physical_activity_level,
     }
     for key, val in lifestyle_info.items():
@@ -253,16 +254,16 @@ def _create_summary_sheet(
     # Personal Medical History
     if user_input.personal_medical_history and (
-        user_input.personal_medical_history.known_genetic_mutations
         or user_input.personal_medical_history.previous_cancers
-        or user_input.personal_medical_history.chronic_illnesses
     ):
         ws.cell(
             row=current_row, column=1, value="Personal Medical History"
         ).font = bold_font
         current_row += 1
         pmh_texts = []
-        if user_input.personal_medical_history.known_genetic_mutations:
             ws.cell(
                 row=current_row, column=1, value="Known Genetic Mutations"
             ).font = bold_font
@@ -270,7 +271,8 @@ def _create_summary_sheet(
                 row=current_row,
                 column=2,
                 value=", ".join(
-                    user_input.personal_medical_history.known_genetic_mutations
                 ),
             ).alignment = wrap_alignment
             current_row += 1
@@ -281,17 +283,22 @@ def _create_summary_sheet(
             ws.cell(
                 row=current_row,
                 column=2,
-                value=", ".join(user_input.personal_medical_history.previous_cancers),
             ).alignment = wrap_alignment
             current_row += 1
-        if user_input.personal_medical_history.chronic_illnesses:
             ws.cell(
-                row=current_row, column=1, value="Chronic Illnesses"
             ).font = bold_font
             ws.cell(
                 row=current_row,
                 column=2,
-                value=", ".join(user_input.personal_medical_history.chronic_illnesses),
             ).alignment = wrap_alignment
             current_row += 1
         current_row += 1
@@ -301,7 +308,7 @@ def _create_summary_sheet(
         ws.cell(row=current_row, column=1, value="Family History").font = bold_font
         current_row += 1
         family_texts = [
-            f"{mem.relative} ({mem.cancer_type} at age {mem.age_at_diagnosis or 'N/A'})"
             for mem in user_input.family_history
         ]
         ws.cell(
@@ -313,12 +320,16 @@ def _create_summary_sheet(
     if user_input.female_specific:
         ws.cell(row=current_row, column=1, value="Female-Specific").font = bold_font
         current_row += 1
         female_specific_info = {
-            "Age at first period": user_input.female_specific.age_at_first_period,
-            "Age at menopause": user_input.female_specific.age_at_menopause,
-            "Number of live births": user_input.female_specific.num_live_births,
-            "Age at first live birth": user_input.female_specific.age_at_first_live_birth,
-            "Hormone therapy use": user_input.female_specific.hormone_therapy_use,
         }
         for key, val in female_specific_info.items():
             ws.cell(row=current_row, column=1, value=key).font = bold_font
@@ -328,35 +339,19 @@ def _create_summary_sheet(
             current_row += 1
         current_row += 1
-    # Current Concerns
-    if user_input.current_concerns_or_symptoms:
-        ws.cell(row=current_row, column=1, value="Current Concerns").font = bold_font
         current_row += 1
         ws.cell(
-            row=current_row, column=2, value=user_input.current_concerns_or_symptoms
         ).alignment = wrap_alignment
         current_row += 2
-    # Clinical Observations
-    if user_input.clinical_observations:
-        ws.merge_cells(f"A{current_row}:F{current_row}")
-        ws.cell(
-            row=current_row, column=1, value="Clinical Observations"
-        ).font = bold_font
-        current_row += 1
-        headers = ["Test Name", "Value", "Unit", "Reference Range", "Date"]
-        for col_idx, header in enumerate(headers, 1):
-            cell = ws.cell(row=current_row, column=col_idx, value=header)
-            cell.font = header_font
-            cell.fill = header_fill
-        for obs in user_input.clinical_observations:
-            current_row += 1
-            ws.cell(row=current_row, column=1, value=obs.test_name)
-            ws.cell(row=current_row, column=2, value=obs.value)
-            ws.cell(row=current_row, column=3, value=obs.unit)
-            ws.cell(row=current_row, column=4, value=obs.reference_range)
-            ws.cell(row=current_row, column=5, value=obs.date)
-        current_row += 1
     ws.merge_cells(
         start_row=current_row, start_column=1, end_row=current_row, end_column=6
@@ -457,6 +452,73 @@ def _create_summary_sheet(
     ws.column_dimensions["F"].width = 30
 def _create_data_sheet(wb: Workbook, title: str, data: dict) -> None:
     ws = wb.create_sheet(title)
     pretty_json = json.dumps(data, indent=2)
@@ -651,7 +713,7 @@ def generate_pdf_report(
     add_section(
         "Demographics",
         {
-            "Age": user_input.demographics.age,
             "Sex": user_input.demographics.sex,
             "Ethnicity": user_input.demographics.ethnicity or "N/A",
         },
@@ -661,10 +723,9 @@ def generate_pdf_report(
     add_section(
         "Lifestyle",
         {
-            "Smoking Status": user_input.lifestyle.smoking_status,
-            "Pack Years": user_input.lifestyle.smoking_pack_years or "N/A",
             "Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
-            "Dietary Habits": user_input.lifestyle.dietary_habits or "N/A",
             "Physical Activity": user_input.lifestyle.physical_activity_level or "N/A",
         },
     )
@@ -672,21 +733,27 @@ def generate_pdf_report(
     # --- Personal Medical History ---
     pmh = user_input.personal_medical_history
     if pmh and (
-        pmh.known_genetic_mutations or pmh.previous_cancers or pmh.chronic_illnesses
     ):
         pmh_data = {}
-        if pmh.known_genetic_mutations:
-            pmh_data["Known Genetic Mutations"] = ", ".join(pmh.known_genetic_mutations)
         if pmh.previous_cancers:
-            pmh_data["Previous Cancers"] = ", ".join(pmh.previous_cancers)
-        if pmh.chronic_illnesses:
-            pmh_data["Chronic Illnesses"] = ", ".join(pmh.chronic_illnesses)
         add_section("Personal Medical History", pmh_data)
     # --- Family History ---
     if user_input.family_history:
         family_texts = [
-            f"{mem.relative} - {mem.cancer_type} (Age: {mem.age_at_diagnosis or 'N/A'})"
             for mem in user_input.family_history
         ]
         add_list_section("Family History", family_texts)
@@ -695,80 +762,50 @@ def generate_pdf_report(
     fs = user_input.female_specific
     if fs:
         fs_data = {}
-        if fs.age_at_first_period is not None:
-            fs_data["Age at first period"] = fs.age_at_first_period
-        if fs.age_at_menopause is not None:
-            fs_data["Age at menopause"] = fs.age_at_menopause
-        if fs.num_live_births is not None:
-            fs_data["Number of live births"] = fs.num_live_births
-        if fs.age_at_first_live_birth is not None:
-            fs_data["Age at first live birth"] = fs.age_at_first_live_birth
-        if fs.hormone_therapy_use:
-            fs_data["Hormone therapy"] = fs.hormone_therapy_use
-        add_section("Female-Specific", fs_data)
-    # --- Current Concerns ---
-    if user_input.current_concerns_or_symptoms:
-        add_list_section("Current Concerns", [user_input.current_concerns_or_symptoms])
     story.append(Spacer(1, SPACER_NORMAL))
-    # --- Clinical Observations Table ---
-    if user_input.clinical_observations:
-        story.append(Paragraph("Clinical Observations", subheading_style))
-        obs_data = [
-            [
-                Paragraph(h, table_header_style)
-                for h in ["Test", "Value", "Unit", "Range", "Date"]
-            ]
-        ]
-        obs_style_cmds = [
-            (
-                "BACKGROUND",
-                (0, 0),
-                (-1, 0),
-                colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
-            ),
-            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
-            ("GRID", (0, 0), (-1, -1), 1, colors.black),
-            ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
-            ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
-            ("TOPPADDING", (0, 0), (-1, -1), 4),
-        ]
-        for obs in user_input.clinical_observations:
-            obs_data.append(
-                [
-                    Paragraph(obs.test_name, table_body_style),
-                    Paragraph(obs.value, table_body_style),
-                    Paragraph(obs.unit, table_body_style),
-                    Paragraph(obs.reference_range or "N/A", table_body_style),
-                    Paragraph(obs.date or "N/A", table_body_style),
-                ]
-            )
-        obs_widths = [1.75 * inch, 0.75 * inch, 0.75 * inch, 1.75 * inch, 1.5 * inch]
-        scaled_widths = [w * (CONTENT_WIDTH / sum(obs_widths)) for w in obs_widths]
-        obs_table = Table(
-            obs_data, colWidths=scaled_widths, style=obs_style_cmds, splitByRow=1
-        )
-        story.append(obs_table)
-        story.append(Spacer(1, SPACER_NORMAL))
-    # --- Risk Scores Table ---
-    if user_input.risks_scores:
-        story.append(Paragraph("Risk Scores", subheading_style))
-        obs_data = [
             [
                 Paragraph(h, table_header_style)
-                for h in [
-                    "Model",
-                    "Score",
-                    "Cancer Type",
-                    "Description",
-                    "Interpretation",
-                ]
             ]
         ]
-        obs_style_cmds = [
             (
                 "BACKGROUND",
                 (0, 0),
@@ -781,28 +818,29 @@ def generate_pdf_report(
             ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
             ("TOPPADDING", (0, 0), (-1, -1), 4),
         ]
-        for risk_score in user_input.risks_scores:
-            obs_data.append(
-                [
-                    Paragraph(risk_score.name, table_body_style),
-                    Paragraph(risk_score.score, table_body_style),
-                    Paragraph(risk_score.cancer_type or "N/A", table_body_style),
-                    Paragraph(risk_score.description or "N/A", table_body_style),
-                    Paragraph(risk_score.interpretation or "N/A", table_body_style),
-                ]
-            )
-        obs_widths = [1.75 * inch, 0.75 * inch, 0.75 * inch, 1.75 * inch, 1.5 * inch]
-        scaled_widths = [w * (CONTENT_WIDTH / sum(obs_widths)) for w in obs_widths]
-        obs_table = Table(
-            obs_data, colWidths=scaled_widths, style=obs_style_cmds, splitByRow=1
         )
-        story.append(obs_table)
         story.append(Spacer(1, SPACER_NORMAL))
-    story.append(PageBreak())
-    story.append(Paragraph("Assessment", heading_style))
-    story.append(Spacer(1, SPACER_NORMAL))
     # --- New 3-Column Summary Section ---
     headers = [
         Paragraph("<b>Overall Risk Score</b>", summary_header_style),
@@ -910,13 +948,12 @@ def generate_pdf_report(
     story.append(Spacer(1, SPACER_NORMAL))
-    story.append(Paragraph("Detailed Risk Assessments", subheading_style))
     story.append(Spacer(1, SPACER_SMALL))
     risk_intro_text = """
-    The following table outlines your personalized cancer risk assessment. The risk level is graded
-    on a scale from 1 (lowest risk) to 5 (highest risk) based on the information provided.
-    Additional detail on the contributing risk factors and possible recommendation are then
-    provided for any and all higher risk cancers (scoring 3-5).
     """
     story.append(Paragraph(risk_intro_text, styles["BodyText"]))
     story.append(Spacer(1, SPACER_SMALL))
@@ -1497,8 +1534,6 @@ def _calculate_risk_points(
     Returns:
         Mapping of RiskFactorCategory to integer points.
     """
-    from collections import defaultdict
     risk_points_by_category = defaultdict(int)
     strength_to_points = {
         ContributionStrength.MAJOR: 5,
@@ -1708,8 +1743,6 @@ def _create_risk_factor_table(
     Returns:
         A ReportLab Table or Paragraph to insert in the story.
     """
-    from collections import defaultdict
     if not assessment.identified_risk_factors:
         return Paragraph("No specific risk factors identified.", panel_body_style)

 import json
 import math
+from collections import defaultdict
 from datetime import datetime
 import markdown2
     ContributionStrength,
     InitialAssessment,
     RiskFactorCategory,
 )
+from .user_input import UserInput
 # --- PDF Report Formatting Globals ---
 # Fonts
     wb = Workbook()
     _create_summary_sheet(wb, assessment, user_input)
+    _create_risk_scores_sheet(wb, assessment)
     _create_data_sheet(wb, "User Input Data", user_input.model_dump(mode="json"))
     _create_data_sheet(wb, "Raw LLM Output", assessment.model_dump(mode="json"))
     ws.cell(row=current_row, column=1, value="Demographics").font = bold_font
     current_row += 1
     demo_info = {
+        "Age": user_input.demographics.age_years,
         "Sex": user_input.demographics.sex,
         "Ethnicity": user_input.demographics.ethnicity,
     }
     ws.cell(row=current_row, column=1, value="Lifestyle").font = bold_font
     current_row += 1
     lifestyle_info = {
+        "Smoking Status": user_input.lifestyle.smoking.status,
+        "Pack Years": user_input.lifestyle.smoking.pack_years,
         "Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
         "Physical Activity": user_input.lifestyle.physical_activity_level,
     }
     for key, val in lifestyle_info.items():
     # Personal Medical History
     if user_input.personal_medical_history and (
+        user_input.personal_medical_history.genetic_mutations
         or user_input.personal_medical_history.previous_cancers
+        or user_input.personal_medical_history.chronic_conditions
     ):
         ws.cell(
             row=current_row, column=1, value="Personal Medical History"
         ).font = bold_font
         current_row += 1
         pmh_texts = []
+        if user_input.personal_medical_history.genetic_mutations:
             ws.cell(
                 row=current_row, column=1, value="Known Genetic Mutations"
             ).font = bold_font
                 row=current_row,
                 column=2,
                 value=", ".join(
+                    str(m)
+                    for m in user_input.personal_medical_history.genetic_mutations
                 ),
             ).alignment = wrap_alignment
             current_row += 1
             ws.cell(
                 row=current_row,
                 column=2,
+                value=", ".join(
+                    str(c) for c in user_input.personal_medical_history.previous_cancers
+                ),
             ).alignment = wrap_alignment
             current_row += 1
+        if user_input.personal_medical_history.chronic_conditions:
             ws.cell(
+                row=current_row, column=1, value="Chronic Conditions"
             ).font = bold_font
             ws.cell(
                 row=current_row,
                 column=2,
+                value=", ".join(
+                    str(c)
+                    for c in user_input.personal_medical_history.chronic_conditions
+                ),
             ).alignment = wrap_alignment
             current_row += 1
         current_row += 1
         ws.cell(row=current_row, column=1, value="Family History").font = bold_font
         current_row += 1
         family_texts = [
+            f"{mem.relation} ({mem.cancer_type} at age {mem.age_at_diagnosis or 'N/A'})"
             for mem in user_input.family_history
         ]
         ws.cell(
     if user_input.female_specific:
         ws.cell(row=current_row, column=1, value="Female-Specific").font = bold_font
         current_row += 1
+        fs = user_input.female_specific
         female_specific_info = {
+            "Age at first period": fs.menstrual.age_at_menarche
+            if fs.menstrual
+            else None,
+            "Age at menopause": fs.menstrual.age_at_menopause if fs.menstrual else None,
+            "Number of live births": fs.parity.num_live_births if fs.parity else None,
+            "Age at first live birth": fs.parity.age_at_first_live_birth
+            if fs.parity
+            else None,
         }
         for key, val in female_specific_info.items():
             ws.cell(row=current_row, column=1, value=key).font = bold_font
             current_row += 1
         current_row += 1
+    # Current Symptoms
+    if user_input.symptoms:
+        ws.cell(row=current_row, column=1, value="Current Symptoms").font = bold_font
         current_row += 1
+        symptom_texts = [str(s.symptom_type) for s in user_input.symptoms]
         ws.cell(
+            row=current_row, column=2, value=", ".join(symptom_texts)
         ).alignment = wrap_alignment
         current_row += 2
+    # Note: clinical_observations doesn't exist in user_input.UserInput (strict schema)
+    # The strict schema uses clinical_tests instead (PSA, DRE, etc.)
+    # Skipping this section as it requires restructuring
     ws.merge_cells(
         start_row=current_row, start_column=1, end_row=current_row, end_column=6
     ws.column_dimensions["F"].width = 30
def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> None:
    """Create a worksheet with calculated risk scores.

    The sheet gets a merged, centred title (row 1) and subtitle (row 2).
    From row 4 it holds either a single "No risk scores calculated" notice
    or a header row followed by one row per risk score, ordered by cancer
    type.

    Args:
        wb: An openpyxl workbook.
        assessment: The structured initial assessment containing calculated scores.
    """
    ws = wb.create_sheet("Risk Model Scores")
    title_font = Font(bold=True, size=16, name="Calibri")
    header_font = Font(bold=True, color=HEX_COLORS["header_font"], name="Calibri")
    header_fill = PatternFill(start_color=HEX_COLORS["header_fill"], fill_type="solid")
    wrap_alignment = Alignment(wrap_text=True, vertical="top")

    ws.merge_cells("A1:E1")
    ws["A1"] = "Calculated Risk Scores (Ground Truth)"
    ws["A1"].font = title_font
    ws["A1"].alignment = Alignment(horizontal="center")
    ws.merge_cells("A2:E2")
    ws["A2"] = "Scores calculated using validated clinical risk models"
    ws["A2"].alignment = Alignment(horizontal="center")

    # Size the columns up front so the early-return ("no scores") sheet is
    # laid out the same way as a populated one.
    column_widths = {"A": 20, "B": 25, "C": 15, "D": 50, "E": 40}
    for column_letter, width in column_widths.items():
        ws.column_dimensions[column_letter].width = width

    current_row = 4
    if not assessment.calculated_risk_scores:
        ws.cell(row=current_row, column=1, value="No risk scores calculated")
        return

    # Header row
    headers = ["Cancer Type", "Model Name", "Score", "Interpretation", "References"]
    for col_idx, header in enumerate(headers, 1):
        cell = ws.cell(row=current_row, column=col_idx, value=header)
        cell.font = header_font
        cell.fill = header_fill
    current_row += 1

    # One row per score, grouped by cancer type
    for cancer_type, scores in sorted(assessment.calculated_risk_scores.items()):
        for i, score in enumerate(scores):
            # Show the cancer type only on the first row of each group
            if i == 0:
                ws.cell(row=current_row, column=1, value=cancer_type)
            ws.cell(row=current_row, column=2, value=score.name)
            ws.cell(row=current_row, column=3, value=score.score or "N/A")
            interp_cell = ws.cell(
                row=current_row, column=4, value=score.interpretation or "N/A"
            )
            interp_cell.alignment = wrap_alignment
            refs = "; ".join(score.references) if score.references else "N/A"
            refs_cell = ws.cell(row=current_row, column=5, value=refs)
            refs_cell.alignment = wrap_alignment
            current_row += 1
 def _create_data_sheet(wb: Workbook, title: str, data: dict) -> None:
     ws = wb.create_sheet(title)
     pretty_json = json.dumps(data, indent=2)
     add_section(
         "Demographics",
         {
+            "Age": user_input.demographics.age_years,
             "Sex": user_input.demographics.sex,
             "Ethnicity": user_input.demographics.ethnicity or "N/A",
         },
     add_section(
         "Lifestyle",
         {
+            "Smoking Status": user_input.lifestyle.smoking.status,
+            "Pack Years": user_input.lifestyle.smoking.pack_years or "N/A",
             "Alcohol Consumption": user_input.lifestyle.alcohol_consumption,
             "Physical Activity": user_input.lifestyle.physical_activity_level or "N/A",
         },
     )
     # --- Personal Medical History ---
     pmh = user_input.personal_medical_history
     if pmh and (
+        pmh.genetic_mutations or pmh.previous_cancers or pmh.chronic_conditions
     ):
         pmh_data = {}
+        if pmh.genetic_mutations:
+            pmh_data["Known Genetic Mutations"] = ", ".join(
+                str(m) for m in pmh.genetic_mutations
+            )
         if pmh.previous_cancers:
+            pmh_data["Previous Cancers"] = ", ".join(
+                str(c) for c in pmh.previous_cancers
+            )
+        if pmh.chronic_conditions:
+            pmh_data["Chronic Conditions"] = ", ".join(
+                str(c) for c in pmh.chronic_conditions
+            )
         add_section("Personal Medical History", pmh_data)
     # --- Family History ---
     if user_input.family_history:
         family_texts = [
+            f"{mem.relation} - {mem.cancer_type} (Age: {mem.age_at_diagnosis or 'N/A'})"
             for mem in user_input.family_history
         ]
         add_list_section("Family History", family_texts)
     fs = user_input.female_specific
     if fs:
         fs_data = {}
+        if fs.menstrual and fs.menstrual.age_at_menarche is not None:
+            fs_data["Age at first period"] = fs.menstrual.age_at_menarche
+        if fs.menstrual and fs.menstrual.age_at_menopause is not None:
+            fs_data["Age at menopause"] = fs.menstrual.age_at_menopause
+        if fs.parity and fs.parity.num_live_births is not None:
+            fs_data["Number of live births"] = fs.parity.num_live_births
+        if fs.parity and fs.parity.age_at_first_live_birth is not None:
+            fs_data["Age at first live birth"] = fs.parity.age_at_first_live_birth
+        if fs_data:  # Only add section if we have data
+            add_section("Female-Specific", fs_data)
+    # --- Current Symptoms ---
+    if user_input.symptoms:
+        symptom_texts = [str(s.symptom_type) for s in user_input.symptoms]
+        add_list_section("Current Symptoms", symptom_texts)
     story.append(Spacer(1, SPACER_NORMAL))
+    # Note: clinical_observations doesn't exist in user_input.UserInput (strict schema)
+    # The strict schema uses clinical_tests instead - skipping this section
+    story.append(PageBreak())
+    story.append(Paragraph("Assessment", heading_style))
+    story.append(Spacer(1, SPACER_NORMAL))
+    # --- NEW: Calculated Risk Scores Section ---
+    if assessment.calculated_risk_scores:
+        story.append(Paragraph("Calculated Risk Scores", subheading_style))
+        story.append(Spacer(1, SPACER_SMALL))
+        risk_scores_intro = """
+        The following risk scores have been calculated using validated clinical risk models.
+        Each score represents a quantitative assessment based on your specific profile.
+        """
+        story.append(Paragraph(risk_scores_intro, styles["BodyText"]))
+        story.append(Spacer(1, SPACER_SMALL))
+        # Create table for calculated risk scores
+        score_data = [
             [
                 Paragraph(h, table_header_style)
+                for h in ["Cancer Type", "Model", "Score", "Interpretation"]
             ]
         ]
+        score_style_cmds = [
             (
                 "BACKGROUND",
                 (0, 0),
             ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
             ("TOPPADDING", (0, 0), (-1, -1), 4),
         ]
+        # Sort by cancer type and add rows
+        for cancer_type, scores in sorted(assessment.calculated_risk_scores.items()):
+            for i, score in enumerate(scores):
+                # Only show cancer type on first row for each cancer
+                cancer_cell = Paragraph(cancer_type, table_body_style) if i == 0 else ""
+                score_data.append(
+                    [
+                        cancer_cell,
+                        Paragraph(score.name, table_body_style),
+                        Paragraph(score.score or "N/A", table_body_style),
+                        Paragraph(score.interpretation or "N/A", table_body_style),
+                    ]
+                )
+        score_widths = [1.5 * inch, 1.5 * inch, 1.0 * inch, 2.5 * inch]
+        scaled_widths = [w * (CONTENT_WIDTH / sum(score_widths)) for w in score_widths]
+        scores_table = Table(
+            score_data, colWidths=scaled_widths, style=score_style_cmds, splitByRow=1
         )
+        story.append(scores_table)
         story.append(Spacer(1, SPACER_NORMAL))
     # --- New 3-Column Summary Section ---
     headers = [
         Paragraph("<b>Overall Risk Score</b>", summary_header_style),
     story.append(Spacer(1, SPACER_NORMAL))
+    story.append(Paragraph("AI-Generated Risk Interpretations", subheading_style))
     story.append(Spacer(1, SPACER_SMALL))
     risk_intro_text = """
+    The following interpretations provide context and explanation for the calculated risk scores above.
+    These AI-generated insights identify key contributing factors and provide actionable recommendations.
+    For cancers with higher risk levels (3-5), additional details on risk factors and recommendations are provided.
     """
     story.append(Paragraph(risk_intro_text, styles["BodyText"]))
     story.append(Spacer(1, SPACER_SMALL))
     Returns:
         Mapping of RiskFactorCategory to integer points.
     """
     risk_points_by_category = defaultdict(int)
     strength_to_points = {
         ContributionStrength.MAJOR: 5,
     Returns:
         A ReportLab Table or Paragraph to insert in the story.
     """
     if not assessment.identified_risk_factors:
         return Paragraph("No specific risk factors identified.", panel_body_style)

src/sentinel/risk_aggregation.py ADDED Viewed

	@@ -0,0 +1,98 @@

+"""Risk score aggregation and formatting utilities."""
+from collections import defaultdict
+from .models import RiskScore
def group_scores_by_cancer_type(scores: list[RiskScore]) -> dict[str, list[RiskScore]]:
    """Group risk scores by cancer type.

    Surrounding whitespace is stripped from each ``cancer_type`` before it is
    used as a grouping key. Within a group, scores keep their input order.

    Args:
        scores: List of risk scores from various models.

    Returns:
        Plain dictionary mapping cancer type to list of risk scores, with keys
        in sorted order.

    Raises:
        ValueError: If any score has a missing, empty, or whitespace-only
            cancer_type.
    """
    grouped: dict[str, list[RiskScore]] = defaultdict(list)
    for score in scores:
        # Strip before validating so a whitespace-only value is rejected
        # instead of being grouped under the empty key "".
        cancer_type = (score.cancer_type or "").strip()
        if not cancer_type:
            raise ValueError(
                f"Risk score '{score.name}' is missing cancer_type. "
                "All risk scores must have a cancer_type specified."
            )
        grouped[cancer_type].append(score)
    # Return a regular dict (not a defaultdict) ordered by cancer type
    return {cancer_type: grouped[cancer_type] for cancer_type in sorted(grouped)}
def format_scores_for_llm(grouped_scores: dict[str, list[RiskScore]]) -> str:
    """Format grouped risk scores for LLM context.

    Produces a Markdown document: a heading, one ``##`` section per cancer
    type, one ``###`` entry per model with its score and any description,
    interpretation and references, then a fixed block of instructions.
    A score that is ``None`` or empty is rendered as ``N/A``.

    Args:
        grouped_scores: Dictionary mapping cancer type to list of risk scores.

    Returns:
        Formatted string representation of all risk scores, or
        "No risk scores calculated." when the mapping is empty.
    """
    if not grouped_scores:
        return "No risk scores calculated."

    lines = [
        "# Calculated Risk Scores (Ground Truth)\n",
        "The following risk scores have been calculated using validated models:\n",
    ]
    for cancer_type, scores in grouped_scores.items():
        lines.append(f"\n## {cancer_type}\n")
        for score in scores:
            # Render a missing score as "N/A" so the literal text "None"
            # never reaches the prompt.
            score_text = "N/A" if score.score in (None, "") else score.score
            lines.append(f"### {score.name}")
            lines.append(f"- **Score**: {score_text}")
            if score.description:
                lines.append(f"- **Description**: {score.description}")
            if score.interpretation:
                lines.append(f"- **Interpretation**: {score.interpretation}")
            if score.references:
                refs = "; ".join(score.references)
                lines.append(f"- **References**: {refs}")
            lines.append("")  # Empty line between models

    lines.extend(
        [
            "\n---\n",
            "**Important**: These scores are the ground truth. Your task is to:",
            "1. Explain what these scores mean for the patient in clear language",
            "2. Identify and highlight key risk factors contributing to elevated scores",
            "3. Provide actionable context and insights based on these scores",
            "4. DO NOT generate your own risk levels - explain and contextualize the calculated ones\n",
        ]
    )
    return "\n".join(lines)
def format_scores_for_pdf(
    grouped_scores: dict[str, list[RiskScore]],
) -> list[tuple[str, list[RiskScore]]]:
    """Turn grouped risk scores into an ordered list for PDF rendering.

    Args:
        grouped_scores: Dictionary mapping cancer type to list of risk scores.

    Returns:
        A new list of (cancer_type, scores) pairs in ascending cancer-type
        order; the input mapping is left untouched.
    """
    pairs = list(grouped_scores.items())
    pairs.sort()
    return pairs

src/sentinel/risk_models/qcancer.py CHANGED Viewed

@@ -23,6 +23,7 @@ from typing import Annotated
 from pydantic import Field
 from sentinel.risk_models.base import RiskModel
 from sentinel.user_input import (
     AlcoholConsumption,
@@ -1780,6 +1781,107 @@ class QCancerRiskModel(RiskModel):
             "Values sum to 100% and reflect relative likelihoods over the next 10 years; higher percentages warrant clinical review."
         )
     def references(self) -> list[str]:
         return [
             "Hippisley-Cox J, Coupland C. QCancer (10 year risk) BMJ. 2014;349:g4606.",

 from pydantic import Field
+from sentinel.models import RiskScore
 from sentinel.risk_models.base import RiskModel
 from sentinel.user_input import (
     AlcoholConsumption,
             "Values sum to 100% and reflect relative likelihoods over the next 10 years; higher percentages warrant clinical review."
         )
+    def run(self, user: UserInput) -> list:
+        """Compute QCancer scores and return as list of RiskScore objects.
+        Overrides base class to return multiple scores (one per cancer type).
+        Args:
+            user: The user profile to score.
+        Returns:
+            List of RiskScore objects, one for each cancer type assessed.
+        """
+        sex = (user.demographics.sex or "").strip().lower()
+        scores = []
+        try:
+            if sex.startswith("f"):
+                params = self._extract_female_params(user)
+                probabilities = compute_female_probabilities(**params)
+                scores = self._create_individual_scores(probabilities, is_female=True)
+            elif sex.startswith("m"):
+                params = self._extract_male_params(user)
+                probabilities = compute_male_probabilities(**params)
+                scores = self._create_individual_scores(probabilities, is_female=False)
+            else:
+                # Return single N/A score
+                scores = [
+                    RiskScore(
+                        name=self.name,
+                        score="N/A: QCancer requires patient sex (male or female).",
+                        cancer_type="Multiple Cancer Sites",
+                        description=self.description(),
+                        interpretation=self.interpretation(),
+                        references=self.references(),
+                    )
+                ]
+        except ValueError as exc:
+            # Return single N/A score with error message
+            scores = [
+                RiskScore(
+                    name=self.name,
+                    score=f"N/A: {exc}",
+                    cancer_type="Multiple Cancer Sites",
+                    description=self.description(),
+                    interpretation=self.interpretation(),
+                    references=self.references(),
+                )
+            ]
+        return scores
+    def _create_individual_scores(
+        self, risks: dict[str, float], is_female: bool
+    ) -> list:
+        """Create individual RiskScore objects for each cancer type.
+        Args:
+            risks: Dictionary of cancer names to probabilities.
+            is_female: Whether results are for female patient.
+        Returns:
+            List of RiskScore objects.
+        """
+        from sentinel.models import RiskScore
+        order = FEMALE_CANCER_TYPES if is_female else MALE_CANCER_TYPES
+        scores = []
+        # Add "No Cancer" score first
+        no_cancer_pct = risks.get("none", 0.0)
+        scores.append(
+            RiskScore(
+                name="QCancer",
+                score=f"{no_cancer_pct:.1f}%",
+                cancer_type="No Cancer",
+                description="10-year probability of not developing cancer",
+                interpretation="Baseline probability - higher values indicate lower overall cancer risk",
+                references=self.references(),
+            )
+        )
+        # Add each cancer type
+        for cancer_name in order:
+            pct = risks.get(cancer_name, 0.0)
+            display_name = cancer_name.replace("_", " ").title()
+            scores.append(
+                RiskScore(
+                    name="QCancer",
+                    score=f"{pct:.1f}%",
+                    cancer_type=display_name,
+                    description=f"10-year probability of {display_name.lower()}",
+                    interpretation=(
+                        "Percentages reflect relative likelihood over next 10 years. "
+                        "Values >1% warrant clinical review."
+                    ),
+                    references=self.references(),
+                )
+            )
+        return scores
     def references(self) -> list[str]:
         return [
             "Hippisley-Cox J, Coupland C. QCancer (10 year risk) BMJ. 2014;349:g4606.",

src/sentinel/utils.py CHANGED Viewed

@@ -5,7 +5,7 @@ from typing import Any, Literal
 import yaml
-from .models import UserInput
 def load_user_file(source: str | Any) -> UserInput:

 import yaml
+from .user_input import UserInput
 def load_user_file(source: str | Any) -> UserInput:

tests/test_conversation.py CHANGED Viewed

@@ -4,19 +4,36 @@ from unittest.mock import MagicMock, patch
 from sentinel.conversation import ConversationManager
 from sentinel.models import (
     ConversationResponse,
-    Demographics,
     InitialAssessment,
     Lifestyle,
     PersonalMedicalHistory,
     UserInput,
 )
 def sample_user() -> UserInput:
     return UserInput(
-        demographics=Demographics(age=30, sex="male"),
-        lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="none"),
-        personal_medical_history=PersonalMedicalHistory(),
         family_history=[],
     )
@@ -25,11 +42,10 @@ def sample_user() -> UserInput:
 @patch("sentinel.llm_service.create_conversational_chain")
 def test_conversation_flow(mock_create_conversational_chain, mock_create_initial_chain):
     structured = MagicMock()
-    structured.prompt.format.return_value = "full prompt"
     freeform = MagicMock()
     structured.invoke.return_value = {
         "overall_summary": "ok",
-        "risk_assessments": [],
         "dx_recommendations": [],
     }
     freeform.invoke.return_value = "hi"
@@ -37,15 +53,21 @@ def test_conversation_flow(mock_create_conversational_chain, mock_create_initial
     mock_create_conversational_chain.return_value = freeform
     conv = ConversationManager(structured, freeform)
-    result = conv.initial_assessment(sample_user())
     assert isinstance(result, InitialAssessment)
     assert result.overall_summary == "ok"
-    assert conv.history == [("full prompt", result.model_dump_json())]
     answer = conv.follow_up("question")
     assert isinstance(answer, ConversationResponse)
     assert answer.response == "hi"
-    assert conv.history == [
-        ("full prompt", result.model_dump_json()),
-        ("question", "hi"),
-    ]

 from sentinel.conversation import ConversationManager
 from sentinel.models import (
     ConversationResponse,
     InitialAssessment,
+)
+from sentinel.user_input import (
+    Anthropometrics,
+    Demographics,
     Lifestyle,
     PersonalMedicalHistory,
+    SmokingHistory,
     UserInput,
 )
 def sample_user() -> UserInput:
     return UserInput(
+        demographics=Demographics(
+            age_years=30,
+            sex="male",
+            anthropometrics=Anthropometrics(height_cm=175, weight_kg=70),
+        ),
+        lifestyle=Lifestyle(
+            smoking=SmokingHistory(
+                status="never",
+                cigarettes_per_day=0,
+                years_smoked=0,
+            ),
+        ),
+        personal_medical_history=PersonalMedicalHistory(
+            chronic_conditions=[],
+            previous_cancers=[],
+        ),
         family_history=[],
     )
 @patch("sentinel.llm_service.create_conversational_chain")
 def test_conversation_flow(mock_create_conversational_chain, mock_create_initial_chain):
     structured = MagicMock()
     freeform = MagicMock()
     structured.invoke.return_value = {
         "overall_summary": "ok",
+        "llm_risk_interpretations": [],
         "dx_recommendations": [],
     }
     freeform.invoke.return_value = "hi"
     mock_create_conversational_chain.return_value = freeform
     conv = ConversationManager(structured, freeform)
+    user = sample_user()
+    result = conv.initial_assessment(user)
     assert isinstance(result, InitialAssessment)
     assert result.overall_summary == "ok"
+    assert result.calculated_risk_scores == {}
+    # Verify history contains initial assessment message
+    assert len(conv.history) == 1
+    assert conv.history[0][0].startswith("Initial assessment for user profile:")
+    assert conv.history[0][1] == result.model_dump_json()
     answer = conv.follow_up("question")
     assert isinstance(answer, ConversationResponse)
     assert answer.response == "hi"
+    # Verify follow-up added to history
+    assert len(conv.history) == 2
+    assert conv.history[1] == ("question", "hi")

tests/test_demo.py CHANGED Viewed

@@ -7,20 +7,31 @@ import yaml
 from sentinel.models import (
     CancerRiskAssessment,
-    ClinicalObservation,
     ContributingFactor,
     ContributionStrength,
-    Demographics,
     DxRecommendation,
-    FamilyMemberCancer,
     InitialAssessment,
-    Lifestyle,
-    PersonalMedicalHistory,
     RiskFactor,
     RiskFactorCategory,
-    UserInput,
 )
 from sentinel.reporting import generate_excel_report, generate_pdf_report
 from sentinel.utils import load_user_file
@@ -32,8 +43,15 @@ def test_load_user_file_yaml(tmp_path):
     """
     data = {
-        "demographics": {"age": 30, "sex": "male"},
-        "lifestyle": {"smoking_status": "never", "alcohol_consumption": "none"},
         "personal_medical_history": {},
         "family_history": [],
     }
@@ -42,9 +60,9 @@ def test_load_user_file_yaml(tmp_path):
     user = load_user_file(str(path))
     assert isinstance(user, UserInput)
-    assert user.demographics.age == 30
-    assert user.lifestyle.smoking_status == "never"
-    assert user.clinical_observations == []
 @pytest.mark.parametrize("save_files", [True, False])
@@ -57,41 +75,31 @@ def test_generate_reports(tmp_path, save_files):
     """
     # 1. Create mock UserInput data with all fields
     user = UserInput(
-        demographics=Demographics(age=45, sex="Female", ethnicity="Caucasian"),
         lifestyle=Lifestyle(
-            smoking_status="former",
-            smoking_pack_years=10,
-            alcohol_consumption="light",
-            dietary_habits="Balanced",
-            physical_activity_level="moderate",
         ),
         personal_medical_history=PersonalMedicalHistory(
-            previous_cancers=["Skin Cancer"],
-            known_genetic_mutations=["BRCA2"],
-            chronic_illnesses=["IBS"],
         ),
         family_history=[
             FamilyMemberCancer(
-                relative="Mother", cancer_type="Breast Cancer", age_at_diagnosis=50
             )
         ],
-        clinical_observations=[
-            ClinicalObservation(
-                test_name="Blood Pressure",
-                value="120/80",
-                unit="mmHg",
-                reference_range="<130/85",
-                date="2023-05-10",
-            ),
-            ClinicalObservation(
-                test_name="Cholesterol",
-                value="190",
-                unit="mg/dL",
-                reference_range="<200",
-                date="2023-05-10",
-            ),
-        ],
-        current_concerns_or_symptoms="Occasional headaches.",
     )
     # 2. Create mock InitialAssessment data
@@ -122,7 +130,7 @@ def test_generate_reports(tmp_path, save_files):
                 category=RiskFactorCategory.LIFESTYLE,
             ),
         ],
-        risk_assessments=[
             CancerRiskAssessment(
                 cancer_type="Breast Cancer",
                 risk_level=4,

 from sentinel.models import (
     CancerRiskAssessment,
     ContributingFactor,
     ContributionStrength,
     DxRecommendation,
     InitialAssessment,
     RiskFactor,
     RiskFactorCategory,
 )
 from sentinel.reporting import generate_excel_report, generate_pdf_report
+from sentinel.user_input import (
+    AlcoholConsumption,
+    Anthropometrics,
+    CancerType,
+    Demographics,
+    Ethnicity,
+    FamilyMemberCancer,
+    FamilyRelation,
+    FamilySide,
+    Lifestyle,
+    PersonalMedicalHistory,
+    RelationshipDegree,
+    Sex,
+    SmokingHistory,
+    SmokingStatus,
+    UserInput,
+)
 from sentinel.utils import load_user_file
     """
     data = {
+        "demographics": {
+            "age_years": 30,
+            "sex": "male",
+            "anthropometrics": {"height_cm": 175, "weight_kg": 70},
+        },
+        "lifestyle": {
+            "smoking": {"status": "never"},
+            "alcohol_consumption": "none",
+        },
         "personal_medical_history": {},
         "family_history": [],
     }
     user = load_user_file(str(path))
     assert isinstance(user, UserInput)
+    assert user.demographics.age_years == 30
+    assert user.lifestyle.smoking.status == SmokingStatus.NEVER
+    assert user.symptoms == []
 @pytest.mark.parametrize("save_files", [True, False])
     """
     # 1. Create mock UserInput data with all fields
     user = UserInput(
+        demographics=Demographics(
+            age_years=45,
+            sex=Sex.FEMALE,
+            ethnicity=Ethnicity.WHITE,
+            anthropometrics=Anthropometrics(height_cm=165, weight_kg=70),
+        ),
         lifestyle=Lifestyle(
+            smoking=SmokingHistory(
+                status=SmokingStatus.FORMER,
+                pack_years=10,
+            ),
+            alcohol_consumption=AlcoholConsumption.LIGHT,
         ),
         personal_medical_history=PersonalMedicalHistory(
+            previous_cancers=[CancerType.MELANOMA],
         ),
         family_history=[
             FamilyMemberCancer(
+                relation=FamilyRelation.MOTHER,
+                cancer_type=CancerType.BREAST,
+                age_at_diagnosis=50,
+                degree=RelationshipDegree.FIRST,
+                side=FamilySide.MATERNAL,
             )
         ],
     )
     # 2. Create mock InitialAssessment data
                 category=RiskFactorCategory.LIFESTYLE,
             ),
         ],
+        llm_risk_interpretations=[
             CancerRiskAssessment(
                 cancer_type="Breast Cancer",
                 risk_level=4,

tests/test_integration_canrisk_api.py CHANGED Viewed

@@ -15,15 +15,30 @@ from pathlib import Path
 import pytest
 from sentinel.api_clients.canrisk import BOADICEAInput, CanRiskClient
-from sentinel.models import (
     Demographics,
     FamilyMemberCancer,
     FemaleSpecific,
     Lifestyle,
     PersonalMedicalHistory,
     UserInput,
 )
-from sentinel.risk_models.boadicea import BOADICEARiskModel
 CREDENTIALS_AVAILABLE = bool(
     os.getenv("CANRISK_USERNAME") and os.getenv("CANRISK_PASSWORD")
@@ -48,28 +63,37 @@ class Scenario:
 def _high_risk_user() -> UserInput:
     return UserInput(
         demographics=Demographics(
-            age=42,
-            sex="female",
-            ethnicity="Ashkenazi Jewish",
-            height=1.65,
-            weight=65.0,
         ),
-        lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="none"),
         personal_medical_history=PersonalMedicalHistory(
-            known_genetic_mutations=["BRCA1", "BRCA2"],
         ),
         female_specific=FemaleSpecific(
-            age_at_first_period=13,
-            age_at_first_live_birth=28,
-            num_live_births=1,
-            hormone_therapy_use="N",
         ),
         family_history=[
             FamilyMemberCancer(
-                relative="mother", cancer_type="breast", age_at_diagnosis=52
             ),
             FamilyMemberCancer(
-                relative="sister", cancer_type="ovarian", age_at_diagnosis=48
             ),
         ],
     )
@@ -78,28 +102,37 @@ def _high_risk_user() -> UserInput:
 def _moderate_risk_user() -> UserInput:
     return UserInput(
         demographics=Demographics(
-            age=50,
-            sex="female",
-            ethnicity="Hispanic",
-            height=1.60,
-            weight=70.0,
         ),
-        lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="light"),
         personal_medical_history=PersonalMedicalHistory(
-            known_genetic_mutations=["BRCA1"],
         ),
         female_specific=FemaleSpecific(
-            age_at_first_period=12,
-            age_at_first_live_birth=30,
-            num_live_births=2,
-            hormone_therapy_use="former",
         ),
         family_history=[
             FamilyMemberCancer(
-                relative="mother", cancer_type="breast", age_at_diagnosis=60
             ),
             FamilyMemberCancer(
-                relative="maternal aunt", cancer_type="breast", age_at_diagnosis=55
             ),
         ],
     )
@@ -108,24 +141,28 @@ def _moderate_risk_user() -> UserInput:
 def _average_risk_user() -> UserInput:
     return UserInput(
         demographics=Demographics(
-            age=38,
-            sex="female",
-            ethnicity="White",
-            height=1.68,
-            weight=62.0,
         ),
-        lifestyle=Lifestyle(smoking_status="never", alcohol_consumption="moderate"),
         personal_medical_history=PersonalMedicalHistory(),
         female_specific=FemaleSpecific(
-            age_at_first_period=12,
-            hormone_therapy_use="never",
-            num_live_births=0,
         ),
         family_history=[
             FamilyMemberCancer(
-                relative="paternal grandmother",
-                cancer_type="breast",
                 age_at_diagnosis=67,
             ),
         ],
     )

 import pytest
 from sentinel.api_clients.canrisk import BOADICEAInput, CanRiskClient
+from sentinel.risk_models.boadicea import BOADICEARiskModel
+from sentinel.user_input import (
+    AlcoholConsumption,
+    Anthropometrics,
+    CancerType,
     Demographics,
+    Ethnicity,
     FamilyMemberCancer,
+    FamilyRelation,
+    FamilySide,
     FemaleSpecific,
+    GeneticMutation,
+    HormoneUse,
+    HormoneUseHistory,
     Lifestyle,
+    MenstrualHistory,
+    ParityHistory,
     PersonalMedicalHistory,
+    RelationshipDegree,
+    Sex,
+    SmokingHistory,
+    SmokingStatus,
     UserInput,
 )
 CREDENTIALS_AVAILABLE = bool(
     os.getenv("CANRISK_USERNAME") and os.getenv("CANRISK_PASSWORD")
 def _high_risk_user() -> UserInput:
     return UserInput(
         demographics=Demographics(
+            age_years=42,
+            sex=Sex.FEMALE,
+            ethnicity=Ethnicity.ASHKENAZI_JEWISH,
+            anthropometrics=Anthropometrics(height_cm=165, weight_kg=65.0),
+        ),
+        lifestyle=Lifestyle(
+            smoking=SmokingHistory(status=SmokingStatus.NEVER),
+            alcohol_consumption=AlcoholConsumption.NONE,
         ),
         personal_medical_history=PersonalMedicalHistory(
+            genetic_mutations=[GeneticMutation.BRCA1, GeneticMutation.BRCA2],
         ),
         female_specific=FemaleSpecific(
+            menstrual=MenstrualHistory(age_at_menarche=13),
+            parity=ParityHistory(age_at_first_live_birth=28, num_live_births=1),
+            hormone_use=HormoneUseHistory(estrogen_use=HormoneUse.NEVER),
         ),
         family_history=[
             FamilyMemberCancer(
+                relation=FamilyRelation.MOTHER,
+                cancer_type=CancerType.BREAST,
+                age_at_diagnosis=52,
+                degree=RelationshipDegree.FIRST,
+                side=FamilySide.MATERNAL,
             ),
             FamilyMemberCancer(
+                relation=FamilyRelation.SISTER,
+                cancer_type=CancerType.OVARIAN,
+                age_at_diagnosis=48,
+                degree=RelationshipDegree.FIRST,
+                side=FamilySide.UNKNOWN,
             ),
         ],
     )
 def _moderate_risk_user() -> UserInput:
     return UserInput(
         demographics=Demographics(
+            age_years=50,
+            sex=Sex.FEMALE,
+            ethnicity=Ethnicity.HISPANIC,
+            anthropometrics=Anthropometrics(height_cm=160, weight_kg=70.0),
+        ),
+        lifestyle=Lifestyle(
+            smoking=SmokingHistory(status=SmokingStatus.NEVER),
+            alcohol_consumption=AlcoholConsumption.LIGHT,
         ),
         personal_medical_history=PersonalMedicalHistory(
+            genetic_mutations=[GeneticMutation.BRCA1],
         ),
         female_specific=FemaleSpecific(
+            menstrual=MenstrualHistory(age_at_menarche=12),
+            parity=ParityHistory(age_at_first_live_birth=30, num_live_births=2),
+            hormone_use=HormoneUseHistory(estrogen_use=HormoneUse.FORMER),
         ),
         family_history=[
             FamilyMemberCancer(
+                relation=FamilyRelation.MOTHER,
+                cancer_type=CancerType.BREAST,
+                age_at_diagnosis=60,
+                degree=RelationshipDegree.FIRST,
+                side=FamilySide.MATERNAL,
             ),
             FamilyMemberCancer(
+                relation=FamilyRelation.MATERNAL_AUNT,
+                cancer_type=CancerType.BREAST,
+                age_at_diagnosis=55,
+                degree=RelationshipDegree.SECOND,
+                side=FamilySide.MATERNAL,
             ),
         ],
     )
 def _average_risk_user() -> UserInput:
     return UserInput(
         demographics=Demographics(
+            age_years=38,
+            sex=Sex.FEMALE,
+            ethnicity=Ethnicity.WHITE,
+            anthropometrics=Anthropometrics(height_cm=168, weight_kg=62.0),
+        ),
+        lifestyle=Lifestyle(
+            smoking=SmokingHistory(status=SmokingStatus.NEVER),
+            alcohol_consumption=AlcoholConsumption.MODERATE,
         ),
         personal_medical_history=PersonalMedicalHistory(),
         female_specific=FemaleSpecific(
+            menstrual=MenstrualHistory(age_at_menarche=12),
+            parity=ParityHistory(num_live_births=0),
+            hormone_use=HormoneUseHistory(estrogen_use=HormoneUse.NEVER),
         ),
         family_history=[
             FamilyMemberCancer(
+                relation=FamilyRelation.PATERNAL_GRANDMOTHER,
+                cancer_type=CancerType.BREAST,
                 age_at_diagnosis=67,
+                degree=RelationshipDegree.SECOND,
+                side=FamilySide.PATERNAL,
             ),
         ],
     )

tests/test_main.py CHANGED Viewed

@@ -31,18 +31,30 @@ def test_root():
 @patch("apps.api.main.SentinelFactory")
 def test_assess_local(mock_factory):
     payload = {
-        "demographics": {"age": 55, "sex": "male", "ethnicity": "Caucasian"},
         "lifestyle": {
-            "smoking_status": "former",
-            "smoking_pack_years": 10,
             "alcohol_consumption": "moderate",
         },
         "family_history": [
-            {"relative": "father", "cancer_type": "lung", "age_at_diagnosis": 60}
         ],
         "personal_medical_history": {
             "previous_cancers": ["melanoma"],
-            "chronic_illnesses": [],
         },
     }
     expected = {
@@ -51,6 +63,7 @@ def test_assess_local(mock_factory):
         "response": None,
         "overall_summary": "ok",
         "overall_risk_score": None,
         "identified_risk_factors": [],
         "risk_assessments": [],
         "dx_recommendations": [],
@@ -73,10 +86,17 @@ def test_assess_local(mock_factory):
 @patch("apps.api.main.SentinelFactory")
 def test_assess_bad_provider(mock_factory):
     payload = {
-        "demographics": {"age": 30, "sex": "male"},
-        "lifestyle": {"smoking_status": "never", "alcohol_consumption": "none"},
         "family_history": [],
-        "personal_medical_history": {"previous_cancers": [], "chronic_illnesses": []},
     }
     mock_factory.side_effect = ValueError("bad")
     response = client.post("/assess/invalid", json={"user_input": payload})
@@ -86,18 +106,22 @@ def test_assess_bad_provider(mock_factory):
 @patch("apps.api.main.SentinelFactory")
 def test_assess_with_observations(mock_factory):
     payload = {
-        "demographics": {"age": 60, "sex": "male"},
-        "lifestyle": {"smoking_status": "never", "alcohol_consumption": "none"},
-        "personal_medical_history": {"previous_cancers": [], "chronic_illnesses": []},
         "family_history": [],
-        "clinical_observations": [
-            {
-                "test_name": "PSA",
-                "value": "5",
-                "unit": "ng/mL",
-                "reference_range": "<4",
             }
-        ],
     }
     expected = {
         "thinking": None,
@@ -105,6 +129,7 @@ def test_assess_with_observations(mock_factory):
         "response": None,
         "overall_summary": "ok",
         "overall_risk_score": None,
         "identified_risk_factors": [],
         "risk_assessments": [],
         "dx_recommendations": [],

 @patch("apps.api.main.SentinelFactory")
 def test_assess_local(mock_factory):
     payload = {
+        "demographics": {
+            "age_years": 55,
+            "sex": "male",
+            "ethnicity": "white",
+            "anthropometrics": {"height_cm": 175, "weight_kg": 80},
+        },
         "lifestyle": {
+            "smoking": {
+                "status": "former",
+                "pack_years": 10,
+            },
             "alcohol_consumption": "moderate",
         },
         "family_history": [
+            {
+                "relation": "father",
+                "cancer_type": "lung_cancer",
+                "age_at_diagnosis": 60,
+                "degree": "1",
+                "side": "paternal",
+            }
         ],
         "personal_medical_history": {
             "previous_cancers": ["melanoma"],
         },
     }
     expected = {
         "response": None,
         "overall_summary": "ok",
         "overall_risk_score": None,
+        "calculated_risk_scores": {},
         "identified_risk_factors": [],
         "risk_assessments": [],
         "dx_recommendations": [],
 @patch("apps.api.main.SentinelFactory")
 def test_assess_bad_provider(mock_factory):
     payload = {
+        "demographics": {
+            "age_years": 30,
+            "sex": "male",
+            "anthropometrics": {"height_cm": 175, "weight_kg": 70},
+        },
+        "lifestyle": {
+            "smoking": {"status": "never"},
+            "alcohol_consumption": "none",
+        },
         "family_history": [],
+        "personal_medical_history": {},
     }
     mock_factory.side_effect = ValueError("bad")
     response = client.post("/assess/invalid", json={"user_input": payload})
 @patch("apps.api.main.SentinelFactory")
 def test_assess_with_observations(mock_factory):
     payload = {
+        "demographics": {
+            "age_years": 60,
+            "sex": "male",
+            "anthropometrics": {"height_cm": 175, "weight_kg": 75},
+        },
+        "lifestyle": {
+            "smoking": {"status": "never"},
+            "alcohol_consumption": "none",
+        },
+        "personal_medical_history": {},
         "family_history": [],
+        "clinical_tests": {
+            "psa": {
+                "value_ng_ml": 5.0,
             }
+        },
     }
     expected = {
         "thinking": None,
         "response": None,
         "overall_summary": "ok",
         "overall_risk_score": None,
+        "calculated_risk_scores": {},
         "identified_risk_factors": [],
         "risk_assessments": [],
         "dx_recommendations": [],

tests/test_risk_aggregation.py ADDED Viewed

	@@ -0,0 +1,118 @@

+"""Tests for risk aggregation utilities."""
+import pytest
+from sentinel.models import RiskScore
+from sentinel.risk_aggregation import (
+    format_scores_for_llm,
+    format_scores_for_pdf,
+    group_scores_by_cancer_type,
+)
+def test_group_scores_by_cancer_type():
+    """Test grouping risk scores by cancer type."""
+    scores = [
+        RiskScore(
+            name="Gail Model",
+            score="5%",
+            cancer_type="Breast Cancer",
+            description="5-year risk",
+        ),
+        RiskScore(
+            name="Claus Model",
+            score="3%",
+            cancer_type="Breast Cancer",
+            description="Lifetime risk",
+        ),
+        RiskScore(
+            name="PLCOm2012",
+            score="2%",
+            cancer_type="Lung Cancer",
+            description="6-year risk",
+        ),
+    ]
+    grouped = group_scores_by_cancer_type(scores)
+    assert len(grouped) == 2
+    assert "Breast Cancer" in grouped
+    assert "Lung Cancer" in grouped
+    assert len(grouped["Breast Cancer"]) == 2
+    assert len(grouped["Lung Cancer"]) == 1
+    assert grouped["Breast Cancer"][0].name == "Gail Model"
+    assert grouped["Breast Cancer"][1].name == "Claus Model"
+def test_group_scores_empty():
+    """Test grouping with empty list."""
+    grouped = group_scores_by_cancer_type([])
+    assert grouped == {}
+def test_group_scores_no_cancer_type():
+    """Test grouping with scores that have no cancer type."""
+    scores = [
+        RiskScore(name="Test Model", score="5%", cancer_type=None),
+    ]
+    with pytest.raises(ValueError, match=r"Test Model.*missing cancer_type"):
+        group_scores_by_cancer_type(scores)
+def test_format_scores_for_llm():
+    """Test formatting scores for LLM context."""
+    scores = [
+        RiskScore(
+            name="Gail Model",
+            score="5%",
+            cancer_type="Breast Cancer",
+            description="5-year risk",
+            interpretation="Low to moderate risk",
+            references=["Gail et al., 1989"],
+        ),
+    ]
+    grouped = group_scores_by_cancer_type(scores)
+    formatted = format_scores_for_llm(grouped)
+    assert "# Calculated Risk Scores (Ground Truth)" in formatted
+    assert "Breast Cancer" in formatted
+    assert "Gail Model" in formatted
+    assert "5%" in formatted
+    assert "5-year risk" in formatted
+    assert "Low to moderate risk" in formatted
+    assert "Gail et al., 1989" in formatted
+    assert "DO NOT generate your own risk levels" in formatted
+def test_format_scores_for_llm_empty():
+    """Test formatting empty scores for LLM."""
+    formatted = format_scores_for_llm({})
+    assert formatted == "No risk scores calculated."
+def test_format_scores_for_pdf():
+    """Test formatting scores for PDF presentation."""
+    scores = [
+        RiskScore(
+            name="Gail Model",
+            score="5%",
+            cancer_type="Breast Cancer",
+        ),
+        RiskScore(
+            name="PLCOm2012",
+            score="2%",
+            cancer_type="Lung Cancer",
+        ),
+    ]
+    grouped = group_scores_by_cancer_type(scores)
+    formatted = format_scores_for_pdf(grouped)
+    assert len(formatted) == 2
+    assert formatted[0][0] == "Breast Cancer"
+    assert formatted[1][0] == "Lung Cancer"
+    assert len(formatted[0][1]) == 1
+    assert len(formatted[1][1]) == 1