File size: 11,172 Bytes
499170b
 
36983ae
25c3a8b
499170b
 
 
2ff16a3
20f762e
1bc9785
499170b
 
 
 
1bc9785
e67c99f
499170b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9286db5
 
 
 
499170b
 
 
 
 
 
 
 
 
1bc9785
499170b
 
d7e5abb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25c3a8b
 
 
 
 
 
 
6b5e05b
25c3a8b
 
 
 
 
 
 
 
1922dbd
7cc8b69
 
 
0257d2f
25c3a8b
 
 
36983ae
25c3a8b
 
 
 
 
 
6b5e05b
25c3a8b
 
 
 
 
 
 
 
d247864
c690006
7cc8b69
 
0257d2f
25c3a8b
 
 
 
 
c690006
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc74531
c690006
 
 
 
 
3139749
 
 
 
 
f1e4e5b
3139749
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f882609
 
3139749
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1e4e5b
 
3139749
 
 
f1e4e5b
 
 
 
 
 
3139749
 
 
 
 
 
 
 
 
 
 
f1e4e5b
 
 
 
 
3139749
 
 
f1e4e5b
 
 
 
 
3139749
 
 
 
 
 
f1e4e5b
 
 
 
 
 
 
 
 
 
3139749
 
 
 
 
 
 
 
 
 
 
 
25c3a8b
 
 
72b2667
25c3a8b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
"""Data models for the Search feature."""

from datetime import UTC, datetime
from typing import Any, ClassVar, Literal

from pydantic import BaseModel, Field

# Single source of truth for the names of the searchable sources.
# To support a new source (e.g., another database), add its name to this list.
SourceName = Literal[
    "pubmed",
    "clinicaltrials",
    "europepmc",
    "preprint",
    "openalex",
    "web",
]


class Citation(BaseModel):
    """A citation to a source document.

    Stores where a document came from together with its bibliographic
    details; ``formatted`` renders them as a short one-line reference.
    """

    # Must be one of the names listed in ``SourceName``.
    source: SourceName = Field(description="Where this came from")

    title: str = Field(min_length=1, max_length=500)
    url: str = Field(description="URL to the source")
    date: str = Field(description="Publication date (YYYY-MM-DD or 'Unknown')")
    # Optional; defaults to an empty list when the source gives no authors.
    authors: list[str] = Field(default_factory=list)

    # Number of authors listed by ``formatted`` before collapsing to "et al.".
    MAX_AUTHORS_IN_CITATION: ClassVar[int] = 3

    @property
    def formatted(self) -> str:
        """Return the citation as ``"<authors> (<date>). <title>. <SOURCE>"``.

        At most ``MAX_AUTHORS_IN_CITATION`` authors are listed; if more are
        present, " et al." is appended. When no authors are recorded the
        placeholder "Unknown" is used, so the result never starts with a
        stray space. The source name is upper-cased.
        """
        if not self.authors:
            # Same placeholder the ``date`` field uses for missing data.
            author_str = "Unknown"
        else:
            author_str = ", ".join(self.authors[: self.MAX_AUTHORS_IN_CITATION])
            if len(self.authors) > self.MAX_AUTHORS_IN_CITATION:
                author_str += " et al."
        return f"{author_str} ({self.date}). {self.title}. {self.source.upper()}"


class Evidence(BaseModel):
    """One retrieved piece of evidence: text, its citation, and a relevance score."""

    # Frozen model: pydantic rejects attribute assignment after construction.
    model_config = {"frozen": True}

    content: str = Field(description="The actual text content", min_length=1)
    citation: Citation
    relevance: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Relevance score 0-1",
    )
    # Free-form extras; every instance gets its own empty dict by default.
    metadata: dict[str, Any] = Field(
        description="Additional metadata (e.g., cited_by_count, concepts, is_open_access)",
        default_factory=dict,
    )


class SearchResult(BaseModel):
    """Result of a search operation.

    Bundles the evidence gathered for one query with bookkeeping about
    which sources were consulted and any errors that were recorded.
    """

    # The query string this result belongs to.
    query: str
    # Evidence items collected for the query.
    evidence: list[Evidence]
    # Names of the sources that were queried; each must be a ``SourceName``.
    sources_searched: list[SourceName]
    # NOTE(review): presumably the total hit count, which may differ from
    # len(evidence) — confirm against the producers of this model.
    total_found: int
    # Error messages as plain strings; empty by default.
    errors: list[str] = Field(default_factory=list)


class AssessmentDetails(BaseModel):
    """Per-dimension scoring of the evidence, with the reasoning behind each score."""

    # Both scores are required integers constrained to the range 0-10, and
    # each is paired with a required free-text justification (>= 10 chars).
    mechanism_score: int = Field(
        description="How well does the evidence explain the mechanism? 0-10",
        ge=0,
        le=10,
    )
    mechanism_reasoning: str = Field(
        description="Explanation of mechanism score",
        min_length=10,
    )
    clinical_evidence_score: int = Field(
        description="Strength of clinical/preclinical evidence. 0-10",
        ge=0,
        le=10,
    )
    clinical_reasoning: str = Field(
        description="Explanation of clinical evidence score",
        min_length=10,
    )

    # Optional lists; each instance starts with its own empty list.
    drug_candidates: list[str] = Field(
        description="List of specific drug candidates mentioned",
        default_factory=list,
    )
    key_findings: list[str] = Field(
        description="Key findings from the evidence",
        default_factory=list,
    )


class JudgeAssessment(BaseModel):
    """The Judge's full verdict: detailed scores plus a continue/synthesize decision."""

    details: AssessmentDetails
    sufficient: bool = Field(description="Is evidence sufficient to provide a recommendation?")
    confidence: float = Field(description="Confidence in the assessment (0-1)", ge=0.0, le=1.0)
    # Exactly one of the two literal values is accepted.
    recommendation: Literal["continue", "synthesize"] = Field(
        description="continue = need more evidence, synthesize = ready to answer",
    )
    # Optional; defaults to an empty list.
    next_search_queries: list[str] = Field(
        description="If continue, what queries to search next",
        default_factory=list,
    )
    reasoning: str = Field(
        description="Overall reasoning for the recommendation",
        min_length=20,
    )


class AgentEvent(BaseModel):
    """Event emitted by the orchestrator for UI streaming.

    ``to_markdown`` renders an event as a single chat line made of an emoji
    icon, the upper-cased event type, and the message.
    """

    type: Literal[
        "started",
        "thinking",  # Multi-agent reasoning in progress (before first event)
        "searching",
        "search_complete",
        "judging",
        "judge_complete",
        "looping",
        "synthesizing",
        "complete",
        "error",
        "streaming",
        "hypothesizing",
        "analyzing",  # NEW for Phase 13
        "analysis_complete",  # NEW for Phase 13
        "progress",  # NEW for SPEC_01
    ]
    message: str
    # Arbitrary payload attached to the event; not used by ``to_markdown``.
    data: Any = None
    # Timezone-aware (UTC) creation time, evaluated per instance.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(UTC))
    iteration: int = 0

    def to_markdown(self) -> str:
        """Format event as markdown for chat display.

        Returns:
            ``"<icon> **<TYPE>**: <message>"`` where ``<icon>`` is the emoji
            registered for the event type, or a bullet if none is registered.
        """
        # The emoji literals were previously stored as mojibake (UTF-8 bytes
        # decoded with a single-byte codec); they are restored to real emoji.
        icons = {
            "started": "🚀",
            "thinking": "⏳",  # Hourglass for thinking/waiting
            "searching": "🔍",
            "search_complete": "📚",
            "judging": "🧠",
            "judge_complete": "✅",
            "looping": "🔄",
            "synthesizing": "📝",
            "complete": "🎉",
            "error": "❌",
            "streaming": "📡",
            "hypothesizing": "🔬",
            "analyzing": "📊",
            "analysis_complete": "📈",
            "progress": "⏱️",
        }
        # Fallback bullet for any type without a registered icon.
        icon = icons.get(self.type, "•")
        return f"{icon} **{self.type.upper()}**: {self.message}"


class MechanismHypothesis(BaseModel):
    """A drug -> target -> pathway -> effect hypothesis with its evidence links."""

    drug: str = Field(description="The drug being studied")
    target: str = Field(description="Molecular target (e.g., AMPK, mTOR)")
    pathway: str = Field(description="Biological pathway affected")
    effect: str = Field(description="Downstream effect on disease")
    confidence: float = Field(description="Confidence in hypothesis", ge=0, le=1)

    # The three lists below are optional and default to empty lists.
    supporting_evidence: list[str] = Field(
        description="PMIDs or URLs supporting this hypothesis",
        default_factory=list,
    )
    contradicting_evidence: list[str] = Field(
        description="PMIDs or URLs contradicting this hypothesis",
        default_factory=list,
    )
    search_suggestions: list[str] = Field(
        description="Suggested searches to test this hypothesis",
        default_factory=list,
    )

    def to_search_queries(self) -> list[str]:
        """Build the search queries used to test this hypothesis.

        Returns:
            One query per adjacent pair of the drug/target/pathway/effect
            chain (the two terms joined by a space), followed by every entry
            of ``search_suggestions`` in order.
        """
        chain = (self.drug, self.target, self.pathway, self.effect)
        queries = [f"{left} {right}" for left, right in zip(chain, chain[1:])]
        queries.extend(self.search_suggestions)
        return queries


class HypothesisAssessment(BaseModel):
    """Assessment of evidence against hypotheses.

    Groups the candidate hypotheses with the open questions and follow-up
    searches identified for them.
    """

    # All hypotheses under consideration (required).
    hypotheses: list[MechanismHypothesis]
    # Optional; None when no primary hypothesis has been selected.
    primary_hypothesis: MechanismHypothesis | None = Field(
        default=None, description="Most promising hypothesis based on current evidence"
    )
    # Both lists below have no default, so callers must supply them
    # (an empty list is accepted).
    knowledge_gaps: list[str] = Field(description="What we don't know yet")
    recommended_searches: list[str] = Field(description="Searches to fill knowledge gaps")


class ReportSection(BaseModel):
    """A section of the research report.

    Consists of a heading, the body text, and an optional list of citations.
    """

    # Section heading.
    title: str
    # Section body text.
    content: str
    # Reserved for future inline citation tracking within sections
    # (defaults to an empty list).
    citations: list[str] = Field(default_factory=list)


class ResearchReport(BaseModel):
    """Structured scientific report.

    Holds the narrative sections, hypothesis tallies, references, and run
    metadata of a report; ``to_markdown`` renders everything as a single
    markdown document.
    """

    title: str = Field(description="Report title")
    executive_summary: str = Field(
        description="One-paragraph summary for quick reading", min_length=100, max_length=1000
    )
    research_question: str = Field(description="Clear statement of what was investigated")

    methodology: ReportSection = Field(description="How the research was conducted")
    # ``to_markdown`` reads the keys "mechanism", "supported" and
    # "contradicted" from each dict; missing keys fall back to defaults.
    hypotheses_tested: list[dict[str, Any]] = Field(
        description="Hypotheses with supporting/contradicting evidence counts"
    )

    mechanistic_findings: ReportSection = Field(description="Findings about drug mechanisms")
    clinical_findings: ReportSection = Field(
        description="Findings from clinical/preclinical studies"
    )

    drug_candidates: list[str] = Field(description="Identified drug candidates")
    limitations: list[str] = Field(description="Study limitations")
    conclusion: str = Field(description="Overall conclusion")

    # ``to_markdown`` reads the keys "authors", "title", "source", "date"
    # and "url" from each reference dict.
    references: list[dict[str, str]] = Field(
        default_factory=list,
        description="Formatted references with title, authors, source, URL",
    )

    # Metadata
    sources_searched: list[str] = Field(default_factory=list)
    total_papers_reviewed: int = 0
    search_iterations: int = 0
    confidence_score: float = Field(ge=0, le=1)

    def to_markdown(self) -> str:
        """Render report as markdown.

        Sections are emitted in a fixed order: title, executive summary,
        research question, methodology, hypotheses, mechanistic and clinical
        findings, drug candidates, limitations, conclusion, references, and a
        metadata footer. Empty lists are replaced by an italic placeholder.

        Returns:
            The rendered document; the individual pieces are joined with
            newlines.
        """
        sections = [
            f"# {self.title}\n",
            f"## Executive Summary\n{self.executive_summary}\n",
            f"## Research Question\n{self.research_question}\n",
            f"## Methodology\n{self.methodology.content}\n",
        ]

        # Hypotheses
        sections.append("## Hypotheses Tested\n")
        if not self.hypotheses_tested:
            sections.append("*No hypotheses tested yet.*\n")
        for h in self.hypotheses_tested:
            supported = h.get("supported", 0)
            contradicted = h.get("contradicted", 0)
            # NOTE(review): every case that is not strictly more supported
            # than contradicted (ties, and hypotheses with only contradicting
            # evidence) is labelled "Mixed" — confirm this is intended.
            if supported == 0 and contradicted == 0:
                status = "❓ Untested"
            elif supported > contradicted:
                # Previously stored as mojibake; restored to the real emoji.
                status = "✅ Supported"
            else:
                status = "⚠️ Mixed"
            sections.append(
                f"- **{h.get('mechanism', 'Unknown')}** ({status}): "
                f"{supported} supporting, {contradicted} contradicting\n"
            )

        # Findings
        sections.append(f"## Mechanistic Findings\n{self.mechanistic_findings.content}\n")
        sections.append(f"## Clinical Findings\n{self.clinical_findings.content}\n")

        # Drug candidates
        sections.append("## Drug Candidates\n")
        if self.drug_candidates:
            sections.extend(f"- **{drug}**\n" for drug in self.drug_candidates)
        else:
            sections.append("*No drug candidates identified.*\n")

        # Limitations
        sections.append("## Limitations\n")
        if self.limitations:
            sections.extend(f"- {lim}\n" for lim in self.limitations)
        else:
            sections.append("*No limitations documented.*\n")

        # Conclusion
        sections.append(f"## Conclusion\n{self.conclusion}\n")

        # References (numbered from 1)
        sections.append("## References\n")
        if self.references:
            for i, ref in enumerate(self.references, 1):
                sections.append(
                    f"{i}. {ref.get('authors', 'Unknown')}. "
                    f"*{ref.get('title', 'Untitled')}*. "
                    f"{ref.get('source', '')} ({ref.get('date', '')}). "
                    f"[Link]({ref.get('url', '#')})\n"
                )
        else:
            sections.append("*No references available.*\n")

        # Metadata footer
        sections.append("\n---\n")
        sections.append(
            f"*Report generated from {self.total_papers_reviewed} papers "
            f"across {self.search_iterations} search iterations. "
            f"Confidence: {self.confidence_score:.0%}*"
        )

        return "\n".join(sections)


class OrchestratorConfig(BaseModel):
    """Configuration for the orchestrator.

    Every setting has a default and is validated against an inclusive range.
    """

    # Default 10; allowed range 1-20.
    max_iterations: int = Field(default=10, ge=1, le=20)
    # Default 10; allowed range 1-50.
    max_results_per_tool: int = Field(default=10, ge=1, le=50)
    # Default 30.0; allowed range 5.0-120.0.
    # NOTE(review): unit is presumably seconds — confirm where it is consumed.
    search_timeout: float = Field(default=30.0, ge=5.0, le=120.0)