Spaces:
Running
Running
File size: 11,172 Bytes
499170b 36983ae 25c3a8b 499170b 2ff16a3 20f762e 1bc9785 499170b 1bc9785 e67c99f 499170b 9286db5 499170b 1bc9785 499170b d7e5abb 25c3a8b 6b5e05b 25c3a8b 1922dbd 7cc8b69 0257d2f 25c3a8b 36983ae 25c3a8b 6b5e05b 25c3a8b d247864 c690006 7cc8b69 0257d2f 25c3a8b c690006 bc74531 c690006 3139749 f1e4e5b 3139749 f882609 3139749 f1e4e5b 3139749 f1e4e5b 3139749 f1e4e5b 3139749 f1e4e5b 3139749 f1e4e5b 3139749 25c3a8b 72b2667 25c3a8b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 |
"""Data models for the Search feature."""
from datetime import UTC, datetime
from typing import Any, ClassVar, Literal
from pydantic import BaseModel, Field
# Single registry of the source names accepted by the models in this module.
# To support a new source (e.g., another database), add its name here.
SourceName = Literal[
    "pubmed",
    "clinicaltrials",
    "europepmc",
    "preprint",
    "openalex",
    "web",
]
class Citation(BaseModel):
    """A citation to a source document.

    Holds the source name, title, URL, publication date and author list of
    one document, and can render itself as a one-line citation string via
    the ``formatted`` property.
    """

    source: SourceName = Field(description="Where this came from")
    title: str = Field(min_length=1, max_length=500)
    url: str = Field(description="URL to the source")
    date: str = Field(description="Publication date (YYYY-MM-DD or 'Unknown')")
    authors: list[str] = Field(default_factory=list)

    # Number of authors listed by name in ``formatted``; any further authors
    # are summarized as "et al.".
    MAX_AUTHORS_IN_CITATION: ClassVar[int] = 3

    @property
    def formatted(self) -> str:
        """Format as a citation string.

        Returns:
            ``"<authors> (<date>). <title>. <SOURCE>"`` where ``<authors>`` is
            at most ``MAX_AUTHORS_IN_CITATION`` names joined by ", ",
            followed by " et al." when more authors exist, and ``<SOURCE>``
            is the upper-cased source name. When no authors are recorded the
            author part is ``"Unknown"``.
        """
        if not self.authors:
            # Without this fallback the citation would start with a stray
            # space (" (date). ..."). "Unknown" matches the placeholder the
            # ``date`` field already documents for missing values.
            author_str = "Unknown"
        else:
            author_str = ", ".join(self.authors[: self.MAX_AUTHORS_IN_CITATION])
            if len(self.authors) > self.MAX_AUTHORS_IN_CITATION:
                author_str += " et al."
        return f"{author_str} ({self.date}). {self.title}. {self.source.upper()}"
class Evidence(BaseModel):
    """One piece of retrieved text together with the citation it came from."""

    # Frozen model: the configuration below asks pydantic to make instances
    # immutable after creation.
    model_config = {"frozen": True}

    content: str = Field(description="The actual text content", min_length=1)
    citation: Citation
    relevance: float = Field(
        default=0.0,
        description="Relevance score 0-1",
        ge=0.0,
        le=1.0,
    )
    metadata: dict[str, Any] = Field(
        description="Additional metadata (e.g., cited_by_count, concepts, is_open_access)",
        default_factory=dict,
    )
class SearchResult(BaseModel):
    """Outcome of one search operation.

    Bundles the query, the evidence items returned for it, the sources that
    were searched, a result count and any error messages.
    """

    query: str
    evidence: list[Evidence]
    sources_searched: list[SourceName]
    total_found: int
    # Optional; defaults to a fresh empty list for each instance.
    errors: list[str] = Field(default_factory=list)
class AssessmentDetails(BaseModel):
    """Detailed quality assessment of a body of evidence.

    Both scores are required integers in the range 0-10 and each comes with
    a required free-text justification of at least 10 characters. The two
    list fields are optional and start out empty.
    """

    mechanism_score: int = Field(
        description="How well does the evidence explain the mechanism? 0-10",
        ge=0,
        le=10,
    )
    mechanism_reasoning: str = Field(
        description="Explanation of mechanism score",
        min_length=10,
    )
    clinical_evidence_score: int = Field(
        description="Strength of clinical/preclinical evidence. 0-10",
        ge=0,
        le=10,
    )
    clinical_reasoning: str = Field(
        description="Explanation of clinical evidence score",
        min_length=10,
    )
    drug_candidates: list[str] = Field(
        description="List of specific drug candidates mentioned",
        default_factory=list,
    )
    key_findings: list[str] = Field(
        description="Key findings from the evidence",
        default_factory=list,
    )
class JudgeAssessment(BaseModel):
    """Full verdict produced by the Judge.

    Combines the detailed scores with a sufficiency flag, a confidence value
    in [0, 1], a "continue"/"synthesize" recommendation, optional follow-up
    queries and an overall justification of at least 20 characters.
    """

    details: AssessmentDetails
    sufficient: bool = Field(
        description="Is evidence sufficient to provide a recommendation?",
    )
    confidence: float = Field(
        description="Confidence in the assessment (0-1)",
        ge=0.0,
        le=1.0,
    )
    recommendation: Literal["continue", "synthesize"] = Field(
        description="continue = need more evidence, synthesize = ready to answer",
    )
    next_search_queries: list[str] = Field(
        description="If continue, what queries to search next",
        default_factory=list,
    )
    reasoning: str = Field(
        description="Overall reasoning for the recommendation",
        min_length=20,
    )
class AgentEvent(BaseModel):
    """Event emitted by the orchestrator for UI streaming.

    Each event carries a ``type`` tag from a closed set, a human-readable
    ``message``, an optional arbitrary ``data`` payload, a timezone-aware UTC
    ``timestamp`` (filled in at creation time when not supplied) and an
    ``iteration`` counter that defaults to 0.
    """

    type: Literal[
        "started",
        "thinking",  # Multi-agent reasoning in progress (before first event)
        "searching",
        "search_complete",
        "judging",
        "judge_complete",
        "looping",
        "synthesizing",
        "complete",
        "error",
        "streaming",
        "hypothesizing",
        "analyzing",  # Added for Phase 13
        "analysis_complete",  # Added for Phase 13
        "progress",  # Added for SPEC_01
    ]
    message: str
    data: Any = None
    timestamp: datetime = Field(default_factory=lambda: datetime.now(UTC))
    iteration: int = 0

    def to_markdown(self) -> str:
        """Format event as markdown for chat display.

        Returns:
            ``"<icon> **<TYPE>**: <message>"`` where ``<icon>`` is looked up
            from the event type (a bullet is used for unmapped types) and
            ``<TYPE>`` is the upper-cased event type.
        """
        # Icons are written as escape sequences so the file stays pure ASCII
        # and the glyphs cannot be corrupted by a wrong-encoding round trip.
        # NOTE(review): several glyphs were unrecoverable from the garbled
        # original and were re-chosen by meaning - confirm against the
        # intended icon set.
        icons = {
            "started": "\U0001F680",  # rocket
            "thinking": "\u23F3",  # hourglass for thinking/waiting
            "searching": "\U0001F50D",  # magnifying glass
            "search_complete": "\U0001F4DA",  # books
            "judging": "\U0001F9E0",  # brain
            "judge_complete": "\u2705",  # check mark
            "looping": "\U0001F504",  # circular arrows
            "synthesizing": "\U0001F4DD",  # memo
            "complete": "\U0001F389",  # party popper
            "error": "\u274C",  # cross mark
            "streaming": "\U0001F4E1",  # satellite antenna
            "hypothesizing": "\U0001F52C",  # microscope
            "analyzing": "\U0001F4CA",  # bar chart
            "analysis_complete": "\U0001F4C8",  # chart with upwards trend
            "progress": "\u23F1\uFE0F",  # stopwatch (emoji presentation)
        }
        icon = icons.get(self.type, "\u2022")  # bullet as the fallback
        return f"{icon} **{self.type.upper()}**: {self.message}"
class MechanismHypothesis(BaseModel):
    """A scientific hypothesis linking a drug to a disease effect.

    Describes the chain drug -> target -> pathway -> effect, a confidence
    value in [0, 1], and optional lists of supporting evidence,
    contradicting evidence and suggested follow-up searches.
    """

    drug: str = Field(description="The drug being studied")
    target: str = Field(description="Molecular target (e.g., AMPK, mTOR)")
    pathway: str = Field(description="Biological pathway affected")
    effect: str = Field(description="Downstream effect on disease")
    confidence: float = Field(description="Confidence in hypothesis", ge=0, le=1)
    supporting_evidence: list[str] = Field(
        description="PMIDs or URLs supporting this hypothesis",
        default_factory=list,
    )
    contradicting_evidence: list[str] = Field(
        description="PMIDs or URLs contradicting this hypothesis",
        default_factory=list,
    )
    search_suggestions: list[str] = Field(
        description="Suggested searches to test this hypothesis",
        default_factory=list,
    )

    def to_search_queries(self) -> list[str]:
        """Build the search queries that probe this hypothesis.

        Returns:
            One query per adjacent link in the drug/target/pathway/effect
            chain (the two terms joined by a space), followed by every entry
            of ``search_suggestions`` in order.
        """
        linked_terms = (
            (self.drug, self.target),
            (self.target, self.pathway),
            (self.pathway, self.effect),
        )
        queries = [f"{left} {right}" for left, right in linked_terms]
        queries.extend(self.search_suggestions)
        return queries
class HypothesisAssessment(BaseModel):
    """Evaluation of the evidence against a set of hypotheses.

    ``hypotheses``, ``knowledge_gaps`` and ``recommended_searches`` are
    required; ``primary_hypothesis`` is optional and defaults to ``None``.
    """

    hypotheses: list[MechanismHypothesis]
    primary_hypothesis: MechanismHypothesis | None = Field(
        None,
        description="Most promising hypothesis based on current evidence",
    )
    knowledge_gaps: list[str] = Field(
        description="What we don't know yet",
    )
    recommended_searches: list[str] = Field(
        description="Searches to fill knowledge gaps",
    )
class ReportSection(BaseModel):
    """One titled section of the research report."""

    title: str
    content: str

    # Placeholder for future per-section inline citation tracking; empty
    # unless a caller supplies values.
    citations: list[str] = Field(default_factory=list)
class ResearchReport(BaseModel):
    """Structured scientific report.

    Collects the narrative sections, tested hypotheses, drug candidates,
    limitations, references and run metadata of one research session, and
    renders them as a single markdown document via ``to_markdown``.
    """

    title: str = Field(description="Report title")
    executive_summary: str = Field(
        description="One-paragraph summary for quick reading", min_length=100, max_length=1000
    )
    research_question: str = Field(description="Clear statement of what was investigated")
    methodology: ReportSection = Field(description="How the research was conducted")
    hypotheses_tested: list[dict[str, Any]] = Field(
        description="Hypotheses with supporting/contradicting evidence counts"
    )
    mechanistic_findings: ReportSection = Field(description="Findings about drug mechanisms")
    clinical_findings: ReportSection = Field(
        description="Findings from clinical/preclinical studies"
    )
    drug_candidates: list[str] = Field(description="Identified drug candidates")
    limitations: list[str] = Field(description="Study limitations")
    conclusion: str = Field(description="Overall conclusion")
    references: list[dict[str, str]] = Field(
        default_factory=list,
        description="Formatted references with title, authors, source, URL",
    )

    # Metadata
    sources_searched: list[str] = Field(default_factory=list)
    total_papers_reviewed: int = 0
    search_iterations: int = 0
    confidence_score: float = Field(ge=0, le=1)

    def to_markdown(self) -> str:
        """Render report as markdown.

        Sections appear in a fixed order: title, executive summary, research
        question, methodology, hypotheses, mechanistic and clinical findings,
        drug candidates, limitations, conclusion, references and a metadata
        footer. Empty lists are rendered as an italic placeholder line.

        Returns:
            The complete report; the individual parts are joined with
            newlines.
        """
        sections = [
            f"# {self.title}\n",
            f"## Executive Summary\n{self.executive_summary}\n",
            f"## Research Question\n{self.research_question}\n",
            f"## Methodology\n{self.methodology.content}\n",
        ]

        # Hypotheses
        sections.append("## Hypotheses Tested\n")
        if not self.hypotheses_tested:
            sections.append("*No hypotheses tested yet.*\n")
        for h in self.hypotheses_tested:
            # Missing counts are treated as zero.
            supported = h.get("supported", 0)
            contradicted = h.get("contradicted", 0)
            # Status glyphs are escape sequences so the file stays pure ASCII
            # and cannot be corrupted by a wrong-encoding round trip.
            if supported == 0 and contradicted == 0:
                # NOTE(review): this glyph was unrecoverable from the garbled
                # original; a question mark was chosen by meaning - confirm.
                status = "\u2753 Untested"
            elif supported > contradicted:
                status = "\u2705 Supported"  # check mark
            else:
                # Covers ties as well as more contradicting than supporting.
                status = "\u26A0\uFE0F Mixed"  # warning sign
            sections.append(
                f"- **{h.get('mechanism', 'Unknown')}** ({status}): "
                f"{supported} supporting, {contradicted} contradicting\n"
            )

        # Findings
        sections.append(f"## Mechanistic Findings\n{self.mechanistic_findings.content}\n")
        sections.append(f"## Clinical Findings\n{self.clinical_findings.content}\n")

        # Drug candidates
        sections.append("## Drug Candidates\n")
        if self.drug_candidates:
            sections.extend(f"- **{drug}**\n" for drug in self.drug_candidates)
        else:
            sections.append("*No drug candidates identified.*\n")

        # Limitations
        sections.append("## Limitations\n")
        if self.limitations:
            sections.extend(f"- {lim}\n" for lim in self.limitations)
        else:
            sections.append("*No limitations documented.*\n")

        # Conclusion
        sections.append(f"## Conclusion\n{self.conclusion}\n")

        # References (numbered from 1; missing keys fall back to placeholders)
        sections.append("## References\n")
        if self.references:
            sections.extend(
                f"{i}. {ref.get('authors', 'Unknown')}. "
                f"*{ref.get('title', 'Untitled')}*. "
                f"{ref.get('source', '')} ({ref.get('date', '')}). "
                f"[Link]({ref.get('url', '#')})\n"
                for i, ref in enumerate(self.references, 1)
            )
        else:
            sections.append("*No references available.*\n")

        # Metadata footer
        sections.append("\n---\n")
        sections.append(
            f"*Report generated from {self.total_papers_reviewed} papers "
            f"across {self.search_iterations} search iterations. "
            f"Confidence: {self.confidence_score:.0%}*"
        )
        return "\n".join(sections)
class OrchestratorConfig(BaseModel):
    """Tunable limits for the orchestrator.

    Every setting has a default and is validated against an inclusive range.
    """

    # Allowed range 1-20.
    max_iterations: int = Field(
        default=10,
        ge=1,
        le=20,
    )
    # Allowed range 1-50.
    max_results_per_tool: int = Field(
        default=10,
        ge=1,
        le=50,
    )
    # Allowed range 5.0-120.0.
    search_timeout: float = Field(
        default=30.0,
        ge=5.0,
        le=120.0,
    )
|