File size: 5,802 Bytes
069f0a0
 
 
 
 
 
 
 
 
fd1472e
069f0a0
 
 
 
 
 
 
 
 
 
 
 
 
fd1472e
627c291
fd1472e
069f0a0
 
 
b2929fc
069f0a0
 
622c8ba
9e9bc6b
 
 
b2929fc
 
 
 
 
 
 
069f0a0
20c7bad
 
 
 
 
 
 
 
 
 
 
069f0a0
 
 
 
 
 
 
599a754
 
 
 
 
 
 
 
 
 
 
 
069f0a0
cb46aac
 
599a754
cb46aac
069f0a0
 
 
 
9e9bc6b
3aa91e9
 
 
7cc8b69
 
 
9e9bc6b
7cc8b69
3aa91e9
069f0a0
 
 
 
 
 
 
 
 
 
 
 
 
 
9d21bf8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2929fc
 
 
 
 
9d21bf8
 
 
b2929fc
9d21bf8
069f0a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""Application configuration using Pydantic Settings."""

import logging
from typing import Literal

import structlog
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

from src.config.domain import ResearchDomain
from src.utils.exceptions import ConfigurationError


class Settings(BaseSettings):
    """Strongly-typed application settings."""

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # Domain configuration
    research_domain: ResearchDomain = ResearchDomain.SEXUAL_HEALTH

    # LLM Configuration
    openai_api_key: str | None = Field(default=None, description="OpenAI API key")
    anthropic_api_key: str | None = Field(default=None, description="Anthropic API key")
    llm_provider: Literal["openai", "anthropic", "huggingface"] = Field(
        default="openai", description="Which LLM provider to use"
    )
    openai_model: str = Field(default="gpt-5", description="OpenAI model name")
    anthropic_model: str = Field(
        default="claude-sonnet-4-5-20250929", description="Anthropic model"
    )
    # HuggingFace (free tier)
    huggingface_model: str | None = Field(
        default="meta-llama/Llama-3.1-70B-Instruct", description="HuggingFace model name"
    )
    hf_token: str | None = Field(
        default=None, alias="HF_TOKEN", description="HuggingFace API token"
    )

    # Embedding Configuration
    # Note: OpenAI embeddings require OPENAI_API_KEY (Anthropic has no embeddings API)
    openai_embedding_model: str = Field(
        default="text-embedding-3-small",
        description="OpenAI embedding model (used by LlamaIndex RAG)",
    )
    local_embedding_model: str = Field(
        default="all-MiniLM-L6-v2",
        description="Local sentence-transformers model (used by EmbeddingService)",
    )

    # PubMed Configuration
    ncbi_api_key: str | None = Field(
        default=None, description="NCBI API key for higher rate limits"
    )

    # Agent Configuration
    max_iterations: int = Field(default=10, ge=1, le=50)
    advanced_max_rounds: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Max coordination rounds for Advanced mode (default 5 for faster demos)",
    )
    advanced_timeout: float = Field(
        default=300.0,
        ge=60.0,
        le=900.0,
        description="Timeout for Advanced mode in seconds (default 5 min)",
    )
    search_timeout: int = Field(default=30, description="Seconds to wait for search")
    magentic_timeout: int = Field(
        default=600,
        description="Timeout for Magentic mode in seconds (deprecated, use advanced_timeout)",
    )

    # Logging
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"

    # External Services
    modal_token_id: str | None = Field(default=None, description="Modal token ID")
    modal_token_secret: str | None = Field(default=None, description="Modal token secret")
    chroma_db_path: str = Field(default="./chroma_db", description="ChromaDB storage path")

    @property
    def modal_available(self) -> bool:
        """Check if Modal credentials are configured."""
        return bool(self.modal_token_id and self.modal_token_secret)

    def get_api_key(self) -> str:
        """Get the API key for the configured provider."""
        if self.llm_provider == "openai":
            if not self.openai_api_key:
                raise ConfigurationError("OPENAI_API_KEY not set")
            return self.openai_api_key

        if self.llm_provider == "anthropic":
            if not self.anthropic_api_key:
                raise ConfigurationError("ANTHROPIC_API_KEY not set")
            return self.anthropic_api_key

        raise ConfigurationError(f"Unknown LLM provider: {self.llm_provider}")

    def get_openai_api_key(self) -> str:
        """Get OpenAI API key (required for Magentic function calling)."""
        if not self.openai_api_key:
            raise ConfigurationError(
                "OPENAI_API_KEY not set. Magentic mode requires OpenAI for function calling. "
                "Use mode='simple' for other providers."
            )
        return self.openai_api_key

    @property
    def has_openai_key(self) -> bool:
        """Check if OpenAI API key is available."""
        return bool(self.openai_api_key)

    @property
    def has_anthropic_key(self) -> bool:
        """Check if Anthropic API key is available."""
        return bool(self.anthropic_api_key)

    @property
    def has_huggingface_key(self) -> bool:
        """Check if HuggingFace token is available."""
        return bool(self.hf_token)

    @property
    def has_any_llm_key(self) -> bool:
        """Check if any LLM API key is available."""
        return self.has_openai_key or self.has_anthropic_key or self.has_huggingface_key


def get_settings() -> Settings:
    """Factory function to get settings (allows mocking in tests)."""
    return Settings()


def configure_logging(settings: Settings) -> None:
    """Configure structured logging with the configured log level."""
    # Set stdlib logging level from settings
    logging.basicConfig(
        level=getattr(logging, settings.log_level),
        format="%(message)s",
    )

    structlog.configure(
        processors=[
            structlog.stdlib.filter_by_level,
            structlog.stdlib.add_logger_name,
            structlog.stdlib.add_log_level,
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.JSONRenderer(),
        ],
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
    )


# Singleton for easy import
settings = get_settings()