File size: 4,751 Bytes
d6d843f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
Background Scheduler for API Health Checks
Runs periodic health checks with APScheduler
"""

import asyncio
import logging
from datetime import datetime
from apscheduler.schedulers.background import BackgroundScheduler as APScheduler
from apscheduler.triggers.interval import IntervalTrigger
from typing import Optional

logger = logging.getLogger(__name__)


class BackgroundScheduler:
    """Background scheduler for periodic health checks.

    Wraps an APScheduler background scheduler that periodically runs the
    monitor's asynchronous ``check_all()``, stores the results in the
    database, raises incidents/alerts for offline Tier 1 providers and then
    performs database housekeeping.
    """

    # Id under which the recurring job is registered; shared by start()
    # (add_job) and update_interval() (reschedule_job).
    _JOB_ID = 'health_check_job'

    def __init__(self, monitor, database, interval_minutes: int = 5):
        """
        Initialize the scheduler

        Args:
            monitor: APIMonitor instance
            database: Database instance
            interval_minutes: Interval between health checks
        """
        self.monitor = monitor
        self.database = database
        self.interval_minutes = interval_minutes
        self.scheduler = APScheduler()
        # Set at the beginning of every health-check run (naive local time).
        self.last_run_time: Optional[datetime] = None
        self._running = False

    def _index_resources_by_name(self) -> dict:
        """Return the configured resources keyed by their ``'name'`` entry.

        When several resources share a name the first one wins, which is the
        same resource a linear first-match search would select.
        """
        by_name = {}
        for resource in self.monitor.config.get_all_resources():
            by_name.setdefault(resource.get('name'), resource)
        return by_name

    def _record_tier1_outages(self, results) -> None:
        """Create an incident and an alert for every offline Tier 1 provider.

        Args:
            results: Health-check results; each is read for ``status.value``,
                ``provider_name``, ``category`` and ``error_message``.
        """
        # Built lazily so the resource list is fetched at most once per run,
        # and not at all when every provider is reachable.
        resources_by_name = None

        for result in results:
            if result.status.value != "offline":
                continue

            if resources_by_name is None:
                resources_by_name = self._index_resources_by_name()

            resource = resources_by_name.get(result.provider_name)
            # Resources without an explicit tier are treated as tier 3.
            if not resource or resource.get('tier', 3) != 1:
                continue

            # Create incident for Tier 1 outage
            self.database.create_incident(
                provider_name=result.provider_name,
                category=result.category,
                incident_type="service_offline",
                description=f"Tier 1 provider offline: {result.error_message}",
                severity="high"
            )

            # Create alert
            self.database.create_alert(
                provider_name=result.provider_name,
                alert_type="tier1_offline",
                message=f"Critical: Tier 1 provider {result.provider_name} is offline"
            )

    def _run_health_check(self):
        """Run one health check cycle and save results.

        Steps: record the run time, execute ``monitor.check_all()`` to
        completion, save the results, record Tier 1 outages, then clean up
        data older than 7 days and aggregate response times over 1 hour.

        Any exception is logged (with traceback) and swallowed so that a
        failing run never propagates into the caller or the scheduler.
        """
        try:
            logger.info("Running scheduled health check...")
            self.last_run_time = datetime.now()

            # check_all() is a coroutine; asyncio.run drives it on a fresh
            # event loop and blocks until it finishes.
            results = asyncio.run(self.monitor.check_all())

            # Save to database
            self.database.save_health_checks(results)

            # Check for incidents (offline Tier 1 providers)
            self._record_tier1_outages(results)

            logger.info(f"Health check completed. Checked {len(results)} providers.")

            # Cleanup old data (older than 7 days)
            self.database.cleanup_old_data(days=7)

            # Aggregate response times
            self.database.aggregate_response_times(period_hours=1)

        except Exception as e:
            # exc_info keeps the traceback; the message alone rarely says
            # which step failed.
            logger.error(f"Error in scheduled health check: {e}", exc_info=True)

    def start(self):
        """Start the scheduler and run an initial health check.

        Does nothing if the scheduler is already marked as running. The
        initial check runs synchronously in the calling thread. Errors while
        registering the job or starting the scheduler are logged and
        swallowed; in that case the scheduler stays marked as not running.
        """
        if self._running:
            return

        try:
            # Add job with interval trigger
            self.scheduler.add_job(
                func=self._run_health_check,
                trigger=IntervalTrigger(minutes=self.interval_minutes),
                id=self._JOB_ID,
                name='API Health Check',
                replace_existing=True
            )

            self.scheduler.start()
            self._running = True
            logger.info(f"Scheduler started. Running every {self.interval_minutes} minutes.")

            # Run initial check
            self._run_health_check()

        except Exception as e:
            logger.error(f"Error starting scheduler: {e}", exc_info=True)

    def stop(self):
        """Stop the scheduler if it is running; otherwise do nothing."""
        if not self._running:
            return

        self.scheduler.shutdown()
        self._running = False
        logger.info("Scheduler stopped.")

    def update_interval(self, interval_minutes: int):
        """Update the check interval.

        When the scheduler is running the existing job is rescheduled with
        the new interval; otherwise the value is only stored and used by the
        next ``start()``.

        Args:
            interval_minutes: New interval between health checks, in minutes.

        Raises:
            Whatever ``reschedule_job`` raises. In that case the stored
            interval is left unchanged so it keeps matching the live job.
        """
        if not self._running:
            self.interval_minutes = interval_minutes
            return

        # Reschedule the job first; only commit the new value on success.
        self.scheduler.reschedule_job(
            job_id=self._JOB_ID,
            trigger=IntervalTrigger(minutes=interval_minutes)
        )
        self.interval_minutes = interval_minutes
        logger.info(f"Scheduler interval updated to {interval_minutes} minutes.")

    def is_running(self) -> bool:
        """Check if scheduler is running"""
        return self._running

    def trigger_immediate_check(self):
        """Trigger an immediate health check in the calling thread.

        Runs regardless of whether the scheduler has been started.
        """
        logger.info("Triggering immediate health check...")
        self._run_health_check()