unrealon-1.1.5-py3-none-any.whl → unrealon-2.0.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unrealon-1.1.5.dist-info/licenses → unrealon-2.0.4.dist-info}/LICENSE +1 -1
- unrealon-2.0.4.dist-info/METADATA +491 -0
- unrealon-2.0.4.dist-info/RECORD +129 -0
- {unrealon-1.1.5.dist-info → unrealon-2.0.4.dist-info}/WHEEL +2 -1
- unrealon-2.0.4.dist-info/entry_points.txt +3 -0
- unrealon-2.0.4.dist-info/top_level.txt +3 -0
- unrealon_browser/__init__.py +5 -2
- unrealon_browser/cli/browser_cli.py +18 -9
- unrealon_browser/cli/interactive_mode.py +18 -7
- unrealon_browser/core/browser_manager.py +76 -13
- unrealon_browser/dto/__init__.py +21 -0
- unrealon_browser/dto/bot_detection.py +175 -0
- unrealon_browser/dto/models/config.py +14 -1
- unrealon_browser/managers/__init__.py +4 -1
- unrealon_browser/managers/logger_bridge.py +3 -6
- unrealon_browser/managers/page_wait_manager.py +198 -0
- unrealon_browser/stealth/__init__.py +27 -0
- unrealon_browser/stealth/bypass_techniques.pyc +0 -0
- unrealon_browser/stealth/manager.pyc +0 -0
- unrealon_browser/stealth/nodriver_stealth.pyc +0 -0
- unrealon_browser/stealth/playwright_stealth.pyc +0 -0
- unrealon_browser/stealth/scanner_tester.pyc +0 -0
- unrealon_browser/stealth/undetected_chrome.pyc +0 -0
- unrealon_core/__init__.py +160 -0
- unrealon_core/config/__init__.py +16 -0
- unrealon_core/config/environment.py +98 -0
- unrealon_core/config/urls.py +93 -0
- unrealon_core/enums/__init__.py +24 -0
- unrealon_core/enums/status.py +216 -0
- unrealon_core/enums/types.py +240 -0
- unrealon_core/error_handling/__init__.py +45 -0
- unrealon_core/error_handling/circuit_breaker.py +292 -0
- unrealon_core/error_handling/error_context.py +324 -0
- unrealon_core/error_handling/recovery.py +371 -0
- unrealon_core/error_handling/retry.py +268 -0
- unrealon_core/exceptions/__init__.py +46 -0
- unrealon_core/exceptions/base.py +292 -0
- unrealon_core/exceptions/communication.py +22 -0
- unrealon_core/exceptions/driver.py +11 -0
- unrealon_core/exceptions/proxy.py +11 -0
- unrealon_core/exceptions/task.py +12 -0
- unrealon_core/exceptions/validation.py +17 -0
- unrealon_core/models/__init__.py +98 -0
- unrealon_core/models/arq_context.py +252 -0
- unrealon_core/models/arq_responses.py +125 -0
- unrealon_core/models/base.py +291 -0
- unrealon_core/models/bridge_stats.py +58 -0
- unrealon_core/models/communication.py +39 -0
- unrealon_core/models/config.py +47 -0
- unrealon_core/models/connection_stats.py +47 -0
- unrealon_core/models/driver.py +30 -0
- unrealon_core/models/driver_details.py +98 -0
- unrealon_core/models/logging.py +28 -0
- unrealon_core/models/task.py +21 -0
- unrealon_core/models/typed_responses.py +210 -0
- unrealon_core/models/websocket/__init__.py +91 -0
- unrealon_core/models/websocket/base.py +49 -0
- unrealon_core/models/websocket/config.py +200 -0
- unrealon_core/models/websocket/driver.py +215 -0
- unrealon_core/models/websocket/errors.py +138 -0
- unrealon_core/models/websocket/heartbeat.py +100 -0
- unrealon_core/models/websocket/logging.py +261 -0
- unrealon_core/models/websocket/proxy.py +496 -0
- unrealon_core/models/websocket/tasks.py +275 -0
- unrealon_core/models/websocket/utils.py +153 -0
- unrealon_core/models/websocket_session.py +144 -0
- unrealon_core/monitoring/__init__.py +43 -0
- unrealon_core/monitoring/alerts.py +398 -0
- unrealon_core/monitoring/dashboard.py +307 -0
- unrealon_core/monitoring/health_check.py +354 -0
- unrealon_core/monitoring/metrics.py +352 -0
- unrealon_core/utils/__init__.py +11 -0
- unrealon_core/utils/time.py +61 -0
- unrealon_core/version.py +219 -0
- unrealon_driver/__init__.py +88 -50
- unrealon_driver/core_module/__init__.py +34 -0
- unrealon_driver/core_module/base.py +184 -0
- unrealon_driver/core_module/config.py +30 -0
- unrealon_driver/core_module/event_manager.py +127 -0
- unrealon_driver/core_module/protocols.py +98 -0
- unrealon_driver/core_module/registry.py +146 -0
- unrealon_driver/decorators/__init__.py +15 -0
- unrealon_driver/decorators/retry.py +117 -0
- unrealon_driver/decorators/schedule.py +137 -0
- unrealon_driver/decorators/task.py +61 -0
- unrealon_driver/decorators/timing.py +132 -0
- unrealon_driver/driver/__init__.py +20 -0
- unrealon_driver/driver/communication/__init__.py +10 -0
- unrealon_driver/driver/communication/session.py +203 -0
- unrealon_driver/driver/communication/websocket_client.py +197 -0
- unrealon_driver/driver/core/__init__.py +10 -0
- unrealon_driver/driver/core/config.py +85 -0
- unrealon_driver/driver/core/driver.py +221 -0
- unrealon_driver/driver/factory/__init__.py +9 -0
- unrealon_driver/driver/factory/manager_factory.py +130 -0
- unrealon_driver/driver/lifecycle/__init__.py +11 -0
- unrealon_driver/driver/lifecycle/daemon.py +76 -0
- unrealon_driver/driver/lifecycle/initialization.py +97 -0
- unrealon_driver/driver/lifecycle/shutdown.py +48 -0
- unrealon_driver/driver/monitoring/__init__.py +9 -0
- unrealon_driver/driver/monitoring/health.py +63 -0
- unrealon_driver/driver/utilities/__init__.py +10 -0
- unrealon_driver/driver/utilities/logging.py +51 -0
- unrealon_driver/driver/utilities/serialization.py +61 -0
- unrealon_driver/managers/__init__.py +32 -0
- unrealon_driver/managers/base.py +174 -0
- unrealon_driver/managers/browser.py +98 -0
- unrealon_driver/managers/cache.py +116 -0
- unrealon_driver/managers/http.py +107 -0
- unrealon_driver/managers/logger.py +286 -0
- unrealon_driver/managers/proxy.py +99 -0
- unrealon_driver/managers/registry.py +87 -0
- unrealon_driver/managers/threading.py +54 -0
- unrealon_driver/managers/update.py +107 -0
- unrealon_driver/utils/__init__.py +9 -0
- unrealon_driver/utils/time.py +10 -0
- unrealon/__init__.py +0 -40
- unrealon-1.1.5.dist-info/METADATA +0 -621
- unrealon-1.1.5.dist-info/RECORD +0 -54
- unrealon-1.1.5.dist-info/entry_points.txt +0 -9
- unrealon_browser/managers/stealth.py +0 -388
- unrealon_driver/exceptions.py +0 -33
- unrealon_driver/html_analyzer/__init__.py +0 -32
- unrealon_driver/html_analyzer/cleaner.py +0 -657
- unrealon_driver/html_analyzer/config.py +0 -64
- unrealon_driver/html_analyzer/manager.py +0 -247
- unrealon_driver/html_analyzer/models.py +0 -115
- unrealon_driver/html_analyzer/websocket_analyzer.py +0 -157
- unrealon_driver/models/__init__.py +0 -31
- unrealon_driver/models/websocket.py +0 -98
- unrealon_driver/parser/__init__.py +0 -36
- unrealon_driver/parser/cli_manager.py +0 -142
- unrealon_driver/parser/daemon_manager.py +0 -403
- unrealon_driver/parser/managers/__init__.py +0 -25
- unrealon_driver/parser/managers/config.py +0 -293
- unrealon_driver/parser/managers/error.py +0 -412
- unrealon_driver/parser/managers/result.py +0 -321
- unrealon_driver/parser/parser_manager.py +0 -458
- unrealon_driver/smart_logging/__init__.py +0 -24
- unrealon_driver/smart_logging/models.py +0 -44
- unrealon_driver/smart_logging/smart_logger.py +0 -406
- unrealon_driver/smart_logging/unified_logger.py +0 -525
- unrealon_driver/websocket/__init__.py +0 -31
- unrealon_driver/websocket/client.py +0 -249
- unrealon_driver/websocket/config.py +0 -188
- unrealon_driver/websocket/manager.py +0 -90
unrealon_core/monitoring/alerts.py (new file)

@@ -0,0 +1,398 @@
"""
Alerts System

Simple alerting based on metrics and health checks.
Following critical requirements - max 500 lines, functions < 20 lines.

Phase 2: Core Systems - Monitoring
"""

import asyncio
import logging
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional, Callable, Union
from enum import Enum

from pydantic import BaseModel, Field, ConfigDict

from .health_check import HealthStatus
from .metrics import MetricsCollector, get_metrics_collector
from ..utils.time import utc_now


logger = logging.getLogger(__name__)


class AlertSeverity(str, Enum):
    """Alert severity levels."""
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"


class AlertRule(BaseModel):
    """Alert rule configuration."""

    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid"
    )

    name: str = Field(description="Alert rule name")
    description: str = Field(description="Alert description")
    severity: AlertSeverity = Field(description="Alert severity")

    # Condition
    metric_name: Optional[str] = Field(default=None, description="Metric to monitor")
    threshold: Optional[float] = Field(default=None, description="Alert threshold")
    comparison: str = Field(default="gt", description="Comparison operator (gt, lt, eq, gte, lte)")

    # Health check condition
    component_name: Optional[str] = Field(default=None, description="Component to monitor")
    health_status: Optional[HealthStatus] = Field(default=None, description="Health status to alert on")

    # Timing
    duration_seconds: float = Field(default=60.0, description="How long condition must persist")
    cooldown_seconds: float = Field(default=300.0, description="Cooldown between alerts")

    # State
    enabled: bool = Field(default=True, description="Whether rule is enabled")
    last_triggered: Optional[datetime] = Field(default=None, description="Last trigger time")
    trigger_count: int = Field(default=0, description="Number of times triggered")


class Alert(BaseModel):
    """Active alert instance."""

    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid"
    )

    alert_id: str = Field(description="Unique alert ID")
    rule_name: str = Field(description="Alert rule name")
    severity: AlertSeverity = Field(description="Alert severity")
    message: str = Field(description="Alert message")
    timestamp: datetime = Field(description="When alert was triggered")

    # Context
    metric_name: Optional[str] = Field(default=None, description="Related metric")
    metric_value: Optional[float] = Field(default=None, description="Metric value that triggered alert")
    component_name: Optional[str] = Field(default=None, description="Related component")

    # State
    acknowledged: bool = Field(default=False, description="Whether alert is acknowledged")
    resolved: bool = Field(default=False, description="Whether alert is resolved")
    resolved_at: Optional[datetime] = Field(default=None, description="When alert was resolved")


class AlertManager:
    """
    Simple alert manager.

    Monitors metrics and health checks, triggers alerts based on rules,
    and manages alert lifecycle.
    """

    def __init__(self, metrics_collector: Optional[MetricsCollector] = None):
        """Initialize alert manager."""
        self.metrics_collector = metrics_collector or get_metrics_collector()
        self._rules: Dict[str, AlertRule] = {}
        self._active_alerts: Dict[str, Alert] = {}
        self._alert_handlers: List[Callable[[Alert], Any]] = []

        # State tracking
        self._rule_states: Dict[str, Dict[str, Any]] = {}
        self._running = False
        self._check_task: Optional[asyncio.Task] = None

        self.logger = logging.getLogger("alert_manager")

    def add_rule(self, rule: AlertRule) -> None:
        """Add alert rule."""
        self._rules[rule.name] = rule
        self._rule_states[rule.name] = {
            'condition_start': None,
            'last_check': None,
            'consecutive_triggers': 0
        }

        self.logger.info(f"Added alert rule: {rule.name} ({rule.severity})")

    def remove_rule(self, rule_name: str) -> bool:
        """Remove alert rule."""
        if rule_name in self._rules:
            del self._rules[rule_name]
            del self._rule_states[rule_name]
            self.logger.info(f"Removed alert rule: {rule_name}")
            return True
        return False

    def add_alert_handler(self, handler: Callable[[Alert], Any]) -> None:
        """Add alert handler function."""
        self._alert_handlers.append(handler)
        self.logger.debug(f"Added alert handler: {handler.__name__}")

    async def check_rules(self) -> List[Alert]:
        """Check all alert rules and trigger alerts if needed."""
        new_alerts = []

        for rule_name, rule in self._rules.items():
            if not rule.enabled:
                continue

            try:
                alert = await self._check_rule(rule)
                if alert:
                    new_alerts.append(alert)
            except Exception as e:
                self.logger.error(f"Error checking rule {rule_name}: {e}")

        return new_alerts

    async def _check_rule(self, rule: AlertRule) -> Optional[Alert]:
        """Check individual alert rule."""
        now = utc_now()
        rule_state = self._rule_states[rule.name]

        # Check cooldown
        if (rule.last_triggered and
            (now - rule.last_triggered).total_seconds() < rule.cooldown_seconds):
            return None

        # Evaluate condition
        condition_met = await self._evaluate_condition(rule)

        if condition_met:
            # Track when condition started
            if rule_state['condition_start'] is None:
                rule_state['condition_start'] = now

            # Check if condition has persisted long enough
            condition_duration = (now - rule_state['condition_start']).total_seconds()

            if condition_duration >= rule.duration_seconds:
                # Trigger alert
                alert = await self._trigger_alert(rule)
                rule_state['condition_start'] = None  # Reset
                return alert
        else:
            # Condition not met, reset state
            rule_state['condition_start'] = None

        rule_state['last_check'] = now
        return None

    async def _evaluate_condition(self, rule: AlertRule) -> bool:
        """Evaluate alert rule condition."""
        # Metric-based condition
        if rule.metric_name and rule.threshold is not None:
            metric = await self.metrics_collector.get_metric(rule.metric_name)
            if not metric:
                return False

            value = metric.current_value
            threshold = rule.threshold

            if rule.comparison == "gt":
                return value > threshold
            elif rule.comparison == "lt":
                return value < threshold
            elif rule.comparison == "eq":
                return value == threshold
            elif rule.comparison == "gte":
                return value >= threshold
            elif rule.comparison == "lte":
                return value <= threshold
            else:
                return False

        # Health check condition
        if rule.component_name and rule.health_status:
            # This would integrate with health checker
            # For now, return False as placeholder
            return False

        return False

    async def _trigger_alert(self, rule: AlertRule) -> Alert:
        """Trigger alert for rule."""
        import uuid

        # Get current metric value for context
        metric_value = None
        if rule.metric_name:
            metric = await self.metrics_collector.get_metric(rule.metric_name)
            if metric:
                metric_value = metric.current_value

        # Create alert
        alert = Alert(
            alert_id=str(uuid.uuid4())[:8],
            rule_name=rule.name,
            severity=rule.severity,
            message=self._format_alert_message(rule, metric_value),
            timestamp=utc_now(),
            metric_name=rule.metric_name,
            metric_value=metric_value,
            component_name=rule.component_name
        )

        # Update rule state
        rule.last_triggered = utc_now()
        rule.trigger_count += 1

        # Store active alert
        self._active_alerts[alert.alert_id] = alert

        # Notify handlers
        await self._notify_handlers(alert)

        self.logger.warning(f"Alert triggered: {alert.rule_name} - {alert.message}")
        return alert

    def _format_alert_message(self, rule: AlertRule, metric_value: Optional[float]) -> str:
        """Format alert message."""
        if rule.metric_name and metric_value is not None:
            return (f"{rule.description} - {rule.metric_name} is {metric_value} "
                    f"({rule.comparison} {rule.threshold})")
        elif rule.component_name:
            return f"{rule.description} - {rule.component_name} is {rule.health_status}"
        else:
            return rule.description

    async def _notify_handlers(self, alert: Alert) -> None:
        """Notify all alert handlers."""
        for handler in self._alert_handlers:
            try:
                if asyncio.iscoroutinefunction(handler):
                    await handler(alert)
                else:
                    handler(alert)
            except Exception as e:
                self.logger.error(f"Error in alert handler {handler.__name__}: {e}")

    async def acknowledge_alert(self, alert_id: str) -> bool:
        """Acknowledge alert."""
        if alert_id in self._active_alerts:
            self._active_alerts[alert_id].acknowledged = True
            self.logger.info(f"Alert acknowledged: {alert_id}")
            return True
        return False

    async def resolve_alert(self, alert_id: str) -> bool:
        """Resolve alert."""
        if alert_id in self._active_alerts:
            alert = self._active_alerts[alert_id]
            alert.resolved = True
            alert.resolved_at = utc_now()
            self.logger.info(f"Alert resolved: {alert_id}")
            return True
        return False

    async def start_monitoring(self, check_interval: float = 30.0) -> None:
        """Start alert monitoring."""
        if self._running:
            self.logger.warning("Alert monitoring already running")
            return

        self._running = True
        self._check_task = asyncio.create_task(self._monitoring_loop(check_interval))

        self.logger.info(f"Started alert monitoring (interval: {check_interval}s)")

    async def stop_monitoring(self) -> None:
        """Stop alert monitoring."""
        self._running = False

        if self._check_task and not self._check_task.done():
            self._check_task.cancel()
            try:
                await self._check_task
            except asyncio.CancelledError:
                pass

        self.logger.info("Stopped alert monitoring")

    async def _monitoring_loop(self, check_interval: float) -> None:
        """Background monitoring loop."""
        try:
            while self._running:
                await self.check_rules()
                await asyncio.sleep(check_interval)

        except asyncio.CancelledError:
            self.logger.debug("Alert monitoring loop cancelled")
            raise
        except Exception as e:
            self.logger.error(f"Error in alert monitoring loop: {e}")

    def get_active_alerts(self) -> List[Alert]:
        """Get all active alerts."""
        return [alert for alert in self._active_alerts.values() if not alert.resolved]

    def get_alert_summary(self) -> Dict[str, Any]:
        """Get alert summary."""
        active_alerts = self.get_active_alerts()

        severity_counts = {
            AlertSeverity.INFO: 0,
            AlertSeverity.WARNING: 0,
            AlertSeverity.ERROR: 0,
            AlertSeverity.CRITICAL: 0
        }

        for alert in active_alerts:
            severity_counts[alert.severity] += 1

        return {
            'total_rules': len(self._rules),
            'active_alerts': len(active_alerts),
            'severity_counts': severity_counts,
            'alerts': [alert.model_dump() for alert in active_alerts]
        }


# Global alert manager
_global_alert_manager = AlertManager()


def get_alert_manager() -> AlertManager:
    """Get global alert manager."""
    return _global_alert_manager


def alert_on_condition(
    name: str,
    metric_name: str,
    threshold: float,
    comparison: str = "gt",
    severity: AlertSeverity = AlertSeverity.WARNING,
    description: Optional[str] = None
) -> AlertRule:
    """
    Create and register alert rule for metric condition.

    Args:
        name: Alert rule name
        metric_name: Metric to monitor
        threshold: Alert threshold
        comparison: Comparison operator
        severity: Alert severity
        description: Alert description

    Returns:
        Created AlertRule
    """
    rule = AlertRule(
        name=name,
        description=description or f"{metric_name} {comparison} {threshold}",
        severity=severity,
        metric_name=metric_name,
        threshold=threshold,
        comparison=comparison
    )

    _global_alert_manager.add_rule(rule)
    return rule
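The alerts module above adds a small rule-plus-handler API: register a rule (directly via `AlertManager.add_rule` or through the `alert_on_condition` helper), attach one or more handlers, and start the background check loop. The sketch below is an illustrative usage example inferred from the code in this diff, not an example shipped with the package; the metric name `parse_errors_total`, the rule name `parse_error_spike`, and the `log_alert`/`main` helpers are hypothetical, and it assumes the metrics collector from `unrealon_core.monitoring.metrics` is being fed elsewhere.

```python
# Illustrative sketch only - assumes unrealon 2.0.4 is installed and that
# "parse_errors_total" (a made-up metric name) is recorded by other code.
import asyncio

from unrealon_core.monitoring.alerts import (
    Alert,
    AlertSeverity,
    alert_on_condition,
    get_alert_manager,
)


def log_alert(alert: Alert) -> None:
    """Plain handler; AlertManager also accepts async handlers."""
    print(f"[{alert.severity}] {alert.rule_name}: {alert.message}")


async def main() -> None:
    manager = get_alert_manager()
    manager.add_alert_handler(log_alert)

    # Fires after the hypothetical metric stays above 10 for the default
    # 60-second duration, with a 300-second cooldown between triggers.
    alert_on_condition(
        name="parse_error_spike",
        metric_name="parse_errors_total",
        threshold=10.0,
        comparison="gt",
        severity=AlertSeverity.ERROR,
    )

    await manager.start_monitoring(check_interval=30.0)
    await asyncio.sleep(120)  # let a couple of check cycles run
    print(manager.get_alert_summary())
    await manager.stop_monitoring()


if __name__ == "__main__":
    asyncio.run(main())
```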
unrealon_core/monitoring/dashboard.py (new file)

@@ -0,0 +1,307 @@
"""
Monitoring Dashboard

Simple dashboard for system overview and monitoring data.
Following critical requirements - max 500 lines, functions < 20 lines.

Phase 2: Core Systems - Monitoring
"""

import asyncio
import logging
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional

from pydantic import BaseModel, Field, ConfigDict

from .health_check import HealthChecker, get_health_checker
from .metrics import MetricsCollector, get_metrics_collector
from .alerts import AlertManager, get_alert_manager
from ..utils.time import utc_now


logger = logging.getLogger(__name__)


class SystemStatus(BaseModel):
    """Overall system status."""

    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid"
    )

    status: str = Field(description="Overall system status")
    uptime_seconds: float = Field(description="System uptime in seconds")
    timestamp: datetime = Field(default_factory=utc_now, description="Status timestamp")

    # Component counts
    total_components: int = Field(description="Total monitored components")
    healthy_components: int = Field(description="Healthy components")
    unhealthy_components: int = Field(description="Unhealthy components")

    # Alert counts
    total_alerts: int = Field(description="Total active alerts")
    critical_alerts: int = Field(description="Critical alerts")

    # Metrics
    total_metrics: int = Field(description="Total registered metrics")


class DashboardData(BaseModel):
    """Complete dashboard data."""

    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid"
    )

    system_status: SystemStatus = Field(description="Overall system status")
    health_summary: Dict[str, Any] = Field(description="Health check summary")
    metrics_summary: Dict[str, Any] = Field(description="Metrics summary")
    alerts_summary: Dict[str, Any] = Field(description="Alerts summary")

    # Recent activity
    recent_alerts: List[Dict[str, Any]] = Field(description="Recent alerts")
    top_metrics: List[Dict[str, Any]] = Field(description="Top metrics by activity")

    # Performance indicators
    response_times: Dict[str, float] = Field(description="Average response times")
    error_rates: Dict[str, float] = Field(description="Error rates by component")


class MonitoringDashboard:
    """
    Monitoring dashboard aggregator.

    Collects data from health checks, metrics, and alerts
    to provide a unified system overview.
    """

    def __init__(
        self,
        health_checker: Optional[HealthChecker] = None,
        metrics_collector: Optional[MetricsCollector] = None,
        alert_manager: Optional[AlertManager] = None
    ):
        """Initialize monitoring dashboard."""
        self.health_checker = health_checker or get_health_checker()
        self.metrics_collector = metrics_collector or get_metrics_collector()
        self.alert_manager = alert_manager or get_alert_manager()

        self.start_time = utc_now()
        self.logger = logging.getLogger("monitoring_dashboard")

    async def get_system_status(self) -> SystemStatus:
        """Get overall system status."""
        # Get health summary
        health_data = self.health_checker.get_system_health()

        # Get alerts summary
        alerts_data = self.alert_manager.get_alert_summary()

        # Get metrics summary
        metrics_data = await self.metrics_collector.get_metrics_summary()

        # Calculate uptime
        uptime = (utc_now() - self.start_time).total_seconds()

        # Determine overall status
        if health_data['overall_status'] == 'unhealthy' or alerts_data['severity_counts']['critical'] > 0:
            overall_status = "critical"
        elif health_data['overall_status'] == 'degraded' or alerts_data['severity_counts']['error'] > 0:
            overall_status = "degraded"
        elif health_data['overall_status'] == 'healthy':
            overall_status = "healthy"
        else:
            overall_status = "unknown"

        return SystemStatus(
            status=overall_status,
            uptime_seconds=uptime,
            timestamp=utc_now(),
            total_components=health_data['summary']['total'],
            healthy_components=health_data['summary']['healthy'],
            unhealthy_components=health_data['summary']['unhealthy'],
            total_alerts=alerts_data['active_alerts'],
            critical_alerts=alerts_data['severity_counts']['critical'],
            total_metrics=metrics_data['total_metrics']
        )

    async def get_dashboard_data(self) -> DashboardData:
        """Get complete dashboard data."""
        # Collect all monitoring data
        system_status = await self.get_system_status()
        health_summary = self.health_checker.get_system_health()
        metrics_summary = await self.metrics_collector.get_metrics_summary()
        alerts_summary = self.alert_manager.get_alert_summary()

        # Get recent alerts (last 10)
        recent_alerts = alerts_summary['alerts'][-10:] if alerts_summary['alerts'] else []

        # Get top metrics by sample count
        top_metrics = []
        if metrics_summary['metrics']:
            sorted_metrics = sorted(
                metrics_summary['metrics'].items(),
                key=lambda x: x[1]['total_samples'],
                reverse=True
            )

            top_metrics = [
                {
                    'name': name,
                    'type': data['type'],
                    'current_value': data['current_value'],
                    'total_samples': data['total_samples']
                }
                for name, data in sorted_metrics[:10]
            ]

        # Calculate response times (placeholder)
        response_times = await self._calculate_response_times()

        # Calculate error rates (placeholder)
        error_rates = await self._calculate_error_rates()

        return DashboardData(
            system_status=system_status,
            health_summary=health_summary,
            metrics_summary=metrics_summary,
            alerts_summary=alerts_summary,
            recent_alerts=recent_alerts,
            top_metrics=top_metrics,
            response_times=response_times,
            error_rates=error_rates
        )

    async def _calculate_response_times(self) -> Dict[str, float]:
        """Calculate average response times by component."""
        response_times = {}

        # Look for timer metrics
        metrics = await self.metrics_collector.get_all_metrics()

        for name, metric in metrics.items():
            if metric.metric_type == "timer" and "response_time" in name:
                component = name.split("_")[0]  # Extract component name
                response_times[component] = metric.current_value

        return response_times

    async def _calculate_error_rates(self) -> Dict[str, float]:
        """Calculate error rates by component."""
        error_rates = {}

        # Look for error counter metrics
        metrics = await self.metrics_collector.get_all_metrics()

        for name, metric in metrics.items():
            if metric.metric_type == "counter" and "error" in name:
                component = name.split("_")[0]  # Extract component name

                # Calculate error rate (errors per minute)
                if metric.total_samples > 0:
                    time_diff = (utc_now() - metric.last_updated).total_seconds()
                    if time_diff > 0:
                        error_rates[component] = (metric.current_value / time_diff) * 60

        return error_rates

    async def get_component_details(self, component_name: str) -> Dict[str, Any]:
        """Get detailed information for specific component."""
        details = {
            'name': component_name,
            'timestamp': utc_now().isoformat()
        }

        # Health check details
        health_data = self.health_checker.get_system_health()
        if component_name in health_data['components']:
            details['health'] = health_data['components'][component_name]

        # Related metrics
        metrics = await self.metrics_collector.get_all_metrics()
        component_metrics = {}

        for name, metric in metrics.items():
            if component_name in name:
                component_metrics[name] = {
                    'type': metric.metric_type,
                    'current_value': metric.current_value,
                    'total_samples': metric.total_samples,
                    'last_updated': metric.last_updated.isoformat()
                }

        details['metrics'] = component_metrics

        # Related alerts
        active_alerts = self.alert_manager.get_active_alerts()
        component_alerts = [
            alert.model_dump() for alert in active_alerts
            if alert.component_name == component_name
        ]

        details['alerts'] = component_alerts

        return details

    async def export_dashboard_json(self) -> str:
        """Export dashboard data as JSON."""
        import json

        dashboard_data = await self.get_dashboard_data()

        # Convert to JSON-serializable format
        data_dict = dashboard_data.model_dump()

        # Custom JSON encoder for datetime objects
        def json_encoder(obj):
            if isinstance(obj, datetime):
                return obj.isoformat()
            raise TypeError(f"Object of type {type(obj)} is not JSON serializable")

        return json.dumps(data_dict, indent=2, default=json_encoder)

    def get_dashboard_stats(self) -> Dict[str, Any]:
        """Get dashboard statistics."""
        return {
            'uptime_seconds': (utc_now() - self.start_time).total_seconds(),
            'start_time': self.start_time.isoformat(),
            'components_monitored': len(self.health_checker._components),
            'metrics_registered': len(self.metrics_collector._metrics),
            'alert_rules': len(self.alert_manager._rules),
            'active_alerts': len(self.alert_manager.get_active_alerts())
        }


# Global dashboard instance
_global_dashboard = MonitoringDashboard()


def get_monitoring_dashboard() -> MonitoringDashboard:
    """Get global monitoring dashboard."""
    return _global_dashboard


async def get_system_overview() -> Dict[str, Any]:
    """Get quick system overview."""
    dashboard = get_monitoring_dashboard()

    system_status = await dashboard.get_system_status()

    return {
        'status': system_status.status,
        'uptime_seconds': system_status.uptime_seconds,
        'components': {
            'total': system_status.total_components,
            'healthy': system_status.healthy_components,
            'unhealthy': system_status.unhealthy_components
        },
        'alerts': {
            'total': system_status.total_alerts,
            'critical': system_status.critical_alerts
        },
        'metrics_count': system_status.total_metrics,
        'timestamp': system_status.timestamp.isoformat()
    }