unrealon 1.1.5__py3-none-any.whl → 2.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unrealon-1.1.5.dist-info/licenses → unrealon-2.0.4.dist-info}/LICENSE +1 -1
- unrealon-2.0.4.dist-info/METADATA +491 -0
- unrealon-2.0.4.dist-info/RECORD +129 -0
- {unrealon-1.1.5.dist-info → unrealon-2.0.4.dist-info}/WHEEL +2 -1
- unrealon-2.0.4.dist-info/entry_points.txt +3 -0
- unrealon-2.0.4.dist-info/top_level.txt +3 -0
- unrealon_browser/__init__.py +5 -2
- unrealon_browser/cli/browser_cli.py +18 -9
- unrealon_browser/cli/interactive_mode.py +18 -7
- unrealon_browser/core/browser_manager.py +76 -13
- unrealon_browser/dto/__init__.py +21 -0
- unrealon_browser/dto/bot_detection.py +175 -0
- unrealon_browser/dto/models/config.py +14 -1
- unrealon_browser/managers/__init__.py +4 -1
- unrealon_browser/managers/logger_bridge.py +3 -6
- unrealon_browser/managers/page_wait_manager.py +198 -0
- unrealon_browser/stealth/__init__.py +27 -0
- unrealon_browser/stealth/bypass_techniques.pyc +0 -0
- unrealon_browser/stealth/manager.pyc +0 -0
- unrealon_browser/stealth/nodriver_stealth.pyc +0 -0
- unrealon_browser/stealth/playwright_stealth.pyc +0 -0
- unrealon_browser/stealth/scanner_tester.pyc +0 -0
- unrealon_browser/stealth/undetected_chrome.pyc +0 -0
- unrealon_core/__init__.py +160 -0
- unrealon_core/config/__init__.py +16 -0
- unrealon_core/config/environment.py +98 -0
- unrealon_core/config/urls.py +93 -0
- unrealon_core/enums/__init__.py +24 -0
- unrealon_core/enums/status.py +216 -0
- unrealon_core/enums/types.py +240 -0
- unrealon_core/error_handling/__init__.py +45 -0
- unrealon_core/error_handling/circuit_breaker.py +292 -0
- unrealon_core/error_handling/error_context.py +324 -0
- unrealon_core/error_handling/recovery.py +371 -0
- unrealon_core/error_handling/retry.py +268 -0
- unrealon_core/exceptions/__init__.py +46 -0
- unrealon_core/exceptions/base.py +292 -0
- unrealon_core/exceptions/communication.py +22 -0
- unrealon_core/exceptions/driver.py +11 -0
- unrealon_core/exceptions/proxy.py +11 -0
- unrealon_core/exceptions/task.py +12 -0
- unrealon_core/exceptions/validation.py +17 -0
- unrealon_core/models/__init__.py +98 -0
- unrealon_core/models/arq_context.py +252 -0
- unrealon_core/models/arq_responses.py +125 -0
- unrealon_core/models/base.py +291 -0
- unrealon_core/models/bridge_stats.py +58 -0
- unrealon_core/models/communication.py +39 -0
- unrealon_core/models/config.py +47 -0
- unrealon_core/models/connection_stats.py +47 -0
- unrealon_core/models/driver.py +30 -0
- unrealon_core/models/driver_details.py +98 -0
- unrealon_core/models/logging.py +28 -0
- unrealon_core/models/task.py +21 -0
- unrealon_core/models/typed_responses.py +210 -0
- unrealon_core/models/websocket/__init__.py +91 -0
- unrealon_core/models/websocket/base.py +49 -0
- unrealon_core/models/websocket/config.py +200 -0
- unrealon_core/models/websocket/driver.py +215 -0
- unrealon_core/models/websocket/errors.py +138 -0
- unrealon_core/models/websocket/heartbeat.py +100 -0
- unrealon_core/models/websocket/logging.py +261 -0
- unrealon_core/models/websocket/proxy.py +496 -0
- unrealon_core/models/websocket/tasks.py +275 -0
- unrealon_core/models/websocket/utils.py +153 -0
- unrealon_core/models/websocket_session.py +144 -0
- unrealon_core/monitoring/__init__.py +43 -0
- unrealon_core/monitoring/alerts.py +398 -0
- unrealon_core/monitoring/dashboard.py +307 -0
- unrealon_core/monitoring/health_check.py +354 -0
- unrealon_core/monitoring/metrics.py +352 -0
- unrealon_core/utils/__init__.py +11 -0
- unrealon_core/utils/time.py +61 -0
- unrealon_core/version.py +219 -0
- unrealon_driver/__init__.py +88 -50
- unrealon_driver/core_module/__init__.py +34 -0
- unrealon_driver/core_module/base.py +184 -0
- unrealon_driver/core_module/config.py +30 -0
- unrealon_driver/core_module/event_manager.py +127 -0
- unrealon_driver/core_module/protocols.py +98 -0
- unrealon_driver/core_module/registry.py +146 -0
- unrealon_driver/decorators/__init__.py +15 -0
- unrealon_driver/decorators/retry.py +117 -0
- unrealon_driver/decorators/schedule.py +137 -0
- unrealon_driver/decorators/task.py +61 -0
- unrealon_driver/decorators/timing.py +132 -0
- unrealon_driver/driver/__init__.py +20 -0
- unrealon_driver/driver/communication/__init__.py +10 -0
- unrealon_driver/driver/communication/session.py +203 -0
- unrealon_driver/driver/communication/websocket_client.py +197 -0
- unrealon_driver/driver/core/__init__.py +10 -0
- unrealon_driver/driver/core/config.py +85 -0
- unrealon_driver/driver/core/driver.py +221 -0
- unrealon_driver/driver/factory/__init__.py +9 -0
- unrealon_driver/driver/factory/manager_factory.py +130 -0
- unrealon_driver/driver/lifecycle/__init__.py +11 -0
- unrealon_driver/driver/lifecycle/daemon.py +76 -0
- unrealon_driver/driver/lifecycle/initialization.py +97 -0
- unrealon_driver/driver/lifecycle/shutdown.py +48 -0
- unrealon_driver/driver/monitoring/__init__.py +9 -0
- unrealon_driver/driver/monitoring/health.py +63 -0
- unrealon_driver/driver/utilities/__init__.py +10 -0
- unrealon_driver/driver/utilities/logging.py +51 -0
- unrealon_driver/driver/utilities/serialization.py +61 -0
- unrealon_driver/managers/__init__.py +32 -0
- unrealon_driver/managers/base.py +174 -0
- unrealon_driver/managers/browser.py +98 -0
- unrealon_driver/managers/cache.py +116 -0
- unrealon_driver/managers/http.py +107 -0
- unrealon_driver/managers/logger.py +286 -0
- unrealon_driver/managers/proxy.py +99 -0
- unrealon_driver/managers/registry.py +87 -0
- unrealon_driver/managers/threading.py +54 -0
- unrealon_driver/managers/update.py +107 -0
- unrealon_driver/utils/__init__.py +9 -0
- unrealon_driver/utils/time.py +10 -0
- unrealon/__init__.py +0 -40
- unrealon-1.1.5.dist-info/METADATA +0 -621
- unrealon-1.1.5.dist-info/RECORD +0 -54
- unrealon-1.1.5.dist-info/entry_points.txt +0 -9
- unrealon_browser/managers/stealth.py +0 -388
- unrealon_driver/exceptions.py +0 -33
- unrealon_driver/html_analyzer/__init__.py +0 -32
- unrealon_driver/html_analyzer/cleaner.py +0 -657
- unrealon_driver/html_analyzer/config.py +0 -64
- unrealon_driver/html_analyzer/manager.py +0 -247
- unrealon_driver/html_analyzer/models.py +0 -115
- unrealon_driver/html_analyzer/websocket_analyzer.py +0 -157
- unrealon_driver/models/__init__.py +0 -31
- unrealon_driver/models/websocket.py +0 -98
- unrealon_driver/parser/__init__.py +0 -36
- unrealon_driver/parser/cli_manager.py +0 -142
- unrealon_driver/parser/daemon_manager.py +0 -403
- unrealon_driver/parser/managers/__init__.py +0 -25
- unrealon_driver/parser/managers/config.py +0 -293
- unrealon_driver/parser/managers/error.py +0 -412
- unrealon_driver/parser/managers/result.py +0 -321
- unrealon_driver/parser/parser_manager.py +0 -458
- unrealon_driver/smart_logging/__init__.py +0 -24
- unrealon_driver/smart_logging/models.py +0 -44
- unrealon_driver/smart_logging/smart_logger.py +0 -406
- unrealon_driver/smart_logging/unified_logger.py +0 -525
- unrealon_driver/websocket/__init__.py +0 -31
- unrealon_driver/websocket/client.py +0 -249
- unrealon_driver/websocket/config.py +0 -188
- unrealon_driver/websocket/manager.py +0 -90
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Health Check System
|
|
3
|
+
|
|
4
|
+
Comprehensive health monitoring for all system components.
|
|
5
|
+
Following critical requirements - max 500 lines, functions < 20 lines.
|
|
6
|
+
|
|
7
|
+
Phase 2: Core Systems - Monitoring
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import logging
|
|
12
|
+
from datetime import datetime, timedelta
|
|
13
|
+
from typing import Dict, Any, List, Optional, Callable
|
|
14
|
+
from enum import Enum
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field, ConfigDict
|
|
17
|
+
from ..utils.time import utc_now
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class HealthStatus(str, Enum):
    """Closed set of health levels a monitored component can be in.

    The enum mixes in ``str``, so every member compares equal to its
    lowercase string value and can be built from that string.
    """

    HEALTHY = "healthy"
    DEGRADED = "degraded"
    UNHEALTHY = "unhealthy"
    # Status given to a component before any check has produced a verdict.
    UNKNOWN = "unknown"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class HealthCheckResult(BaseModel):
    """Result of a health check."""

    # Assignments are re-validated and undeclared fields are rejected.
    model_config = ConfigDict(extra="forbid", validate_assignment=True)

    # What was checked and the verdict.
    component: str = Field(description="Component name")
    status: HealthStatus = Field(description="Health status")
    message: str = Field(description="Status message")

    # When the check finished and how long it took.
    timestamp: datetime = Field(description="Check timestamp")
    duration_ms: float = Field(description="Check duration in milliseconds")

    # Free-form extra data; defaults to a fresh empty dict per instance.
    details: Dict[str, Any] = Field(default_factory=dict, description="Additional details")

    def is_healthy(self) -> bool:
        """Return True when the recorded status equals ``HealthStatus.HEALTHY``."""
        expected = HealthStatus.HEALTHY
        return self.status == expected

    def is_degraded(self) -> bool:
        """Return True when the recorded status equals ``HealthStatus.DEGRADED``."""
        expected = HealthStatus.DEGRADED
        return self.status == expected
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class ComponentHealth(BaseModel):
    """Health information for a component."""

    # Assignments are re-validated and undeclared fields are rejected.
    model_config = ConfigDict(extra="forbid", validate_assignment=True)

    # Identity and latest verdict.
    name: str = Field(description="Component name")
    current_status: HealthStatus = Field(description="Current health status")
    last_check: datetime = Field(description="Last health check time")

    # Running counters.
    check_count: int = Field(default=0, description="Total health checks performed")
    failure_count: int = Field(default=0, description="Number of failed checks")
    last_failure: Optional[datetime] = Field(default=None, description="Last failure time")

    # Reference point used by get_uptime_seconds().
    uptime_start: datetime = Field(description="When component became healthy")

    def get_uptime_seconds(self) -> float:
        """Return seconds elapsed since ``uptime_start``.

        Returns 0.0 whenever the current status is anything other than
        ``HealthStatus.HEALTHY``.
        """
        if self.current_status == HealthStatus.HEALTHY:
            elapsed = utc_now() - self.uptime_start
            return elapsed.total_seconds()
        return 0.0

    def get_failure_rate(self) -> float:
        """Return failed checks as a percentage of all checks (0.0 with no checks)."""
        checks = self.check_count
        return (self.failure_count / checks) * 100.0 if checks else 0.0
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class HealthChecker:
    """
    Health checker for system components.

    Keeps a registry of named check functions, runs them on demand or in a
    background loop, and tracks per-component status, check counts and
    failure counts.
    """

    def __init__(self, check_interval: float = 30.0, check_timeout: float = 10.0):
        """
        Initialize health checker.

        Args:
            check_interval: Seconds to sleep between monitoring rounds
            check_timeout: Seconds an awaitable health check may run before
                it is reported as timed out
        """
        self.check_interval = check_interval
        self.check_timeout = check_timeout
        self._components: Dict[str, ComponentHealth] = {}
        self._check_functions: Dict[str, Callable] = {}
        self._running = False
        self._check_task: Optional[asyncio.Task] = None
        self.logger = logging.getLogger("health_checker")

    def register_component(
        self,
        name: str,
        check_func: Callable[[], Any]
    ) -> None:
        """
        Register component for health monitoring.

        Registering a name again replaces its check function and resets its
        tracked health to UNKNOWN with zeroed counters.

        Args:
            name: Component name
            check_func: Zero-argument callable, normally an async function.
                Its result may be a bool, a dict with optional 'status',
                'message' and 'details' keys, or any other value (treated
                as healthy).
        """
        self._check_functions[name] = check_func
        self._components[name] = ComponentHealth(
            name=name,
            current_status=HealthStatus.UNKNOWN,
            last_check=utc_now(),
            uptime_start=utc_now()
        )

        self.logger.info(f"Registered health check for component: {name}")

    async def check_component(self, name: str) -> HealthCheckResult:
        """
        Perform health check for specific component.

        Never raises for a failing check: timeouts and exceptions raised by
        the check function are converted into an UNHEALTHY result.

        Args:
            name: Component name

        Returns:
            HealthCheckResult with check outcome (UNKNOWN if the component
            is not registered)
        """
        if name not in self._check_functions:
            return HealthCheckResult(
                component=name,
                status=HealthStatus.UNKNOWN,
                message="Component not registered",
                timestamp=utc_now(),
                duration_ms=0.0
            )

        check_func = self._check_functions[name]
        start_time = utc_now()

        try:
            outcome = await self._run_check(check_func)
            status, message, details = self._interpret_result(outcome)
            # Built inside the try so that a malformed payload (rejected by
            # model validation) is reported as a failed check, not raised.
            result = self._build_result(name, status, message, start_time, details)
        except asyncio.TimeoutError:
            result = self._build_result(
                name, HealthStatus.UNHEALTHY, "Health check timed out", start_time
            )
        except Exception as e:
            self.logger.warning("Health check for %s raised: %s", name, e)
            result = self._build_result(
                name, HealthStatus.UNHEALTHY, f"Health check error: {str(e)}", start_time
            )

        await self._update_component_health(name, result.status)
        return result

    async def _run_check(self, check_func: Callable[[], Any]) -> Any:
        """Call the check function; await its result under the timeout if awaitable."""
        # Local import keeps this block self-contained; the module's import
        # list does not include inspect.
        import inspect

        outcome = check_func()
        if inspect.isawaitable(outcome):
            return await asyncio.wait_for(outcome, timeout=self.check_timeout)
        # Synchronous check: already finished, so no timeout can apply.
        return outcome

    @staticmethod
    def _interpret_result(result: Any) -> tuple:
        """Translate a raw check result into (status, message, details)."""
        if isinstance(result, bool):
            if result:
                return HealthStatus.HEALTHY, "Health check passed", {}
            return HealthStatus.UNHEALTHY, "Health check failed", {}

        if isinstance(result, dict):
            # HealthStatus(...) raises ValueError for an unrecognised status
            # string; the caller reports that as a failed check.
            status = HealthStatus(result.get('status', 'unknown'))
            message = result.get('message', 'No message')
            details = result.get('details', {})
            return status, message, details

        # Any other return value (including None) counts as healthy and is
        # stringified into the message.
        return HealthStatus.HEALTHY, str(result), {}

    @staticmethod
    def _build_result(
        name: str,
        status: HealthStatus,
        message: str,
        start_time: datetime,
        details: Optional[Dict[str, Any]] = None
    ) -> HealthCheckResult:
        """Create a result stamped with the current time and elapsed milliseconds."""
        finished = utc_now()
        return HealthCheckResult(
            component=name,
            status=status,
            message=message,
            timestamp=finished,
            duration_ms=(finished - start_time).total_seconds() * 1000,
            details={} if details is None else details
        )

    async def check_all_components(self) -> List[HealthCheckResult]:
        """Check health of all registered components, one after another."""
        results = []

        # Iterate over a snapshot: register_component() may run in another
        # task while a check is awaited, and mutating the dict during live
        # iteration would raise RuntimeError.
        for name in list(self._check_functions):
            results.append(await self.check_component(name))

        return results

    async def _update_component_health(
        self,
        name: str,
        status: HealthStatus
    ) -> None:
        """Update component health tracking after one check."""
        component = self._components.get(name)
        if component is None:
            return

        component.last_check = utc_now()
        component.check_count += 1

        # Track status changes
        if status != component.current_status:
            if status == HealthStatus.HEALTHY:
                # Uptime is measured from the moment of recovery.
                component.uptime_start = utc_now()
                self.logger.info(f"Component {name} became healthy")
            else:
                self.logger.warning(f"Component {name} status changed to {status}")

        # Track failures (degraded checks count as failures too)
        if status in (HealthStatus.UNHEALTHY, HealthStatus.DEGRADED):
            component.failure_count += 1
            component.last_failure = utc_now()

        component.current_status = status

    async def start_monitoring(self) -> None:
        """Start continuous health monitoring as a background task."""
        if self._running:
            self.logger.warning("Health monitoring already running")
            return

        self._running = True
        self._check_task = asyncio.create_task(self._monitoring_loop())

        self.logger.info(f"Started health monitoring (interval: {self.check_interval}s)")

    async def stop_monitoring(self) -> None:
        """Stop health monitoring and wait for the background task to finish."""
        self._running = False

        if self._check_task and not self._check_task.done():
            self._check_task.cancel()
            try:
                await self._check_task
            except asyncio.CancelledError:
                pass

        self._check_task = None
        self.logger.info("Stopped health monitoring")

    async def _monitoring_loop(self) -> None:
        """Background monitoring loop; runs until stopped or cancelled."""
        try:
            while self._running:
                try:
                    await self.check_all_components()
                except asyncio.CancelledError:
                    raise
                except Exception as e:
                    # Keep the loop alive: exiting here would leave
                    # _running True with no task doing any checks.
                    self.logger.exception(f"Error in monitoring loop: {e}")
                await asyncio.sleep(self.check_interval)

        except asyncio.CancelledError:
            self.logger.debug("Monitoring loop cancelled")
            raise

    def get_system_health(self) -> Dict[str, Any]:
        """
        Get overall system health summary.

        Returns:
            Dict with 'overall_status', per-component 'components' data and
            a 'summary' of counts per status.
        """
        # Count component statuses
        status_counts = {status: 0 for status in HealthStatus}
        components_data = {}

        for name, component in self._components.items():
            status_counts[component.current_status] += 1
            components_data[name] = {
                'status': component.current_status,
                'last_check': component.last_check.isoformat(),
                'uptime_seconds': component.get_uptime_seconds(),
                'failure_rate': component.get_failure_rate(),
                'check_count': component.check_count
            }

        total = len(self._components)

        # Determine overall status: worst state wins; HEALTHY only when
        # every component is healthy; an empty registry is UNKNOWN.
        if total == 0:
            overall_status = HealthStatus.UNKNOWN
        elif status_counts[HealthStatus.UNHEALTHY] > 0:
            overall_status = HealthStatus.UNHEALTHY
        elif status_counts[HealthStatus.DEGRADED] > 0:
            overall_status = HealthStatus.DEGRADED
        elif status_counts[HealthStatus.HEALTHY] == total:
            overall_status = HealthStatus.HEALTHY
        else:
            overall_status = HealthStatus.UNKNOWN

        return {
            'overall_status': overall_status,
            'components': components_data,
            'summary': {
                'total': total,
                'healthy': status_counts[HealthStatus.HEALTHY],
                'degraded': status_counts[HealthStatus.DEGRADED],
                'unhealthy': status_counts[HealthStatus.UNHEALTHY],
                'unknown': status_counts[HealthStatus.UNKNOWN]
            }
        }
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
# Shared module-level HealthChecker, created at import time with the
# default settings.
_global_health_checker = HealthChecker()


def get_health_checker() -> HealthChecker:
    """Return the shared module-level ``HealthChecker`` instance."""
    checker = _global_health_checker
    return checker
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def health_check_decorator(component_name: str):
    """
    Build a decorator that registers a function as a health check.

    Registration happens on the global health checker at decoration time;
    the decorated function itself is returned unmodified.

    Args:
        component_name: Name of component to monitor
    """
    def decorator(func: Callable) -> Callable:
        checker = _global_health_checker
        checker.register_component(component_name, func)
        return func

    return decorator
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Metrics System
|
|
3
|
+
|
|
4
|
+
Lightweight metrics collection and aggregation.
|
|
5
|
+
Following critical requirements - max 500 lines, functions < 20 lines.
|
|
6
|
+
|
|
7
|
+
Phase 2: Core Systems - Monitoring
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import logging
|
|
12
|
+
from datetime import datetime, timedelta
|
|
13
|
+
from typing import Dict, Any, List, Optional, Union
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from collections import defaultdict, deque
|
|
16
|
+
|
|
17
|
+
from pydantic import BaseModel, Field, ConfigDict
|
|
18
|
+
|
|
19
|
+
from ..utils.time import utc_now
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class MetricType(str, Enum):
    """Kinds of metric the collector distinguishes.

    The enum mixes in ``str``, so every member compares equal to its
    lowercase string value and can be built from that string.
    """

    COUNTER = "counter"
    GAUGE = "gauge"
    HISTOGRAM = "histogram"
    TIMER = "timer"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class MetricValue(BaseModel):
    """Single metric value with timestamp."""

    # Assignments are re-validated and undeclared fields are rejected.
    model_config = ConfigDict(extra="forbid", validate_assignment=True)

    # The observed number and the moment it was recorded.
    value: float = Field(description="Metric value")
    timestamp: datetime = Field(description="When value was recorded")

    # Labels attached to this sample; a fresh empty dict by default.
    labels: Dict[str, str] = Field(default_factory=dict, description="Metric labels")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class Metric(BaseModel):
    """Metric definition and current state."""

    # Assignments are re-validated and undeclared fields are rejected.
    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid"
    )

    name: str = Field(description="Metric name")
    metric_type: MetricType = Field(description="Type of metric")
    description: str = Field(description="Metric description")
    current_value: float = Field(default=0.0, description="Current metric value")
    total_samples: int = Field(default=0, description="Total samples recorded")
    last_updated: datetime = Field(description="Last update timestamp")
    labels: Dict[str, str] = Field(default_factory=dict, description="Default labels")

    # Histogram-specific fields
    buckets: Optional[List[float]] = Field(default=None, description="Histogram buckets")
    bucket_counts: Optional[Dict[str, int]] = Field(default=None, description="Bucket counts")

    def update_value(self, value: float, labels: Optional[Dict[str, str]] = None) -> None:
        """
        Overwrite the current value and count one more sample.

        Args:
            value: New current value
            labels: Optional labels merged into this metric's labels
        """
        self.current_value = value
        self.total_samples += 1
        self.last_updated = utc_now()

        if labels:
            self.labels.update(labels)

    def increment(self, amount: float = 1.0) -> None:
        """
        Increment counter metric.

        Does nothing when the metric is not of type COUNTER.

        Args:
            amount: Value added to the current value
        """
        if self.metric_type == MetricType.COUNTER:
            self.current_value += amount
            self.total_samples += 1
            self.last_updated = utc_now()

    def observe_histogram(self, value: float) -> None:
        """
        Observe value for histogram metric.

        Counting is cumulative: every bucket whose bound is >= value is
        incremented. Does nothing when the metric is not a HISTOGRAM or has
        no buckets configured.

        Args:
            value: Observed value
        """
        if self.metric_type != MetricType.HISTOGRAM or not self.buckets:
            return

        # Counts are keyed by str(bucket). Rebuild them when they are
        # missing or keyed differently (e.g. "1" instead of "1.0"), which
        # would otherwise make the increment below raise KeyError.
        counts = self.bucket_counts
        if not counts or any(str(bucket) not in counts for bucket in self.buckets):
            self.bucket_counts = self._canonical_bucket_counts()

        # Find appropriate buckets
        for bucket in self.buckets:
            if value <= bucket:
                self.bucket_counts[str(bucket)] += 1

        self.total_samples += 1
        self.last_updated = utc_now()

    def _canonical_bucket_counts(self) -> Dict[str, int]:
        """Return counts keyed by str(bucket), carrying over existing counts."""
        counts = {str(bucket): 0 for bucket in self.buckets}
        for key, count in (self.bucket_counts or {}).items():
            try:
                canonical = str(float(key))
            except ValueError:
                # Not a numeric key; keep it under its original name.
                canonical = key
            counts[canonical] = counts.get(canonical, 0) + count
        return counts
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class MetricsCollector:
    """
    Lightweight metrics collector.

    Holds registered metrics by name plus a bounded per-metric history of
    recorded values. All access to that state is serialised through one
    asyncio lock.
    """

    def __init__(self, max_history: int = 1000):
        """
        Initialize metrics collector.

        Args:
            max_history: Maximum number of values kept per metric history
        """
        self.max_history = max_history
        self._metrics: Dict[str, Metric] = {}
        self._history: Dict[str, deque] = defaultdict(lambda: deque(maxlen=max_history))
        self._lock = asyncio.Lock()
        self.logger = logging.getLogger("metrics_collector")

    async def register_metric(
        self,
        name: str,
        metric_type: MetricType,
        description: str,
        labels: Optional[Dict[str, str]] = None,
        buckets: Optional[List[float]] = None
    ) -> None:
        """
        Register new metric.

        Registering an existing name logs a warning and changes nothing.

        Args:
            name: Metric name
            metric_type: Type of metric
            description: Metric description
            labels: Default labels
            buckets: Histogram buckets (for histogram metrics)
        """
        async with self._lock:
            if name in self._metrics:
                self.logger.warning(f"Metric {name} already registered")
                return

            self._register_unlocked(name, metric_type, description, labels, buckets)

    def _register_unlocked(
        self,
        name: str,
        metric_type: MetricType,
        description: str,
        labels: Optional[Dict[str, str]] = None,
        buckets: Optional[List[float]] = None
    ) -> Metric:
        """Create and store a metric; the caller must already hold the lock."""
        metric = Metric(
            name=name,
            metric_type=metric_type,
            description=description,
            last_updated=utc_now(),
            labels=labels or {},
            buckets=buckets,
            # Key by the float form so the keys match str(bucket) of the
            # float-typed buckets field even when ints were passed in.
            bucket_counts={str(float(b)): 0 for b in buckets} if buckets else None
        )

        self._metrics[name] = metric
        self.logger.debug(f"Registered metric: {name} ({metric_type})")
        return metric

    async def record_counter(
        self,
        name: str,
        value: float = 1.0,
        labels: Optional[Dict[str, str]] = None
    ) -> None:
        """Record counter metric: add ``value`` to the counter."""
        await self._record_metric(name, value, MetricType.COUNTER, labels)

    async def record_gauge(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None
    ) -> None:
        """Record gauge metric: set the gauge to ``value``."""
        await self._record_metric(name, value, MetricType.GAUGE, labels)

    async def record_histogram(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None
    ) -> None:
        """Record histogram metric: observe ``value``."""
        await self._record_metric(name, value, MetricType.HISTOGRAM, labels)

    async def record_timer(
        self,
        name: str,
        duration_seconds: float,
        labels: Optional[Dict[str, str]] = None
    ) -> None:
        """Record timer metric: store the latest duration in seconds."""
        await self._record_metric(name, duration_seconds, MetricType.TIMER, labels)

    async def _record_metric(
        self,
        name: str,
        value: float,
        expected_type: MetricType,
        labels: Optional[Dict[str, str]] = None
    ) -> None:
        """
        Internal method to record metric value.

        Unknown metrics are auto-registered with ``expected_type``. A value
        recorded against a metric of a different type is logged and dropped.
        """
        async with self._lock:
            metric = self._metrics.get(name)
            if metric is None:
                # Auto-register metric. This must not go through
                # register_metric(): asyncio.Lock is not reentrant, so
                # re-acquiring it here would block forever.
                metric = self._register_unlocked(
                    name, expected_type, f"Auto-registered {expected_type} metric"
                )

            if metric.metric_type != expected_type:
                self.logger.error(f"Metric {name} type mismatch: expected {expected_type}, got {metric.metric_type}")
                return

            # Update metric based on type
            if metric.metric_type == MetricType.COUNTER:
                metric.increment(value)
            elif metric.metric_type == MetricType.HISTOGRAM:
                metric.observe_histogram(value)
            else:
                metric.update_value(value, labels)

            # Store in history
            self._history[name].append(
                MetricValue(
                    value=value,
                    timestamp=utc_now(),
                    labels=labels or {}
                )
            )

    async def get_metric(self, name: str) -> Optional[Metric]:
        """Get metric by name, or None when it is not registered."""
        async with self._lock:
            return self._metrics.get(name)

    async def get_all_metrics(self) -> Dict[str, Metric]:
        """Get all registered metrics as a shallow copy of the registry."""
        async with self._lock:
            return self._metrics.copy()

    async def get_metric_history(
        self,
        name: str,
        limit: Optional[int] = None
    ) -> List[MetricValue]:
        """
        Get metric history, oldest first.

        Args:
            name: Metric name
            limit: Keep only the most recent ``limit`` values; None or 0
                returns the whole stored history
        """
        async with self._lock:
            # .get() avoids creating an empty history for unknown names.
            history = list(self._history.get(name, []))

            if limit:
                history = history[-limit:]

            return history

    async def get_metrics_summary(self) -> Dict[str, Any]:
        """Get summary of all metrics: totals, counts per type, per-metric state."""
        async with self._lock:
            summary = {
                'total_metrics': len(self._metrics),
                'metrics_by_type': defaultdict(int),
                'metrics': {}
            }

            for name, metric in self._metrics.items():
                summary['metrics_by_type'][metric.metric_type] += 1

                entry = {
                    'type': metric.metric_type,
                    'current_value': metric.current_value,
                    'total_samples': metric.total_samples,
                    'last_updated': metric.last_updated.isoformat(),
                    'description': metric.description
                }

                # Add histogram-specific info
                if metric.metric_type == MetricType.HISTOGRAM and metric.bucket_counts:
                    entry['buckets'] = metric.bucket_counts

                summary['metrics'][name] = entry

            return summary

    async def reset_metric(self, name: str) -> bool:
        """
        Reset metric to initial state and clear its history.

        Returns:
            False when the metric is not registered, True otherwise
        """
        async with self._lock:
            metric = self._metrics.get(name)
            if metric is None:
                return False

            metric.current_value = 0.0
            metric.total_samples = 0
            metric.last_updated = utc_now()

            if metric.bucket_counts:
                metric.bucket_counts = dict.fromkeys(metric.bucket_counts, 0)

            # Clear history
            self._history[name].clear()

            self.logger.info(f"Reset metric: {name}")
            return True

    async def clear_all_metrics(self) -> None:
        """Clear all metrics and history."""
        async with self._lock:
            self._metrics.clear()
            self._history.clear()
            self.logger.info("Cleared all metrics")
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# Shared module-level MetricsCollector, created at import time with the
# default history size; reset_global_metrics() rebinds it.
_global_metrics = MetricsCollector()


def get_metrics_collector() -> MetricsCollector:
    """Return the collector currently bound to the module-level global."""
    collector = _global_metrics
    return collector
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def reset_global_metrics() -> None:
    """Swap the global collector for a brand-new one (for testing).

    References to the previous collector obtained earlier are not updated.
    """
    global _global_metrics
    fresh_collector = MetricsCollector()
    _global_metrics = fresh_collector
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
# Convenience functions
|
|
317
|
+
async def counter(name: str, value: float = 1.0, labels: Optional[Dict[str, str]] = None) -> None:
    """Record a counter value on the global metrics collector."""
    collector = _global_metrics
    await collector.record_counter(name, value, labels)
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
async def gauge(name: str, value: float, labels: Optional[Dict[str, str]] = None) -> None:
    """Record a gauge value on the global metrics collector."""
    collector = _global_metrics
    await collector.record_gauge(name, value, labels)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
async def histogram(name: str, value: float, labels: Optional[Dict[str, str]] = None) -> None:
    """Record a histogram observation on the global metrics collector."""
    collector = _global_metrics
    await collector.record_histogram(name, value, labels)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
class MetricTimer:
    """Async context manager that records how long its body took.

    On exit the elapsed seconds are recorded as a timer metric on the
    global metrics collector, whether or not the body raised. Exceptions
    from the body are not suppressed.
    """

    def __init__(self, metric_name: str, labels: Optional[Dict[str, str]] = None):
        """Remember the metric name and optional labels; timing starts on enter."""
        self.start_time: Optional[datetime] = None
        self.metric_name = metric_name
        self.labels = labels

    async def __aenter__(self):
        """Stamp the start time and return this timer."""
        self.start_time = utc_now()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Record the elapsed time if the timer was started."""
        started = self.start_time
        if not started:
            return
        elapsed = utc_now() - started
        await _global_metrics.record_timer(
            self.metric_name, elapsed.total_seconds(), self.labels
        )
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def timer(metric_name: str, labels: Optional[Dict[str, str]] = None) -> MetricTimer:
    """Return a new ``MetricTimer`` for use with ``async with``."""
    return MetricTimer(metric_name, labels=labels)
|