unrealon 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +23 -21
- unrealon-1.1.1.dist-info/METADATA +722 -0
- unrealon-1.1.1.dist-info/RECORD +82 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info}/WHEEL +1 -1
- unrealon-1.1.1.dist-info/entry_points.txt +9 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info/licenses}/LICENSE +1 -1
- unrealon_bridge/__init__.py +114 -0
- unrealon_bridge/cli.py +316 -0
- unrealon_bridge/client/__init__.py +93 -0
- unrealon_bridge/client/base.py +78 -0
- unrealon_bridge/client/commands.py +89 -0
- unrealon_bridge/client/connection.py +90 -0
- unrealon_bridge/client/events.py +65 -0
- unrealon_bridge/client/health.py +38 -0
- unrealon_bridge/client/html_parser.py +146 -0
- unrealon_bridge/client/logging.py +139 -0
- unrealon_bridge/client/proxy.py +70 -0
- unrealon_bridge/client/scheduler.py +450 -0
- unrealon_bridge/client/session.py +70 -0
- unrealon_bridge/configs/__init__.py +14 -0
- unrealon_bridge/configs/bridge_config.py +212 -0
- unrealon_bridge/configs/bridge_config.yaml +39 -0
- unrealon_bridge/models/__init__.py +138 -0
- unrealon_bridge/models/base.py +28 -0
- unrealon_bridge/models/command.py +41 -0
- unrealon_bridge/models/events.py +40 -0
- unrealon_bridge/models/html_parser.py +79 -0
- unrealon_bridge/models/logging.py +55 -0
- unrealon_bridge/models/parser.py +63 -0
- unrealon_bridge/models/proxy.py +41 -0
- unrealon_bridge/models/requests.py +95 -0
- unrealon_bridge/models/responses.py +88 -0
- unrealon_bridge/models/scheduler.py +592 -0
- unrealon_bridge/models/session.py +28 -0
- unrealon_bridge/server/__init__.py +91 -0
- unrealon_bridge/server/base.py +171 -0
- unrealon_bridge/server/handlers/__init__.py +23 -0
- unrealon_bridge/server/handlers/command.py +110 -0
- unrealon_bridge/server/handlers/html_parser.py +139 -0
- unrealon_bridge/server/handlers/logging.py +95 -0
- unrealon_bridge/server/handlers/parser.py +95 -0
- unrealon_bridge/server/handlers/proxy.py +75 -0
- unrealon_bridge/server/handlers/scheduler.py +545 -0
- unrealon_bridge/server/handlers/session.py +66 -0
- unrealon_browser/__init__.py +61 -18
- unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
- unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
- unrealon_browser/{src/core → core}/browser_manager.py +2 -2
- unrealon_browser/{src/managers → managers}/captcha.py +1 -1
- unrealon_browser/{src/managers → managers}/cookies.py +1 -1
- unrealon_browser/managers/logger_bridge.py +231 -0
- unrealon_browser/{src/managers → managers}/profile.py +1 -1
- unrealon_driver/__init__.py +73 -19
- unrealon_driver/browser/__init__.py +8 -0
- unrealon_driver/browser/config.py +74 -0
- unrealon_driver/browser/manager.py +416 -0
- unrealon_driver/exceptions.py +28 -0
- unrealon_driver/parser/__init__.py +55 -0
- unrealon_driver/parser/cli_manager.py +141 -0
- unrealon_driver/parser/daemon_manager.py +227 -0
- unrealon_driver/parser/managers/__init__.py +46 -0
- unrealon_driver/parser/managers/browser.py +51 -0
- unrealon_driver/parser/managers/config.py +281 -0
- unrealon_driver/parser/managers/error.py +412 -0
- unrealon_driver/parser/managers/html.py +732 -0
- unrealon_driver/parser/managers/logging.py +609 -0
- unrealon_driver/parser/managers/result.py +321 -0
- unrealon_driver/parser/parser_manager.py +628 -0
- unrealon/sdk_config.py +0 -88
- unrealon-1.0.9.dist-info/METADATA +0 -810
- unrealon-1.0.9.dist-info/RECORD +0 -246
- unrealon_browser/pyproject.toml +0 -182
- unrealon_browser/src/__init__.py +0 -62
- unrealon_browser/src/managers/logger_bridge.py +0 -395
- unrealon_driver/README.md +0 -204
- unrealon_driver/pyproject.toml +0 -187
- unrealon_driver/src/__init__.py +0 -90
- unrealon_driver/src/cli/__init__.py +0 -10
- unrealon_driver/src/cli/main.py +0 -66
- unrealon_driver/src/cli/simple.py +0 -510
- unrealon_driver/src/config/__init__.py +0 -11
- unrealon_driver/src/config/auto_config.py +0 -478
- unrealon_driver/src/core/__init__.py +0 -18
- unrealon_driver/src/core/exceptions.py +0 -289
- unrealon_driver/src/core/parser.py +0 -638
- unrealon_driver/src/dto/__init__.py +0 -66
- unrealon_driver/src/dto/cli.py +0 -119
- unrealon_driver/src/dto/config.py +0 -18
- unrealon_driver/src/dto/events.py +0 -237
- unrealon_driver/src/dto/execution.py +0 -313
- unrealon_driver/src/dto/services.py +0 -311
- unrealon_driver/src/execution/__init__.py +0 -23
- unrealon_driver/src/execution/daemon_mode.py +0 -317
- unrealon_driver/src/execution/interactive_mode.py +0 -88
- unrealon_driver/src/execution/modes.py +0 -45
- unrealon_driver/src/execution/scheduled_mode.py +0 -209
- unrealon_driver/src/execution/test_mode.py +0 -250
- unrealon_driver/src/logging/__init__.py +0 -24
- unrealon_driver/src/logging/driver_logger.py +0 -512
- unrealon_driver/src/services/__init__.py +0 -24
- unrealon_driver/src/services/browser_service.py +0 -726
- unrealon_driver/src/services/llm/__init__.py +0 -15
- unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
- unrealon_driver/src/services/llm/llm.py +0 -195
- unrealon_driver/src/services/logger_service.py +0 -232
- unrealon_driver/src/services/metrics_service.py +0 -185
- unrealon_driver/src/services/scheduler_service.py +0 -489
- unrealon_driver/src/services/websocket_service.py +0 -362
- unrealon_driver/src/utils/__init__.py +0 -16
- unrealon_driver/src/utils/service_factory.py +0 -317
- unrealon_driver/src/utils/time_formatter.py +0 -338
- unrealon_llm/README.md +0 -44
- unrealon_llm/__init__.py +0 -26
- unrealon_llm/pyproject.toml +0 -154
- unrealon_llm/src/__init__.py +0 -228
- unrealon_llm/src/cli/__init__.py +0 -0
- unrealon_llm/src/core/__init__.py +0 -11
- unrealon_llm/src/core/smart_client.py +0 -438
- unrealon_llm/src/dto/__init__.py +0 -155
- unrealon_llm/src/dto/models/__init__.py +0 -0
- unrealon_llm/src/dto/models/config.py +0 -343
- unrealon_llm/src/dto/models/core.py +0 -328
- unrealon_llm/src/dto/models/enums.py +0 -123
- unrealon_llm/src/dto/models/html_analysis.py +0 -345
- unrealon_llm/src/dto/models/statistics.py +0 -473
- unrealon_llm/src/dto/models/translation.py +0 -383
- unrealon_llm/src/dto/models/type_conversion.py +0 -462
- unrealon_llm/src/dto/schemas/__init__.py +0 -0
- unrealon_llm/src/exceptions.py +0 -392
- unrealon_llm/src/llm_config/__init__.py +0 -20
- unrealon_llm/src/llm_config/logging_config.py +0 -178
- unrealon_llm/src/llm_logging/__init__.py +0 -42
- unrealon_llm/src/llm_logging/llm_events.py +0 -107
- unrealon_llm/src/llm_logging/llm_logger.py +0 -466
- unrealon_llm/src/managers/__init__.py +0 -15
- unrealon_llm/src/managers/cache_manager.py +0 -67
- unrealon_llm/src/managers/cost_manager.py +0 -107
- unrealon_llm/src/managers/request_manager.py +0 -298
- unrealon_llm/src/modules/__init__.py +0 -0
- unrealon_llm/src/modules/html_processor/__init__.py +0 -25
- unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
- unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
- unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
- unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
- unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
- unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
- unrealon_llm/src/modules/html_processor/processor.py +0 -102
- unrealon_llm/src/modules/llm/__init__.py +0 -0
- unrealon_llm/src/modules/translator/__init__.py +0 -0
- unrealon_llm/src/provider.py +0 -116
- unrealon_llm/src/utils/__init__.py +0 -95
- unrealon_llm/src/utils/common.py +0 -64
- unrealon_llm/src/utils/data_extractor.py +0 -188
- unrealon_llm/src/utils/html_cleaner.py +0 -767
- unrealon_llm/src/utils/language_detector.py +0 -308
- unrealon_llm/src/utils/models_cache.py +0 -592
- unrealon_llm/src/utils/smart_counter.py +0 -229
- unrealon_llm/src/utils/token_counter.py +0 -189
- unrealon_sdk/README.md +0 -25
- unrealon_sdk/__init__.py +0 -30
- unrealon_sdk/pyproject.toml +0 -231
- unrealon_sdk/src/__init__.py +0 -150
- unrealon_sdk/src/cli/__init__.py +0 -12
- unrealon_sdk/src/cli/commands/__init__.py +0 -22
- unrealon_sdk/src/cli/commands/benchmark.py +0 -42
- unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
- unrealon_sdk/src/cli/commands/health.py +0 -46
- unrealon_sdk/src/cli/commands/integration.py +0 -498
- unrealon_sdk/src/cli/commands/reports.py +0 -43
- unrealon_sdk/src/cli/commands/security.py +0 -36
- unrealon_sdk/src/cli/commands/server.py +0 -483
- unrealon_sdk/src/cli/commands/servers.py +0 -56
- unrealon_sdk/src/cli/commands/tests.py +0 -55
- unrealon_sdk/src/cli/main.py +0 -126
- unrealon_sdk/src/cli/utils/reporter.py +0 -519
- unrealon_sdk/src/clients/openapi.yaml +0 -3347
- unrealon_sdk/src/clients/python_http/__init__.py +0 -3
- unrealon_sdk/src/clients/python_http/api_config.py +0 -228
- unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
- unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
- unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
- unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
- unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
- unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
- unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
- unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
- unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
- unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
- unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
- unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
- unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
- unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
- unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
- unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
- unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
- unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
- unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
- unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
- unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
- unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
- unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
- unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
- unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
- unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
- unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
- unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
- unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
- unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
- unrealon_sdk/src/clients/python_websocket/client.py +0 -490
- unrealon_sdk/src/clients/python_websocket/events.py +0 -732
- unrealon_sdk/src/clients/python_websocket/example.py +0 -136
- unrealon_sdk/src/clients/python_websocket/types.py +0 -871
- unrealon_sdk/src/core/__init__.py +0 -64
- unrealon_sdk/src/core/client.py +0 -556
- unrealon_sdk/src/core/config.py +0 -465
- unrealon_sdk/src/core/exceptions.py +0 -239
- unrealon_sdk/src/core/metadata.py +0 -191
- unrealon_sdk/src/core/models.py +0 -142
- unrealon_sdk/src/core/types.py +0 -68
- unrealon_sdk/src/dto/__init__.py +0 -268
- unrealon_sdk/src/dto/authentication.py +0 -108
- unrealon_sdk/src/dto/cache.py +0 -208
- unrealon_sdk/src/dto/common.py +0 -19
- unrealon_sdk/src/dto/concurrency.py +0 -393
- unrealon_sdk/src/dto/events.py +0 -108
- unrealon_sdk/src/dto/health.py +0 -339
- unrealon_sdk/src/dto/load_balancing.py +0 -336
- unrealon_sdk/src/dto/logging.py +0 -230
- unrealon_sdk/src/dto/performance.py +0 -165
- unrealon_sdk/src/dto/rate_limiting.py +0 -295
- unrealon_sdk/src/dto/resource_pooling.py +0 -128
- unrealon_sdk/src/dto/structured_logging.py +0 -112
- unrealon_sdk/src/dto/task_scheduling.py +0 -121
- unrealon_sdk/src/dto/websocket.py +0 -55
- unrealon_sdk/src/enterprise/__init__.py +0 -59
- unrealon_sdk/src/enterprise/authentication.py +0 -401
- unrealon_sdk/src/enterprise/cache_manager.py +0 -578
- unrealon_sdk/src/enterprise/error_recovery.py +0 -494
- unrealon_sdk/src/enterprise/event_system.py +0 -549
- unrealon_sdk/src/enterprise/health_monitor.py +0 -747
- unrealon_sdk/src/enterprise/load_balancer.py +0 -964
- unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
- unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
- unrealon_sdk/src/enterprise/logging/development.py +0 -744
- unrealon_sdk/src/enterprise/logging/service.py +0 -410
- unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
- unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
- unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
- unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
- unrealon_sdk/src/enterprise/resource_pool.py +0 -763
- unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
- unrealon_sdk/src/internal/__init__.py +0 -10
- unrealon_sdk/src/internal/command_router.py +0 -497
- unrealon_sdk/src/internal/connection_manager.py +0 -397
- unrealon_sdk/src/internal/http_client.py +0 -446
- unrealon_sdk/src/internal/websocket_client.py +0 -420
- unrealon_sdk/src/provider.py +0 -471
- unrealon_sdk/src/utils.py +0 -234
- /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
- /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
- /unrealon_browser/{src/cli → cli}/main.py +0 -0
- /unrealon_browser/{src/core → core}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
- /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
- /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
|
@@ -1,747 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Health Monitor - Layer 3 Infrastructure Service
|
|
3
|
-
|
|
4
|
-
Comprehensive system health monitoring with diagnostics, alerting, and trend analysis.
|
|
5
|
-
Provides real-time health status for all SDK components with intelligent alerting
|
|
6
|
-
and predictive health analytics.
|
|
7
|
-
|
|
8
|
-
Features:
|
|
9
|
-
- Multi-component health monitoring
|
|
10
|
-
- Real-time health checks with configurable frequencies
|
|
11
|
-
- Intelligent alerting with severity-based routing
|
|
12
|
-
- Health trend analysis and predictions
|
|
13
|
-
- Dependency tracking and impact analysis
|
|
14
|
-
- System resource monitoring (CPU, memory, disk)
|
|
15
|
-
- Connection quality monitoring
|
|
16
|
-
- Automatic recovery recommendations
|
|
17
|
-
- Health dashboard metrics
|
|
18
|
-
- Integration with performance monitoring
|
|
19
|
-
"""
|
|
20
|
-
|
|
21
|
-
import asyncio
|
|
22
|
-
import logging
|
|
23
|
-
import time
|
|
24
|
-
import threading
|
|
25
|
-
import psutil
|
|
26
|
-
import statistics
|
|
27
|
-
from typing import Dict, List, Optional, Any, Callable, Set, Union
|
|
28
|
-
from datetime import datetime, timezone, timedelta
|
|
29
|
-
from collections import defaultdict, deque
|
|
30
|
-
from dataclasses import dataclass, field
|
|
31
|
-
|
|
32
|
-
# Core SDK components
|
|
33
|
-
from unrealon_sdk.src.core.config import AdapterConfig
|
|
34
|
-
from unrealon_sdk.src.utils import generate_correlation_id
|
|
35
|
-
|
|
36
|
-
# DTO models
|
|
37
|
-
from unrealon_sdk.src.dto.logging import SDKEventType, SDKSeverity
|
|
38
|
-
from unrealon_sdk.src.dto.health import (
|
|
39
|
-
ComponentStatus,
|
|
40
|
-
HealthCheckType,
|
|
41
|
-
AlertSeverity,
|
|
42
|
-
HealthCheckFrequency,
|
|
43
|
-
ConnectionHealthStatus,
|
|
44
|
-
ComponentHealth,
|
|
45
|
-
HealthCheckConfig,
|
|
46
|
-
HealthAlert,
|
|
47
|
-
SystemHealthSummary,
|
|
48
|
-
HealthCheckResult,
|
|
49
|
-
HealthTrend,
|
|
50
|
-
)
|
|
51
|
-
|
|
52
|
-
# Development logging
|
|
53
|
-
from typing import TYPE_CHECKING
|
|
54
|
-
|
|
55
|
-
if TYPE_CHECKING:
|
|
56
|
-
from unrealon_sdk.src.enterprise.logging import DevelopmentLogger
|
|
57
|
-
|
|
58
|
-
logger = logging.getLogger(__name__)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
@dataclass
class HealthMonitorConfig:
    """Configuration for health monitor.

    Every field has a default, so ``HealthMonitorConfig()`` yields a usable
    configuration. Field order is part of the positional constructor
    signature and must not be rearranged.
    """

    # Check intervals (seconds)
    default_check_interval_seconds: float = 30.0
    critical_check_interval_seconds: float = 5.0
    degraded_check_interval_seconds: float = 10.0

    # Resource thresholds. The CPU and memory critical values are compared
    # against psutil percentages by the monitor's CPU/memory checks.
    cpu_warning_threshold: float = 70.0
    cpu_critical_threshold: float = 90.0
    memory_warning_threshold: float = 80.0
    memory_critical_threshold: float = 95.0
    disk_warning_threshold: float = 85.0
    disk_critical_threshold: float = 95.0

    # Response time thresholds (ms)
    response_time_warning_threshold: float = 1000.0
    response_time_critical_threshold: float = 5000.0

    # Error rate thresholds (%)
    error_rate_warning_threshold: float = 5.0
    error_rate_critical_threshold: float = 15.0

    # Alert settings
    enable_alerting: bool = True
    alert_cooldown_seconds: float = 300.0  # 5 minutes
    auto_resolve_alerts: bool = True

    # Trend analysis
    enable_trend_analysis: bool = True
    trend_analysis_hours: int = 24
    min_data_points_for_trend: int = 10

    # Recovery
    enable_auto_recovery: bool = False
    max_auto_recovery_attempts: int = 3
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
class HealthMonitor:
|
|
102
|
-
"""
|
|
103
|
-
Enterprise-grade health monitoring system.
|
|
104
|
-
|
|
105
|
-
Provides comprehensive health monitoring for all SDK components
|
|
106
|
-
with intelligent alerting, trend analysis, and recovery recommendations.
|
|
107
|
-
"""
|
|
108
|
-
|
|
109
|
-
def __init__(
    self,
    config: AdapterConfig,
    health_config: Optional[HealthMonitorConfig] = None,
    dev_logger: Optional["DevelopmentLogger"] = None,
):
    """Initialize health monitor.

    Only in-memory state is created here; background tasks are created
    later by ``start()``.

    Args:
        config: SDK adapter configuration, stored as ``self.config``.
        health_config: Monitor settings. A default ``HealthMonitorConfig``
            is created when omitted.
        dev_logger: Optional development logger, stored as-is.
    """
    self.config = config
    self.health_config = (
        health_config if health_config is not None else HealthMonitorConfig()
    )
    self.dev_logger = dev_logger

    # Thread safety: re-entrant so a method holding the lock can call
    # another method that takes it again.
    self._lock = threading.RLock()

    # Component tracking, keyed by component id. Result history is capped
    # at the most recent 1000 entries per component.
    self._components: Dict[str, ComponentHealth] = {}
    self._health_checks: Dict[str, HealthCheckConfig] = {}
    self._check_results: Dict[str, deque[HealthCheckResult]] = defaultdict(
        lambda: deque(maxlen=1000)
    )

    # Alert management
    self._active_alerts: Dict[str, HealthAlert] = {}
    self._alert_history: deque[HealthAlert] = deque(maxlen=10000)
    self._alert_cooldowns: Dict[str, datetime] = {}

    # Health trend data (each metric series capped at 1000 samples)
    self._health_metrics: Dict[str, deque[float]] = defaultdict(
        lambda: deque(maxlen=1000)
    )
    self._trend_analysis: Dict[str, HealthTrend] = {}

    # System metrics
    self._system_metrics: Dict[str, float] = {}
    self._system_history: deque[Dict[str, float]] = deque(maxlen=1000)

    # Background tasks; populated by start()
    self._monitor_task: Optional[asyncio.Task[None]] = None
    self._trend_analysis_task: Optional[asyncio.Task[None]] = None
    self._system_metrics_task: Optional[asyncio.Task[None]] = None
    self._shutdown = False

    # Health check callbacks, keyed by component id
    self._custom_checks: Dict[str, Callable[[], HealthCheckResult]] = {}

    # Statistics
    self._total_checks = 0
    self._failed_checks = 0
    self._monitoring_start_time = datetime.now(timezone.utc)

    self._log_info("Health monitor initialized")
|
|
160
|
-
|
|
161
|
-
async def start(self) -> None:
    """Start health monitoring.

    Creates the monitoring and system-metrics tasks, plus the trend
    analysis task when ``health_config.enable_trend_analysis`` is set.
    Calling this while the tasks are still running is a no-op for them.
    A task that has already finished (for example after ``stop()``) is
    replaced, so the monitor can be restarted.

    Must be called from a running event loop.
    """
    # stop() sets this flag; clear it so a restarted monitor does not start
    # in the "shutting down" state. Presumably the loop coroutines (not
    # shown in this block) poll it.
    self._shutdown = False

    if self._monitor_task is None or self._monitor_task.done():
        self._monitor_task = asyncio.create_task(self._monitoring_loop())

    if self.health_config.enable_trend_analysis and (
        self._trend_analysis_task is None or self._trend_analysis_task.done()
    ):
        self._trend_analysis_task = asyncio.create_task(self._trend_analysis_loop())

    if self._system_metrics_task is None or self._system_metrics_task.done():
        self._system_metrics_task = asyncio.create_task(self._system_metrics_loop())

    self._log_info("Health monitor started")
|
|
173
|
-
|
|
174
|
-
async def stop(self) -> None:
    """Stop health monitoring.

    Sets the shutdown flag, cancels every background task that was
    started, waits for all of them to finish, and clears the task handles
    so a later ``start()`` creates fresh tasks.

    A background task that already died with an error no longer aborts the
    shutdown: the error is logged and the remaining tasks are still
    cancelled and awaited.
    """
    self._shutdown = True

    tasks = [
        task
        for task in (
            self._monitor_task,
            self._trend_analysis_task,
            self._system_metrics_task,
        )
        if task is not None
    ]
    self._monitor_task = None
    self._trend_analysis_task = None
    self._system_metrics_task = None

    # Cancel everything first, then wait once, so one slow task does not
    # delay cancellation of the others.
    for task in tasks:
        task.cancel()

    if tasks:
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)
        for outcome in outcomes:
            # Cancellation is the expected outcome; anything else means the
            # loop had crashed before or during shutdown.
            if isinstance(outcome, Exception) and not isinstance(
                outcome, asyncio.CancelledError
            ):
                logger.error(f"Health monitor background task failed: {outcome!r}")

    self._log_info("Health monitor stopped")
|
|
188
|
-
|
|
189
|
-
def register_component(
    self,
    component_id: str,
    component_name: str,
    component_type: str,
    health_check_config: Optional[HealthCheckConfig] = None,
    custom_check: Optional[Callable[[], HealthCheckResult]] = None,
) -> None:
    """Register a component for health monitoring.

    Registering an id that already exists replaces the previous
    registration: a fresh ``ComponentHealth`` record in ``UNKNOWN`` status,
    a new check configuration, and the custom check given here. If no
    custom check is given, a previously registered one is dropped instead
    of lingering.

    Args:
        component_id: Key under which the component is tracked.
        component_name: Human-readable name, used in log messages.
        component_type: Free-form type label stored on the health record.
        health_check_config: Check configuration. Defaults to a ``CUSTOM``
            check at ``NORMAL`` frequency.
        custom_check: Optional callable producing a ``HealthCheckResult``.
            When present it is used instead of the standard check.
    """
    with self._lock:
        self._components[component_id] = ComponentHealth(
            component_id=component_id,
            component_name=component_name,
            component_type=component_type,
            status=ComponentStatus.UNKNOWN,
            # The adapter config may not define an environment.
            environment=getattr(self.config, 'environment', 'unknown'),
        )

        if health_check_config is None:
            # Default health check config
            health_check_config = HealthCheckConfig(
                check_type=HealthCheckType.CUSTOM,
                frequency=HealthCheckFrequency.NORMAL,
            )
        self._health_checks[component_id] = health_check_config

        if custom_check is not None:
            self._custom_checks[component_id] = custom_check
        else:
            # Without this, a check from an earlier registration of the
            # same id would keep running for the new component.
            self._custom_checks.pop(component_id, None)

    self._log_info(f"Registered component: {component_name} ({component_id})")
|
|
225
|
-
|
|
226
|
-
def unregister_component(self, component_id: str) -> None:
    """Unregister a component from health monitoring.

    Removes the component record, its check configuration, stored check
    results and custom check, and every metric or trend entry whose key
    starts with ``"<component_id>:"``. Unknown ids are ignored silently.

    Args:
        component_id: Id the component was registered under.
    """
    with self._lock:
        component = self._components.pop(component_id, None)
        if component is None:
            return

        # Clean up all related data
        self._health_checks.pop(component_id, None)
        self._check_results.pop(component_id, None)
        self._custom_checks.pop(component_id, None)

        # Remove component-specific metrics and trends. Each store is
        # scanned on its own so a trend entry is removed even when no
        # metrics series with the same key exists.
        prefix = f"{component_id}:"
        for store in (self._health_metrics, self._trend_analysis):
            # Snapshot the matching keys; deleting while iterating a dict
            # raises RuntimeError.
            for key in [k for k in store if k.startswith(prefix)]:
                del store[key]

        self._log_info(
            f"Unregistered component: {component.component_name} ({component_id})"
        )
|
|
245
|
-
|
|
246
|
-
async def perform_health_check(self, component_id: str) -> HealthCheckResult:
    """Perform health check for specific component.

    Runs the component's custom check when one is registered, otherwise the
    standard check for its configured type. The result is applied to the
    component record, appended to the result history and counted exactly
    once. Alerts are then evaluated for checks that ran to completion.

    Args:
        component_id: Id of a registered component.

    Returns:
        The check result. If running the check raised, an ``UNHEALTHY``
        result carrying the error message is returned instead of raising.

    Raises:
        ValueError: If ``component_id`` is not registered.
    """
    if component_id not in self._components:
        raise ValueError(f"Component {component_id} not registered")

    # perf_counter is monotonic, so the measured duration cannot go
    # negative when the wall clock is adjusted.
    started = time.perf_counter()
    check_id = generate_correlation_id()
    failure: Optional[Exception] = None

    try:
        health_check = self._health_checks[component_id]
        if component_id in self._custom_checks:
            result = await self._execute_custom_check(component_id, check_id)
        else:
            result = await self._execute_standard_check(component_id, check_id, health_check)
    except Exception as e:
        failure = e
        # The configuration may be what was missing, so look it up without
        # raising again from inside the handler.
        known_check = self._health_checks.get(component_id)
        result = HealthCheckResult(
            check_id=check_id,
            component_id=component_id,
            check_type=(
                known_check.check_type
                if known_check is not None
                else HealthCheckType.CUSTOM
            ),
            status=ComponentStatus.UNHEALTHY,
            success=False,
            response_time_ms=(time.perf_counter() - started) * 1000,
            error_message=str(e),
            completed_at=datetime.now(timezone.utc),
        )

    # Update component health
    self._update_component_health(component_id, result)

    # Store check result (one record and one count per check)
    with self._lock:
        self._check_results[component_id].append(result)
        self._total_checks += 1
        if not result.success:
            self._failed_checks += 1

    if failure is not None:
        logger.error(f"Health check failed for {component_id}: {failure}")
        return result

    # Alert evaluation is kept outside the check's try block so an error
    # here cannot overwrite the real result or count the check twice.
    try:
        await self._check_for_alerts(component_id, result)
    except Exception as e:
        logger.error(f"Alert evaluation failed for {component_id}: {e}")

    return result
|
|
303
|
-
|
|
304
|
-
async def _execute_custom_check(self, component_id: str, check_id: str) -> HealthCheckResult:
    """Execute custom health check.

    Calls the callable registered for ``component_id``. If the call returns
    an awaitable it is awaited. This covers ``async def`` checks as well as
    plain callables (lambdas, callable objects) that return a coroutine.
    The result's ``check_id`` is overwritten with ``check_id``.

    Note that a synchronous check runs on the event loop thread.

    Args:
        component_id: Id whose custom check should run.
        check_id: Correlation id stamped onto the result.

    Returns:
        The check's own result, or an ``UNHEALTHY`` result with a
        ``"Custom check failed: ..."`` message if the check raised or
        returned something unusable.

    Raises:
        KeyError: If no custom check is registered for ``component_id``.
    """
    import inspect  # local import: not among this module's top-level imports

    custom_check = self._custom_checks[component_id]
    started = time.perf_counter()

    try:
        result = custom_check()
        if inspect.isawaitable(result):
            result = await result

        # Ensure result has correct check_id
        result.check_id = check_id
        return result

    except Exception as e:
        return HealthCheckResult(
            check_id=check_id,
            component_id=component_id,
            check_type=HealthCheckType.CUSTOM,
            status=ComponentStatus.UNHEALTHY,
            success=False,
            # Report the time actually spent, matching the standard check's
            # failure result.
            response_time_ms=(time.perf_counter() - started) * 1000,
            error_message=f"Custom check failed: {str(e)}",
        )
|
|
328
|
-
|
|
329
|
-
async def _execute_standard_check(
    self, component_id: str, check_id: str, health_check: HealthCheckConfig
) -> HealthCheckResult:
    """Execute standard health check based on type.

    Dispatches on ``health_check.check_type`` to the connection, API,
    memory or CPU probe. Any other type is reported healthy without
    probing.

    Args:
        component_id: Id of the component being checked.
        check_id: Correlation id for the result.
        health_check: Configuration whose ``check_type`` selects the probe.

    Returns:
        A result whose status is ``HEALTHY`` or ``UNHEALTHY`` according to
        the probe. If the probe raised, an ``UNHEALTHY`` result carrying the
        error message is returned.
    """
    # Monotonic clock: elapsed time stays valid across wall-clock changes.
    started = time.perf_counter()

    try:
        check_type = health_check.check_type
        if check_type == HealthCheckType.CONNECTION:
            success = await self._check_connection_health(component_id)
        elif check_type == HealthCheckType.API:
            success = await self._check_api_health(component_id, health_check)
        elif check_type == HealthCheckType.MEMORY:
            success = await self._check_memory_health()
        elif check_type == HealthCheckType.CPU:
            success = await self._check_cpu_health()
        else:
            success = True  # Default to healthy for unknown types

        return HealthCheckResult(
            check_id=check_id,
            component_id=component_id,
            check_type=check_type,
            status=ComponentStatus.HEALTHY if success else ComponentStatus.UNHEALTHY,
            success=success,
            response_time_ms=(time.perf_counter() - started) * 1000,
            message="Health check completed",
        )

    except Exception as e:
        return HealthCheckResult(
            check_id=check_id,
            component_id=component_id,
            check_type=health_check.check_type,
            status=ComponentStatus.UNHEALTHY,
            success=False,
            response_time_ms=(time.perf_counter() - started) * 1000,
            error_message=str(e),
        )
|
|
369
|
-
|
|
370
|
-
async def _check_connection_health(self, component_id: str) -> bool:
|
|
371
|
-
"""Check connection health for component."""
|
|
372
|
-
# This would integrate with actual connection managers
|
|
373
|
-
# For now, return True as placeholder
|
|
374
|
-
return True
|
|
375
|
-
|
|
376
|
-
async def _check_api_health(self, component_id: str, health_check: HealthCheckConfig) -> bool:
|
|
377
|
-
"""Check API health for component."""
|
|
378
|
-
# This would make actual API calls to check health endpoints
|
|
379
|
-
# For now, return True as placeholder
|
|
380
|
-
return True
|
|
381
|
-
|
|
382
|
-
async def _check_memory_health(self) -> bool:
    """Report whether system memory usage is below the critical threshold.

    Returns:
        ``True`` when ``psutil.virtual_memory().percent`` is strictly below
        ``self.health_config.memory_critical_threshold``; ``False``
        otherwise, including when taking the reading fails.
    """
    try:
        memory = psutil.virtual_memory()
        return memory.percent < self.health_config.memory_critical_threshold
    except Exception as e:
        # Still best-effort (a failed probe counts as unhealthy), but leave a
        # trace so a broken probe can be told apart from real memory pressure.
        logger.error(f"Error checking memory health: {e}")
        return False
|
|
389
|
-
|
|
390
|
-
async def _check_cpu_health(self) -> bool:
    """Report whether system CPU usage is below the critical threshold.

    CPU usage is sampled over a one-second interval.

    Returns:
        ``True`` when the sampled CPU percentage is strictly below
        ``self.health_config.cpu_critical_threshold``; ``False`` otherwise,
        including when taking the reading fails.
    """
    try:
        # cpu_percent(interval=1) sleeps for the whole interval. Run it in the
        # default executor so the event loop keeps serving other tasks
        # instead of stalling for a second on every CPU check.
        loop = asyncio.get_running_loop()
        cpu_percent = await loop.run_in_executor(None, psutil.cpu_percent, 1)
        return cpu_percent < self.health_config.cpu_critical_threshold
    except Exception as e:
        # Still best-effort (a failed probe counts as unhealthy), but leave a
        # trace so a broken probe can be told apart from real CPU pressure.
        logger.error(f"Error checking CPU health: {e}")
        return False
|
|
397
|
-
|
|
398
|
-
def _update_component_health(self, component_id: str, result: HealthCheckResult) -> None:
|
|
399
|
-
"""Update component health based on check result."""
|
|
400
|
-
with self._lock:
|
|
401
|
-
if component_id in self._components:
|
|
402
|
-
component = self._components[component_id]
|
|
403
|
-
old_status = component.status
|
|
404
|
-
|
|
405
|
-
component.status = result.status
|
|
406
|
-
component.last_check_time = result.completed_at
|
|
407
|
-
|
|
408
|
-
if result.response_time_ms:
|
|
409
|
-
component.response_time_ms = result.response_time_ms
|
|
410
|
-
|
|
411
|
-
if result.error_message:
|
|
412
|
-
component.status_message = result.error_message
|
|
413
|
-
else:
|
|
414
|
-
component.status_message = "Component healthy"
|
|
415
|
-
|
|
416
|
-
# Track status changes
|
|
417
|
-
if old_status != result.status:
|
|
418
|
-
self._log_info(
|
|
419
|
-
f"Component {component.component_name} status changed: {old_status.value} → {result.status.value}"
|
|
420
|
-
)
|
|
421
|
-
|
|
422
|
-
async def _check_for_alerts(self, component_id: str, result: HealthCheckResult) -> None:
    """Raise an alert for ``result`` when its status or latency warrants one.

    Nothing happens when alerting is disabled or while the component's alert
    cooldown is still running. Otherwise the severity is the most severe of:

    * status UNHEALTHY -> CRITICAL, status DEGRADED -> WARNING;
    * response time above the critical threshold -> CRITICAL, above the
      warning threshold -> WARNING.

    Args:
        component_id: Component the result belongs to.
        result: Health check outcome to evaluate.
    """
    if not self.health_config.enable_alerting:
        return

    # Check cooldown
    cooldown_end = self._alert_cooldowns.get(f"{component_id}:status")
    if cooldown_end is not None and datetime.now(timezone.utc) < cooldown_end:
        return

    # Severity derived from the reported status; None means "no alert".
    severity = None
    if result.status == ComponentStatus.UNHEALTHY:
        severity = AlertSeverity.CRITICAL
    elif result.status == ComponentStatus.DEGRADED:
        severity = AlertSeverity.WARNING

    # Response time thresholds may only raise the severity. A latency in the
    # warning band must not downgrade a CRITICAL alert caused by the status.
    response_time = result.response_time_ms
    if response_time:
        if response_time > self.health_config.response_time_critical_threshold:
            severity = AlertSeverity.CRITICAL
        elif (
            severity is None
            and response_time > self.health_config.response_time_warning_threshold
        ):
            severity = AlertSeverity.WARNING

    if severity is not None:
        await self._create_alert(component_id, result, severity)
|
|
458
|
-
|
|
459
|
-
async def _create_alert(
    self, component_id: str, result: HealthCheckResult, severity: AlertSeverity
) -> None:
    """Build, store and log a ``HealthAlert`` for a component.

    The alert is registered as the active alert under
    ``"<component_id>:status"``, appended to the alert history, and a
    cooldown of ``health_config.alert_cooldown_seconds`` is started for that
    key. Raises ``KeyError`` if ``component_id`` is not registered.
    """
    name = self._components[component_id].component_name

    alert = HealthAlert(
        component_id=component_id,
        alert_type=result.check_type,
        severity=severity,
        title=f"Health Alert: {name}",
        message=f"Component {name} is {result.status.value}",
        description=result.error_message or result.message,
        current_status=result.status,
        current_value=result.response_time_ms,
    )

    with self._lock:
        key = f"{component_id}:status"
        self._active_alerts[key] = alert
        self._alert_history.append(alert)

        # Start the cooldown during which no further alert is raised for
        # this key.
        self._alert_cooldowns[key] = datetime.now(timezone.utc) + timedelta(
            seconds=self.health_config.alert_cooldown_seconds
        )

    # NOTE(review): assumed to run after the lock is released - confirm.
    self._log_alert(alert)
|
|
488
|
-
|
|
489
|
-
async def _monitoring_loop(self) -> None:
|
|
490
|
-
"""Main health monitoring loop."""
|
|
491
|
-
while not self._shutdown:
|
|
492
|
-
try:
|
|
493
|
-
# Perform health checks for all registered components
|
|
494
|
-
for component_id in list(self._components.keys()):
|
|
495
|
-
if self._shutdown:
|
|
496
|
-
break
|
|
497
|
-
|
|
498
|
-
try:
|
|
499
|
-
await self.perform_health_check(component_id)
|
|
500
|
-
except Exception as e:
|
|
501
|
-
logger.error(f"Error in health check for {component_id}: {e}")
|
|
502
|
-
|
|
503
|
-
# Wait for next cycle
|
|
504
|
-
await asyncio.sleep(self.health_config.default_check_interval_seconds)
|
|
505
|
-
|
|
506
|
-
except asyncio.CancelledError:
|
|
507
|
-
break
|
|
508
|
-
except Exception as e:
|
|
509
|
-
logger.error(f"Error in health monitoring loop: {e}")
|
|
510
|
-
await asyncio.sleep(5) # Short delay before retrying
|
|
511
|
-
|
|
512
|
-
async def _trend_analysis_loop(self) -> None:
|
|
513
|
-
"""Background trend analysis loop."""
|
|
514
|
-
while not self._shutdown:
|
|
515
|
-
try:
|
|
516
|
-
await asyncio.sleep(3600) # Run every hour
|
|
517
|
-
await self._perform_trend_analysis()
|
|
518
|
-
except asyncio.CancelledError:
|
|
519
|
-
break
|
|
520
|
-
except Exception as e:
|
|
521
|
-
logger.error(f"Error in trend analysis: {e}")
|
|
522
|
-
|
|
523
|
-
async def _system_metrics_loop(self) -> None:
|
|
524
|
-
"""Background system metrics collection loop."""
|
|
525
|
-
while not self._shutdown:
|
|
526
|
-
try:
|
|
527
|
-
await self._collect_system_metrics()
|
|
528
|
-
await asyncio.sleep(60) # Collect every minute
|
|
529
|
-
except asyncio.CancelledError:
|
|
530
|
-
break
|
|
531
|
-
except Exception as e:
|
|
532
|
-
logger.error(f"Error collecting system metrics: {e}")
|
|
533
|
-
|
|
534
|
-
async def _collect_system_metrics(self) -> None:
    """Sample CPU, memory and disk usage and record the readings.

    The latest sample replaces ``self._system_metrics``, a copy is appended
    to ``self._system_history``, and each value is appended to
    ``self._health_metrics["system:<name>"]`` for trend analysis. Any error
    is logged and swallowed, so the caller's loop keeps running.
    """

    def _sample() -> dict:
        # cpu_percent(interval=1) sleeps for the whole interval, which is why
        # sampling is kept off the event loop thread.
        return {
            'cpu_percent': psutil.cpu_percent(interval=1),
            'memory_percent': psutil.virtual_memory().percent,
            'disk_percent': psutil.disk_usage('/').percent,
        }

    try:
        # Run the blocking psutil calls in the default executor so other
        # coroutines are not stalled for a second on every collection.
        metrics = await asyncio.get_running_loop().run_in_executor(None, _sample)

        with self._lock:
            self._system_metrics = metrics
            self._system_history.append(metrics.copy())

            # Store in health metrics for trend analysis.
            # NOTE(review): done under the lock here because trend analysis
            # iterates _health_metrics while holding it - confirm this matches
            # the intended locking.
            for metric_name, value in metrics.items():
                self._health_metrics[f"system:{metric_name}"].append(value)

    except Exception as e:
        logger.error(f"Error collecting system metrics: {e}")
|
|
553
|
-
|
|
554
|
-
async def _perform_trend_analysis(self) -> None:
|
|
555
|
-
"""Perform trend analysis on health metrics."""
|
|
556
|
-
with self._lock:
|
|
557
|
-
for metric_key, values in self._health_metrics.items():
|
|
558
|
-
if len(values) >= self.health_config.min_data_points_for_trend:
|
|
559
|
-
try:
|
|
560
|
-
trend = self._analyze_metric_trend(metric_key, values)
|
|
561
|
-
self._trend_analysis[metric_key] = trend
|
|
562
|
-
except Exception as e:
|
|
563
|
-
logger.error(f"Error analyzing trend for {metric_key}: {e}")
|
|
564
|
-
|
|
565
|
-
def _analyze_metric_trend(self, metric_key: str, values: deque[float]) -> HealthTrend:
    """Summarise one metric series as a ``HealthTrend``.

    The trend compares the mean of the latest ten samples with the mean of
    the (up to ten) samples immediately before them. A change smaller than
    5 % is "stable" with strength 0.1; larger changes are "up" or "down"
    with strength ``min(1.0, |change| / 50)``. With no earlier samples the
    trend is "stable" with strength 0.0.

    Args:
        metric_key: Key of the form ``"<component_id>:<metric_name>"``.
        values: Recorded samples, oldest first; must not be empty.
    """
    samples = list(values)

    # Descriptive statistics over the whole series.
    current_value = samples[-1]
    average_value = statistics.mean(samples)
    min_value = min(samples)
    max_value = max(samples)
    std_dev = statistics.stdev(samples) if len(samples) > 1 else 0.0

    # Latest window versus the window just before it. Slice bounds clamp, so
    # a short series yields a shorter (possibly empty) earlier window.
    latest_window = samples[-10:]
    earlier_window = samples[-20:-10]

    change_percentage = 0.0
    trend_direction = "stable"
    trend_strength = 0.0

    if earlier_window:
        latest_avg = statistics.mean(latest_window)
        earlier_avg = statistics.mean(earlier_window)
        if earlier_avg != 0:
            change_percentage = ((latest_avg - earlier_avg) / earlier_avg) * 100

        if abs(change_percentage) < 5:
            trend_strength = 0.1
        else:
            trend_direction = "up" if change_percentage > 0 else "down"
            trend_strength = min(1.0, abs(change_percentage) / 50.0)

    component_id, separator, remainder = metric_key.partition(':')

    return HealthTrend(
        component_id=component_id,
        metric_name=remainder if separator else metric_key,
        trend_direction=trend_direction,
        trend_strength=trend_strength,
        change_percentage=change_percentage,
        analysis_period_hours=self.health_config.trend_analysis_hours,
        data_points=len(samples),
        current_value=current_value,
        average_value=average_value,
        min_value=min_value,
        max_value=max_value,
        standard_deviation=std_dev,
    )
|
|
613
|
-
|
|
614
|
-
def get_system_health_summary(self) -> SystemHealthSummary:
    """Build a snapshot of overall system health.

    The overall status is UNHEALTHY if any component is unhealthy, else
    DEGRADED if any is degraded, else HEALTHY if any is healthy, else
    UNKNOWN. The resource usage figures are taken from the most recent
    system metrics sample (0.0 when none has been collected).
    """
    with self._lock:
        components = list(self._components.values())

        healthy = degraded = unhealthy = 0
        for comp in components:
            if comp.status == ComponentStatus.HEALTHY:
                healthy += 1
            elif comp.status == ComponentStatus.DEGRADED:
                degraded += 1
            elif comp.status == ComponentStatus.UNHEALTHY:
                unhealthy += 1
        total = len(components)

        # Worst status present wins.
        if unhealthy > 0:
            overall_status = ComponentStatus.UNHEALTHY
        elif degraded > 0:
            overall_status = ComponentStatus.DEGRADED
        elif healthy > 0:
            overall_status = ComponentStatus.HEALTHY
        else:
            overall_status = ComponentStatus.UNKNOWN

        # Mean response time over components that have a reading.
        timings = [
            comp.response_time_ms
            for comp in components
            if comp.response_time_ms is not None
        ]
        avg_response_time = statistics.mean(timings) if timings else 0.0

        # Latest system resource sample.
        latest = self._system_metrics
        avg_cpu = latest.get('cpu_percent', 0.0)
        avg_memory = latest.get('memory_percent', 0.0)
        avg_disk = latest.get('disk_percent', 0.0)

        # Alert counts by severity.
        severities = [alert.severity for alert in self._active_alerts.values()]
        critical_alerts = severities.count(AlertSeverity.CRITICAL)
        warning_alerts = severities.count(AlertSeverity.WARNING)

        # Seconds since monitoring started.
        uptime = (datetime.now(timezone.utc) - self._monitoring_start_time).total_seconds()

        if self._total_checks > 0:
            error_rate = self._failed_checks / self._total_checks * 100
        else:
            error_rate = 0.0

        return SystemHealthSummary(
            overall_status=overall_status,
            healthy_components=healthy,
            degraded_components=degraded,
            unhealthy_components=unhealthy,
            total_components=total,
            system_uptime_seconds=uptime,
            avg_response_time_ms=avg_response_time,
            total_requests=self._total_checks,
            error_rate_percent=error_rate,
            avg_cpu_usage_percent=avg_cpu,
            avg_memory_usage_percent=avg_memory,
            avg_disk_usage_percent=avg_disk,
            active_alerts=len(severities),
            critical_alerts=critical_alerts,
            warning_alerts=warning_alerts,
        )
|
|
678
|
-
|
|
679
|
-
def get_component_health(self, component_id: str) -> Optional[ComponentHealth]:
    """Return the tracked health record for ``component_id``.

    Returns:
        The stored ``ComponentHealth`` object, or ``None`` when the id is not
        registered. The lookup is performed under ``self._lock``.
    """
    with self._lock:
        record = self._components.get(component_id)
    return record
|
|
683
|
-
|
|
684
|
-
def get_all_components_health(self) -> List[ComponentHealth]:
    """Return the health records of all registered components.

    Returns:
        A new list built under ``self._lock``; the records themselves are
        the stored objects, not copies.
    """
    with self._lock:
        records = [*self._components.values()]
    return records
|
|
688
|
-
|
|
689
|
-
def get_active_alerts(self) -> List[HealthAlert]:
    """Return all currently active health alerts.

    Returns:
        A new list built under ``self._lock``; the alerts themselves are the
        stored objects, not copies.
    """
    with self._lock:
        alerts = [*self._active_alerts.values()]
    return alerts
|
|
693
|
-
|
|
694
|
-
def get_trend_analysis(self, metric_key: Optional[str] = None) -> Union[HealthTrend, Dict[str, HealthTrend]]:
    """Return the stored trend for one metric, or for all metrics.

    Args:
        metric_key: Metric to look up. When omitted (or otherwise falsy, such
            as an empty string) all trends are returned.

    Returns:
        A shallow copy of the whole trend mapping when no key is given;
        otherwise the trend stored for ``metric_key``, which is ``None`` if
        that metric has no analysis yet.
    """
    with self._lock:
        if not metric_key:
            return self._trend_analysis.copy()
        return self._trend_analysis.get(metric_key)
|
|
701
|
-
|
|
702
|
-
def _log_alert(self, alert: HealthAlert) -> None:
    """Emit a log entry for ``alert``.

    With a development logger configured, the alert is logged as a
    ``SYSTEM_HEALTH_DEGRADED`` event with the alert severity translated to
    the SDK severity (FATAL maps to CRITICAL, unknown values to INFO).
    Without one, the message goes to the module logger at warning level.
    """
    sdk_severity_by_alert = {
        AlertSeverity.INFO: SDKSeverity.INFO,
        AlertSeverity.WARNING: SDKSeverity.WARNING,
        AlertSeverity.ERROR: SDKSeverity.ERROR,
        AlertSeverity.CRITICAL: SDKSeverity.CRITICAL,
        AlertSeverity.FATAL: SDKSeverity.CRITICAL,
    }

    text = f"Health Alert: {alert.title} - {alert.message}"

    if not self.dev_logger:
        logger.warning(text)
        return

    self.dev_logger.log(
        SDKEventType.SYSTEM_HEALTH_DEGRADED,
        sdk_severity_by_alert.get(alert.severity, SDKSeverity.INFO),
        text,
        details={
            "alert_id": alert.alert_id,
            "component_id": alert.component_id,
            "severity": alert.severity.value,
            "current_status": alert.current_status.value,
        },
    )
|
|
728
|
-
|
|
729
|
-
def _log_info(self, message: str, **kwargs: Any) -> None:
    """Log an informational message.

    With a development logger configured, the message is logged as a
    ``HEALTH_CHECK_PASSED`` event and ``kwargs`` are forwarded to it.
    Otherwise it goes to the module logger and ``kwargs`` are ignored.
    """
    if not self.dev_logger:
        logger.info(message)
        return

    self.dev_logger.log_info(
        SDKEventType.HEALTH_CHECK_PASSED, message, **kwargs
    )
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
# Public API of this module.
__all__ = [
    # Main business logic class
    "HealthMonitor",
    # Its configuration
    "HealthMonitorConfig",
    # Health monitoring models are not listed here; they are available via
    # the DTO imports:
    #     from unrealon_sdk.src.dto.health import ...
]
|