truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
- truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
- truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
- truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -153,3 +153,52 @@ class TestNotificationEvent(NotificationEvent):
|
|
|
153
153
|
base = super().to_dict()
|
|
154
154
|
base.update({"channel_name": self.channel_name})
|
|
155
155
|
return base
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@dataclass
class SchemaChangedEvent(NotificationEvent):
    """Notification event emitted when a schema change has been detected.

    Attributes:
        from_version: Version number of the previous schema, or None when
            there was no earlier version.
        to_version: Version number of the new schema.
        total_changes: Count of all detected changes.
        breaking_changes: Count of the changes classified as breaking.
        changes: Per-change detail dictionaries.
    """

    event_type: str = field(default="schema_changed", init=False)
    from_version: int | None = None
    to_version: int = 0
    total_changes: int = 0
    breaking_changes: int = 0
    changes: list[dict[str, Any]] = field(default_factory=list)

    @property
    def severity(self) -> str:
        """Return the severity label derived from the change counters.

        Any breaking change yields "Critical"; otherwise five or more
        changes yield "High"; everything else is "Medium".
        """
        if self.breaking_changes > 0:
            return "Critical"
        return "High" if self.total_changes >= 5 else "Medium"

    @property
    def has_breaking_changes(self) -> bool:
        """Return True when at least one breaking change was counted."""
        return self.breaking_changes > 0

    def to_dict(self) -> dict[str, Any]:
        """Serialize the event, extending the parent's dictionary.

        The version numbers, both counters and the two derived properties
        are added on top of whatever the base class emits. The ``changes``
        list itself is not part of the payload.
        """
        payload = super().to_dict()
        schema_fields = {
            "from_version": self.from_version,
            "to_version": self.to_version,
            "total_changes": self.total_changes,
            "breaking_changes": self.breaking_changes,
            "severity": self.severity,
            "has_breaking_changes": self.has_breaking_changes,
        }
        payload.update(schema_fields)
        return payload
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Metrics collection infrastructure for notifications.
|
|
2
|
+
|
|
3
|
+
This module provides metrics collection and aggregation for the
|
|
4
|
+
advanced notification system components.
|
|
5
|
+
|
|
6
|
+
Architecture:
|
|
7
|
+
- Base Infrastructure: Abstract base classes and registry for extensible
|
|
8
|
+
metrics collection (supports future Redis-based implementations).
|
|
9
|
+
- Specialized Collectors: Domain-specific collectors for deduplication,
|
|
10
|
+
throttling, and escalation with pre-defined metrics.
|
|
11
|
+
|
|
12
|
+
Classes:
|
|
13
|
+
Base Infrastructure:
|
|
14
|
+
BaseMetricsCollector: Abstract base for generic metric collectors
|
|
15
|
+
InMemoryMetricsCollector: In-memory implementation with async locks
|
|
16
|
+
MetricsRegistry: Singleton registry for managing collectors
|
|
17
|
+
MetricEvent: Data class for recorded events
|
|
18
|
+
MetricSnapshot: Data class for metric snapshots
|
|
19
|
+
MetricType: Enum for metric types
|
|
20
|
+
|
|
21
|
+
Specialized Collectors:
|
|
22
|
+
DeduplicationMetrics: Track deduplication rates and active fingerprints
|
|
23
|
+
ThrottlingMetrics: Track throttling rates and window counts
|
|
24
|
+
EscalationMetrics: Track incidents by state and resolution times
|
|
25
|
+
MetricsCollector: Aggregated collector for all subsystems
|
|
26
|
+
|
|
27
|
+
Example:
|
|
28
|
+
# Using the base infrastructure
|
|
29
|
+
from truthound_dashboard.core.notifications.metrics import (
|
|
30
|
+
MetricsRegistry,
|
|
31
|
+
InMemoryMetricsCollector,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
registry = MetricsRegistry.get_instance()
|
|
35
|
+
collector = await registry.get_collector("my_component")
|
|
36
|
+
await collector.record_event("event_happened", {"detail": "value"})
|
|
37
|
+
await collector.increment("counter_name")
|
|
38
|
+
stats = await collector.get_stats()
|
|
39
|
+
|
|
40
|
+
# Using specialized collectors
|
|
41
|
+
from truthound_dashboard.core.notifications.metrics import (
|
|
42
|
+
DeduplicationMetrics,
|
|
43
|
+
ThrottlingMetrics,
|
|
44
|
+
EscalationMetrics,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
dedup_metrics = DeduplicationMetrics()
|
|
48
|
+
await dedup_metrics.record_received()
|
|
49
|
+
stats = await dedup_metrics.get_stats()
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
from .base import (
|
|
53
|
+
InMemoryMetricsCollector,
|
|
54
|
+
MetricEvent,
|
|
55
|
+
MetricSnapshot,
|
|
56
|
+
MetricsCollector as BaseMetricsCollector,
|
|
57
|
+
MetricsRegistry,
|
|
58
|
+
MetricType,
|
|
59
|
+
)
|
|
60
|
+
from .collectors import (
|
|
61
|
+
DeduplicationMetrics,
|
|
62
|
+
DeduplicationStats,
|
|
63
|
+
EscalationMetrics,
|
|
64
|
+
EscalationStats,
|
|
65
|
+
IncidentRecord,
|
|
66
|
+
MetricsCollector,
|
|
67
|
+
NotificationMetrics,
|
|
68
|
+
ThrottlingMetrics,
|
|
69
|
+
ThrottlingStats,
|
|
70
|
+
WindowCount,
|
|
71
|
+
)
|
|
72
|
+
from ..stats_aggregator import (
|
|
73
|
+
StatsAggregator,
|
|
74
|
+
StatsCache,
|
|
75
|
+
TimeRange,
|
|
76
|
+
get_stats_cache,
|
|
77
|
+
reset_stats_cache,
|
|
78
|
+
CacheStrategy,
|
|
79
|
+
DeduplicationStatsResult,
|
|
80
|
+
EscalationStatsResult,
|
|
81
|
+
ThrottlingStatsResult,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
__all__ = [
|
|
85
|
+
# Base infrastructure
|
|
86
|
+
"BaseMetricsCollector",
|
|
87
|
+
"InMemoryMetricsCollector",
|
|
88
|
+
"MetricsRegistry",
|
|
89
|
+
"MetricEvent",
|
|
90
|
+
"MetricSnapshot",
|
|
91
|
+
"MetricType",
|
|
92
|
+
# Specialized collectors
|
|
93
|
+
"DeduplicationMetrics",
|
|
94
|
+
"ThrottlingMetrics",
|
|
95
|
+
"EscalationMetrics",
|
|
96
|
+
"MetricsCollector",
|
|
97
|
+
# Stats dataclasses
|
|
98
|
+
"DeduplicationStats",
|
|
99
|
+
"ThrottlingStats",
|
|
100
|
+
"EscalationStats",
|
|
101
|
+
"NotificationMetrics",
|
|
102
|
+
# Supporting types
|
|
103
|
+
"WindowCount",
|
|
104
|
+
"IncidentRecord",
|
|
105
|
+
# Stats aggregation (efficient DB queries with caching)
|
|
106
|
+
"StatsAggregator",
|
|
107
|
+
"StatsCache",
|
|
108
|
+
"TimeRange",
|
|
109
|
+
"get_stats_cache",
|
|
110
|
+
"reset_stats_cache",
|
|
111
|
+
"CacheStrategy",
|
|
112
|
+
"DeduplicationStatsResult",
|
|
113
|
+
"EscalationStatsResult",
|
|
114
|
+
"ThrottlingStatsResult",
|
|
115
|
+
]
|
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
"""Metrics collection infrastructure for advanced notifications.
|
|
2
|
+
|
|
3
|
+
This module provides an extensible metrics collection system for tracking
|
|
4
|
+
notification-related events and statistics across different components.
|
|
5
|
+
|
|
6
|
+
Architecture:
|
|
7
|
+
- MetricsCollector: Abstract base for collecting metrics
|
|
8
|
+
- InMemoryMetricsCollector: In-memory implementation guarded by an asyncio lock
|
|
9
|
+
- MetricsRegistry: Singleton registry for managing collectors
|
|
10
|
+
|
|
11
|
+
The design supports future extension to Redis-based or other distributed
|
|
12
|
+
storage backends while maintaining a consistent interface.
|
|
13
|
+
|
|
14
|
+
Example:
|
|
15
|
+
# Get or create a collector for a component
|
|
16
|
+
registry = MetricsRegistry.get_instance()
|
|
17
|
+
collector = await registry.get_collector("deduplication")
|
|
18
|
+
|
|
19
|
+
# Record events and counters
|
|
20
|
+
await collector.record_event("duplicate_detected", {"fingerprint": "abc123"})
|
|
21
|
+
await collector.increment("duplicates_blocked")
|
|
22
|
+
|
|
23
|
+
# Get aggregated stats
|
|
24
|
+
stats = await collector.get_stats()
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import asyncio
|
|
30
|
+
import time
|
|
31
|
+
from abc import ABC, abstractmethod
|
|
32
|
+
from collections import defaultdict
|
|
33
|
+
from dataclasses import dataclass, field
|
|
34
|
+
from datetime import datetime
|
|
35
|
+
from enum import Enum
|
|
36
|
+
from typing import Any, ClassVar
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class MetricType(str, Enum):
    """Enumeration of the metric kinds a collector can work with.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value (for example ``MetricType.COUNTER == "counter"``).
    """

    COUNTER = "counter"
    GAUGE = "gauge"
    HISTOGRAM = "histogram"
    EVENT = "event"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class MetricEvent:
    """One recorded metric event.

    Attributes:
        event_type: Identifier naming the kind of event.
        timestamp: Moment the event occurred; defaults to
            ``datetime.utcnow()`` at construction time.
        metadata: Free-form, event-specific data; defaults to a new
            empty dict per instance.
    """

    event_type: str
    timestamp: datetime = field(default_factory=datetime.utcnow)
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of the event for serialization.

        The timestamp is rendered with ``isoformat()``; the metadata dict
        is passed through as-is (not copied).
        """
        serialized: dict[str, Any] = {"event_type": self.event_type}
        serialized["timestamp"] = self.timestamp.isoformat()
        serialized["metadata"] = self.metadata
        return serialized
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class MetricSnapshot:
    """Point-in-time view of the metrics held by a collector.

    Attributes:
        counters: Counter values at snapshot time.
        events: Recent events (bounded by the collector's retention).
        event_counts: Total number of events seen per event type.
        last_reset: When the metrics were last reset, or None.
        collected_at: When the snapshot was taken; defaults to
            ``datetime.utcnow()`` at construction time.
    """

    counters: dict[str, int]
    events: list[MetricEvent]
    event_counts: dict[str, int]
    last_reset: datetime | None
    collected_at: datetime = field(default_factory=datetime.utcnow)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of the snapshot for serialization.

        Events are converted through their own ``to_dict()``; both
        timestamps are rendered with ``isoformat()`` and ``last_reset``
        becomes None when it is unset.
        """
        serialized_events = [event.to_dict() for event in self.events]

        reset_at = None
        if self.last_reset:
            reset_at = self.last_reset.isoformat()

        return {
            "counters": self.counters,
            "events": serialized_events,
            "event_counts": self.event_counts,
            "last_reset": reset_at,
            "collected_at": self.collected_at.isoformat(),
        }
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class MetricsCollector(ABC):
    """Interface that every metrics collector has to implement.

    A collector is bound to one named component (e.g. deduplication,
    throttling, escalation) and exposes coroutines to record events,
    adjust counters, set gauges and read the collected data back.

    Implementations must be safe for concurrent use in async contexts.

    Example:
        class RedisMetricsCollector(MetricsCollector):
            async def record_event(self, event_type: str, metadata: dict) -> None:
                # Store in Redis stream
                await self.redis.xadd(f"metrics:{self.component}", {...})
    """

    def __init__(self, component: str, max_events: int = 1000) -> None:
        """Store the collector's configuration.

        Args:
            component: Name of the component being monitored.
            max_events: Upper bound on the number of retained events.
        """
        self.max_events = max_events
        self.component = component

    @abstractmethod
    async def record_event(
        self,
        event_type: str,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Record one metric event.

        Args:
            event_type: Identifier naming the kind of event.
            metadata: Optional event-specific data.
        """

    @abstractmethod
    async def increment(self, metric: str, value: int = 1) -> None:
        """Raise a counter.

        Args:
            metric: Counter name.
            value: Amount to add (1 unless given).
        """

    @abstractmethod
    async def decrement(self, metric: str, value: int = 1) -> None:
        """Lower a counter.

        Args:
            metric: Counter name.
            value: Amount to subtract (1 unless given).
        """

    @abstractmethod
    async def set_gauge(self, metric: str, value: float) -> None:
        """Assign a gauge an explicit value.

        Args:
            metric: Gauge name.
            value: Value to store.
        """

    @abstractmethod
    async def get_counter(self, metric: str) -> int:
        """Read a counter.

        Args:
            metric: Counter name.

        Returns:
            The counter's current value (0 if it was never set).
        """

    @abstractmethod
    async def get_stats(self) -> MetricSnapshot:
        """Return the aggregated statistics.

        Returns:
            A MetricSnapshot describing the current metrics state.
        """

    @abstractmethod
    async def reset(self) -> None:
        """Return every metric to its initial state."""
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class InMemoryMetricsCollector(MetricsCollector):
    """In-memory metrics collector for single-process use.

    Every operation runs under one ``asyncio.Lock``, which keeps state
    consistent between coroutines on the same event loop. An asyncio lock
    does not protect against access from other OS threads.

    Suitable for single-process deployments and development/testing.

    Note: Data is lost on process restart.

    Attributes:
        component: Name of the component being monitored.
        max_events: Maximum number of events to retain. A value of zero
            or less retains no events (per-type counts are still kept).
    """

    def __init__(self, component: str, max_events: int = 1000) -> None:
        """Initialize the in-memory collector.

        Args:
            component: Name of the component being monitored.
            max_events: Maximum number of events to retain.
        """
        super().__init__(component, max_events)
        self._counters: dict[str, int] = defaultdict(int)
        self._gauges: dict[str, float] = {}
        self._events: list[MetricEvent] = []
        self._event_counts: dict[str, int] = defaultdict(int)
        self._last_reset: datetime | None = None
        self._lock = asyncio.Lock()

    async def record_event(
        self,
        event_type: str,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Record a metric event.

        At most ``max_events`` events are kept; the oldest are discarded
        once the limit is exceeded. The per-type total is always updated,
        even when the event itself is not retained.

        Args:
            event_type: Type identifier for the event.
            metadata: Optional event-specific data.
        """
        event = MetricEvent(
            event_type=event_type,
            timestamp=datetime.utcnow(),
            metadata=metadata or {},
        )

        async with self._lock:
            self._event_counts[event_type] += 1

            if self.max_events <= 0:
                # A slice such as ``events[-0:]`` selects the whole list, so
                # a zero limit has to be handled explicitly.
                self._events.clear()
                return

            self._events.append(event)
            overflow = len(self._events) - self.max_events
            if overflow > 0:
                del self._events[:overflow]

    async def increment(self, metric: str, value: int = 1) -> None:
        """Increment a counter metric.

        Args:
            metric: Name of the counter.
            value: Amount to increment by (default 1).
        """
        async with self._lock:
            self._counters[metric] += value

    async def decrement(self, metric: str, value: int = 1) -> None:
        """Decrement a counter metric.

        Args:
            metric: Name of the counter.
            value: Amount to decrement by (default 1).
        """
        async with self._lock:
            self._counters[metric] -= value

    async def set_gauge(self, metric: str, value: float) -> None:
        """Set a gauge metric to a specific value.

        Args:
            metric: Name of the gauge.
            value: Value to set.
        """
        async with self._lock:
            self._gauges[metric] = value

    async def get_counter(self, metric: str) -> int:
        """Get the current counter value.

        Args:
            metric: Name of the counter.

        Returns:
            Current counter value (0 if not set).
        """
        async with self._lock:
            # .get() avoids creating an entry in the defaultdict on read.
            return self._counters.get(metric, 0)

    async def get_gauge(self, metric: str) -> float | None:
        """Get the current gauge value.

        Args:
            metric: Name of the gauge.

        Returns:
            Current gauge value, or None if not set.
        """
        async with self._lock:
            return self._gauges.get(metric)

    async def get_events(
        self,
        event_type: str | None = None,
        limit: int = 100,
    ) -> list[MetricEvent]:
        """Get recent events, optionally filtered by type.

        Args:
            event_type: Optional filter for a specific event type. A falsy
                value (None or "") disables filtering.
            limit: Maximum number of events to return. Zero or a negative
                value yields an empty list.

        Returns:
            List of matching events (newest first).
        """
        if limit <= 0:
            # ``events[-0:]`` would return every event instead of none.
            return []

        async with self._lock:
            events = self._events
            if event_type:
                events = [e for e in events if e.event_type == event_type]
            return list(reversed(events[-limit:]))

    async def get_stats(self) -> MetricSnapshot:
        """Get aggregated statistics.

        Returns:
            MetricSnapshot holding copies of the counters and per-type
            event counts, the 100 most recent events and the last reset
            time. Gauges are not part of the snapshot.
        """
        async with self._lock:
            return MetricSnapshot(
                counters=dict(self._counters),
                events=list(self._events[-100:]),  # Last 100 events in snapshot
                event_counts=dict(self._event_counts),
                last_reset=self._last_reset,
            )

    async def reset(self) -> None:
        """Reset all metrics to the initial state and stamp the reset time."""
        async with self._lock:
            self._counters.clear()
            self._gauges.clear()
            self._events.clear()
            self._event_counts.clear()
            self._last_reset = datetime.utcnow()
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
class MetricsRegistry:
    """Singleton registry for managing metrics collectors.

    Provides centralized access to collectors for different components.
    Collectors are created lazily, on first access, by the configured
    collector factory.

    Sharing one registry means all parts of the application use the same
    collector instance per component.

    Usage:
        # Get the singleton instance
        registry = MetricsRegistry.get_instance()

        # Get or create a collector (get_collector is a coroutine)
        dedup_metrics = await registry.get_collector("deduplication")
        throttle_metrics = await registry.get_collector("throttling")

        # Get all collectors
        all_collectors = registry.list_collectors()

        # Get combined stats
        combined = await registry.get_all_stats()

    Concurrency:
        ``get_collector`` creates collectors under an async lock so that
        coroutines requesting the same component share one instance.
        ``get_collector_sync`` and ``remove_collector`` do not take the lock.
    """

    _instance: ClassVar[MetricsRegistry | None] = None
    # Declared for the singleton but not used by any method in this class.
    _instance_lock: ClassVar[asyncio.Lock | None] = None

    # Component names for standard collectors
    DEDUPLICATION = "deduplication"
    THROTTLING = "throttling"
    ESCALATION = "escalation"
    ROUTING = "routing"
    DISPATCHER = "dispatcher"

    def __init__(self) -> None:
        """Initialize the registry.

        Note: Use get_instance() instead of direct instantiation.
        """
        self._collectors: dict[str, MetricsCollector] = {}
        self._collector_factory = InMemoryMetricsCollector
        self._lock = asyncio.Lock()

    @classmethod
    def get_instance(cls) -> MetricsRegistry:
        """Get the singleton registry instance.

        Creates a new instance if none exists.

        Returns:
            The singleton MetricsRegistry instance.
        """
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def reset_instance(cls) -> None:
        """Reset the singleton instance.

        Primarily useful for testing to ensure a clean state.
        """
        cls._instance = None

    def set_collector_factory(
        self,
        factory: type[MetricsCollector],
    ) -> None:
        """Set the factory for creating new collectors.

        Only collectors created afterwards use the new factory; collectors
        already registered are left as they are.

        Args:
            factory: Class to use for creating collectors. It is called
                with ``component`` and ``max_events`` keyword arguments.

        Example:
            # Switch to Redis-based collectors
            registry.set_collector_factory(RedisMetricsCollector)
        """
        self._collector_factory = factory

    async def get_collector(
        self,
        component: str,
        max_events: int = 1000,
    ) -> MetricsCollector:
        """Get or create a collector for a component.

        If a collector for the component doesn't exist, creates one
        using the configured factory.

        Args:
            component: Name of the component.
            max_events: Maximum events for new collectors (ignored when the
                collector already exists).

        Returns:
            MetricsCollector for the component.
        """
        async with self._lock:
            if component not in self._collectors:
                self._collectors[component] = self._collector_factory(
                    component=component,
                    max_events=max_events,
                )
            return self._collectors[component]

    def get_collector_sync(
        self,
        component: str,
        max_events: int = 1000,
    ) -> MetricsCollector:
        """Synchronous version of get_collector.

        Creates collectors without the async lock. Use with caution in
        concurrent contexts - prefer get_collector() when possible.

        Args:
            component: Name of the component.
            max_events: Maximum events for new collectors.

        Returns:
            MetricsCollector for the component.
        """
        if component not in self._collectors:
            self._collectors[component] = self._collector_factory(
                component=component,
                max_events=max_events,
            )
        return self._collectors[component]

    def list_collectors(self) -> list[str]:
        """List all registered component names.

        Returns:
            List of component names with collectors.
        """
        return list(self._collectors)

    async def get_all_stats(self) -> dict[str, MetricSnapshot]:
        """Get stats from all registered collectors.

        Returns:
            Dictionary mapping component names to their snapshots, for the
            collectors that were registered when the call started.
        """
        # Iterate over a snapshot: each await yields to the event loop, and
        # a collector registered or removed meanwhile would otherwise raise
        # "dictionary changed size during iteration".
        registered = list(self._collectors.items())
        results: dict[str, MetricSnapshot] = {}
        for component, collector in registered:
            results[component] = await collector.get_stats()
        return results

    async def reset_all(self) -> None:
        """Reset all collectors.

        Clears metrics for the components registered when the call started.
        """
        # Snapshot for the same reason as in get_all_stats().
        for collector in list(self._collectors.values()):
            await collector.reset()

    def remove_collector(self, component: str) -> bool:
        """Remove a collector from the registry.

        Args:
            component: Name of the component.

        Returns:
            True if removed, False if not found.
        """
        if component in self._collectors:
            del self._collectors[component]
            return True
        return False
|