truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
- truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
- truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
- truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
truthound_dashboard/core/notifications/deduplication/stores.py (new file)
@@ -0,0 +1,2365 @@
"""Storage backends for deduplication state.

This module provides storage backends for tracking sent notifications
and detecting duplicates.

Storage Backends:
    - InMemoryDeduplicationStore: Simple in-memory storage (development)
    - SQLiteDeduplicationStore: Persistent SQLite storage (production)
    - RedisDeduplicationStore: Redis-based storage (distributed deployments)

Each store tracks fingerprints with timestamps and supports
automatic cleanup of expired entries.
"""

from __future__ import annotations

import json
import sqlite3
import threading
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from typing import TYPE_CHECKING, Any

# Optional Redis dependency
try:
    import redis
    import redis.asyncio

    REDIS_AVAILABLE = True
except ImportError:
    REDIS_AVAILABLE = False
    redis = None  # type: ignore[assignment]

if TYPE_CHECKING:
    import redis as redis_sync
    import redis.asyncio as redis_async


@dataclass
class DeduplicationEntry:
    """A stored deduplication entry.

    Attributes:
        fingerprint: Unique fingerprint identifying the notification.
        first_seen: When this fingerprint was first seen.
        last_seen: When this fingerprint was last seen.
        count: Number of times this fingerprint was seen.
        metadata: Additional entry metadata.
    """

    fingerprint: str
    first_seen: datetime
    last_seen: datetime
    count: int = 1
    metadata: dict[str, Any] = field(default_factory=dict)

    def is_expired(self, window_seconds: int) -> bool:
        """Check if entry has expired based on window."""
        expiry = self.last_seen + timedelta(seconds=window_seconds)
        return datetime.utcnow() > expiry


class BaseDeduplicationStore(ABC):
    """Abstract base class for deduplication storage.

    All stores must implement methods for checking, recording,
    and cleaning up deduplication entries.
    """

    @abstractmethod
    def exists(self, fingerprint: str, window_seconds: int) -> bool:
        """Check if fingerprint exists within window.

        Args:
            fingerprint: The fingerprint to check.
            window_seconds: Time window in seconds.

        Returns:
            True if fingerprint exists and is not expired.
        """
        ...

    @abstractmethod
    def record(self, fingerprint: str, metadata: dict[str, Any] | None = None) -> None:
        """Record a fingerprint as sent.

        Args:
            fingerprint: The fingerprint to record.
            metadata: Optional metadata to store.
        """
        ...

    @abstractmethod
    def get(self, fingerprint: str) -> DeduplicationEntry | None:
        """Get entry by fingerprint.

        Args:
            fingerprint: The fingerprint to look up.

        Returns:
            Entry if found, None otherwise.
        """
        ...

    @abstractmethod
    def cleanup(self, max_age_seconds: int) -> int:
        """Remove expired entries.

        Args:
            max_age_seconds: Maximum age of entries to keep.

        Returns:
            Number of entries removed.
        """
        ...

    @abstractmethod
    def clear(self) -> None:
        """Clear all entries."""
        ...

    @abstractmethod
    def count(self) -> int:
        """Get total entry count."""
        ...


class InMemoryDeduplicationStore(BaseDeduplicationStore):
    """In-memory deduplication storage.

    Simple thread-safe in-memory storage suitable for
    development and single-process deployments.

    Note: Data is lost on process restart.
    """

    def __init__(self) -> None:
        """Initialize in-memory store."""
        self._entries: dict[str, DeduplicationEntry] = {}
        self._lock = threading.RLock()

    def exists(self, fingerprint: str, window_seconds: int) -> bool:
        """Check if fingerprint exists within window."""
        with self._lock:
            entry = self._entries.get(fingerprint)
            if entry is None:
                return False
            return not entry.is_expired(window_seconds)

    def record(self, fingerprint: str, metadata: dict[str, Any] | None = None) -> None:
        """Record a fingerprint."""
        now = datetime.utcnow()
        with self._lock:
            if fingerprint in self._entries:
                entry = self._entries[fingerprint]
                entry.last_seen = now
                entry.count += 1
                if metadata:
                    entry.metadata.update(metadata)
            else:
                self._entries[fingerprint] = DeduplicationEntry(
                    fingerprint=fingerprint,
                    first_seen=now,
                    last_seen=now,
                    count=1,
                    metadata=metadata or {},
                )

    def get(self, fingerprint: str) -> DeduplicationEntry | None:
        """Get entry by fingerprint."""
        with self._lock:
            return self._entries.get(fingerprint)

    def cleanup(self, max_age_seconds: int) -> int:
        """Remove expired entries."""
        cutoff = datetime.utcnow() - timedelta(seconds=max_age_seconds)
        removed = 0

        with self._lock:
            expired = [
                fp for fp, entry in self._entries.items()
                if entry.last_seen < cutoff
            ]
            for fp in expired:
                del self._entries[fp]
                removed += 1

        return removed

    def clear(self) -> None:
        """Clear all entries."""
        with self._lock:
            self._entries.clear()

    def count(self) -> int:
        """Get total entry count."""
        with self._lock:
            return len(self._entries)

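# --- Editor's note: illustrative usage sketch, not part of stores.py ---
# A dispatcher would typically check a store before sending and record the
# fingerprint afterwards; the fingerprint and metadata values below are made up.
example_store = InMemoryDeduplicationStore()
example_fp = "validation-failed:source-42"
if not example_store.exists(example_fp, window_seconds=300):
    # send the notification here, then remember it for the 5-minute window
    example_store.record(example_fp, metadata={"channel": "slack"})
# --- end editor's note ---
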
class SQLiteDeduplicationStore(BaseDeduplicationStore):
    """SQLite-based persistent deduplication storage.

    Provides durable storage that survives process restarts.
    Thread-safe using connection pooling.

    Attributes:
        db_path: Path to SQLite database file.
    """

    def __init__(self, db_path: str | Path = "deduplication.db") -> None:
        """Initialize SQLite store.

        Args:
            db_path: Path to database file.
        """
        self.db_path = Path(db_path)
        self._local = threading.local()
        self._init_db()

    def _get_connection(self) -> sqlite3.Connection:
        """Get thread-local database connection."""
        if not hasattr(self._local, "connection"):
            self._local.connection = sqlite3.connect(
                str(self.db_path),
                check_same_thread=False,
            )
            self._local.connection.row_factory = sqlite3.Row
        return self._local.connection

    def _init_db(self) -> None:
        """Initialize database schema."""
        conn = self._get_connection()
        conn.execute("""
            CREATE TABLE IF NOT EXISTS deduplication_entries (
                fingerprint TEXT PRIMARY KEY,
                first_seen REAL NOT NULL,
                last_seen REAL NOT NULL,
                count INTEGER NOT NULL DEFAULT 1,
                metadata TEXT
            )
        """)
        conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_dedup_last_seen
            ON deduplication_entries(last_seen)
        """)
        conn.commit()

    def exists(self, fingerprint: str, window_seconds: int) -> bool:
        """Check if fingerprint exists within window."""
        conn = self._get_connection()
        cutoff = time.time() - window_seconds

        cursor = conn.execute(
            """
            SELECT 1 FROM deduplication_entries
            WHERE fingerprint = ? AND last_seen >= ?
            """,
            (fingerprint, cutoff),
        )
        return cursor.fetchone() is not None

    def record(self, fingerprint: str, metadata: dict[str, Any] | None = None) -> None:
        """Record a fingerprint."""
        import json

        now = time.time()
        conn = self._get_connection()

        # Try to update existing
        cursor = conn.execute(
            """
            UPDATE deduplication_entries
            SET last_seen = ?, count = count + 1
            WHERE fingerprint = ?
            """,
            (now, fingerprint),
        )

        if cursor.rowcount == 0:
            # Insert new entry
            metadata_json = json.dumps(metadata) if metadata else None
            conn.execute(
                """
                INSERT INTO deduplication_entries
                (fingerprint, first_seen, last_seen, count, metadata)
                VALUES (?, ?, ?, 1, ?)
                """,
                (fingerprint, now, now, metadata_json),
            )

        conn.commit()

    def get(self, fingerprint: str) -> DeduplicationEntry | None:
        """Get entry by fingerprint."""
        import json

        conn = self._get_connection()
        cursor = conn.execute(
            """
            SELECT fingerprint, first_seen, last_seen, count, metadata
            FROM deduplication_entries
            WHERE fingerprint = ?
            """,
            (fingerprint,),
        )
        row = cursor.fetchone()

        if row is None:
            return None

        metadata = {}
        if row["metadata"]:
            try:
                metadata = json.loads(row["metadata"])
            except json.JSONDecodeError:
                pass

        return DeduplicationEntry(
            fingerprint=row["fingerprint"],
            first_seen=datetime.fromtimestamp(row["first_seen"]),
            last_seen=datetime.fromtimestamp(row["last_seen"]),
            count=row["count"],
            metadata=metadata,
        )

    def cleanup(self, max_age_seconds: int) -> int:
        """Remove expired entries."""
        conn = self._get_connection()
        cutoff = time.time() - max_age_seconds

        cursor = conn.execute(
            """
            DELETE FROM deduplication_entries
            WHERE last_seen < ?
            """,
            (cutoff,),
        )
        conn.commit()

        return cursor.rowcount

    def clear(self) -> None:
        """Clear all entries."""
        conn = self._get_connection()
        conn.execute("DELETE FROM deduplication_entries")
        conn.commit()

    def count(self) -> int:
        """Get total entry count."""
        conn = self._get_connection()
        cursor = conn.execute("SELECT COUNT(*) FROM deduplication_entries")
        return cursor.fetchone()[0]

    def close(self) -> None:
        """Close database connection."""
        if hasattr(self._local, "connection"):
            self._local.connection.close()
            del self._local.connection

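# --- Editor's note: illustrative usage sketch, not part of stores.py ---
# The SQLite store keeps epoch-float timestamps on disk, so dedup state survives
# a restart; the file path and retention values below are assumptions.
sqlite_store = SQLiteDeduplicationStore(db_path="var/dedup.db")
if not sqlite_store.exists("digest:weekly", window_seconds=86400):
    sqlite_store.record("digest:weekly", metadata={"kind": "digest"})
removed = sqlite_store.cleanup(max_age_seconds=7 * 86400)  # drop entries older than 7 days
sqlite_store.close()
# --- end editor's note ---
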
class RedisDeduplicationStore(BaseDeduplicationStore):
    """Redis-based deduplication store for distributed deployments.

    Uses Redis strings with TTL for automatic expiration.
    Supports both sync and async Redis clients with connection pooling.

    This store is ideal for:
    - Multi-process deployments
    - Distributed systems
    - High-concurrency scenarios
    - Deployments requiring shared state

    Note: Requires the 'redis' optional dependency.
    Install with: pip install truthound-dashboard[redis]

    Attributes:
        redis_url: Redis connection URL.
        key_prefix: Prefix for all Redis keys.
        default_ttl: Default TTL in seconds for entries.
    """

    def __init__(
        self,
        redis_url: str = "redis://localhost:6379/0",
        key_prefix: str = "truthound:dedup:",
        default_ttl: int = 3600,  # 1 hour
        max_connections: int = 10,
        socket_timeout: float = 5.0,
        socket_connect_timeout: float = 5.0,
        retry_on_timeout: bool = True,
    ) -> None:
        """Initialize Redis store.

        Args:
            redis_url: Redis connection URL (e.g., redis://localhost:6379/0).
            key_prefix: Prefix for all deduplication keys.
            default_ttl: Default TTL in seconds for entries.
            max_connections: Maximum connections in the pool.
            socket_timeout: Socket timeout in seconds.
            socket_connect_timeout: Connection timeout in seconds.
            retry_on_timeout: Whether to retry on timeout.

        Raises:
            ImportError: If redis package is not installed.
        """
        if not REDIS_AVAILABLE:
            raise ImportError(
                "Redis support requires the 'redis' package. "
                "Install with: pip install truthound-dashboard[redis] "
                "or pip install redis"
            )

        self.redis_url = redis_url
        self.key_prefix = key_prefix
        self.default_ttl = default_ttl
        self.max_connections = max_connections
        self.socket_timeout = socket_timeout
        self.socket_connect_timeout = socket_connect_timeout
        self.retry_on_timeout = retry_on_timeout

        # Connection pool for sync client
        self._pool: redis.ConnectionPool | None = None
        self._client: redis.Redis | None = None

        # Connection pool for async client
        self._async_pool: redis.asyncio.ConnectionPool | None = None
        self._async_client: redis.asyncio.Redis | None = None

        # Lock for thread-safe initialization
        self._lock = threading.Lock()

    def _get_key(self, fingerprint: str) -> str:
        """Get full Redis key for fingerprint.

        Args:
            fingerprint: The fingerprint string.

        Returns:
            Full Redis key with prefix.
        """
        return f"{self.key_prefix}{fingerprint}"

    def _create_pool(self) -> redis.ConnectionPool:
        """Create a connection pool for sync client.

        Returns:
            Configured connection pool.
        """
        return redis.ConnectionPool.from_url(
            self.redis_url,
            max_connections=self.max_connections,
            socket_timeout=self.socket_timeout,
            socket_connect_timeout=self.socket_connect_timeout,
            retry_on_timeout=self.retry_on_timeout,
        )

    async def _create_async_pool(self) -> redis.asyncio.ConnectionPool:
        """Create a connection pool for async client.

        Returns:
            Configured async connection pool.
        """
        return redis.asyncio.ConnectionPool.from_url(
            self.redis_url,
            max_connections=self.max_connections,
            socket_timeout=self.socket_timeout,
            socket_connect_timeout=self.socket_connect_timeout,
            retry_on_timeout=self.retry_on_timeout,
        )

    @property
    def client(self) -> redis.Redis:
        """Get sync Redis client with connection pooling.

        Creates the connection pool and client on first access.

        Returns:
            Redis client instance.
        """
        if self._client is None:
            with self._lock:
                if self._client is None:
                    self._pool = self._create_pool()
                    self._client = redis.Redis(connection_pool=self._pool)
        return self._client

    async def get_async_client(self) -> redis.asyncio.Redis:
        """Get async Redis client with connection pooling.

        Creates the async connection pool and client on first access.

        Returns:
            Async Redis client instance.
        """
        if self._async_client is None:
            self._async_pool = await self._create_async_pool()
            self._async_client = redis.asyncio.Redis(
                connection_pool=self._async_pool
            )
        return self._async_client

    def exists(self, fingerprint: str, window_seconds: int) -> bool:
        """Check if fingerprint exists in Redis.

        Note: Redis handles expiration via TTL, so window_seconds is not
        used here. The entry either exists (not expired) or doesn't.

        Args:
            fingerprint: The fingerprint to check.
            window_seconds: Time window (unused, TTL handles expiration).

        Returns:
            True if fingerprint exists and hasn't expired.
        """
        key = self._get_key(fingerprint)
        return self.client.exists(key) > 0

    async def exists_async(self, fingerprint: str, window_seconds: int) -> bool:
        """Async check if fingerprint exists in Redis.

        Args:
            fingerprint: The fingerprint to check.
            window_seconds: Time window (unused, TTL handles expiration).

        Returns:
            True if fingerprint exists and hasn't expired.
        """
        client = await self.get_async_client()
        key = self._get_key(fingerprint)
        return await client.exists(key) > 0

    def record(self, fingerprint: str, metadata: dict[str, Any] | None = None) -> None:
        """Record fingerprint with TTL.

        Stores the fingerprint with metadata and sets TTL for auto-expiration.
        If the fingerprint already exists, updates metadata and resets TTL.

        Args:
            fingerprint: The fingerprint to record.
            metadata: Optional metadata to store with the entry.
        """
        key = self._get_key(fingerprint)
        now = time.time()

        # Get existing entry to preserve first_seen and increment count
        existing = self.client.get(key)
        if existing:
            try:
                data = json.loads(existing)
                data["last_seen"] = now
                data["count"] = data.get("count", 1) + 1
                if metadata:
                    data["metadata"].update(metadata)
            except (json.JSONDecodeError, KeyError):
                data = {
                    "first_seen": now,
                    "last_seen": now,
                    "count": 1,
                    "metadata": metadata or {},
                }
        else:
            data = {
                "first_seen": now,
                "last_seen": now,
                "count": 1,
                "metadata": metadata or {},
            }

        value = json.dumps(data)
        self.client.setex(key, self.default_ttl, value)

    async def record_async(
        self, fingerprint: str, metadata: dict[str, Any] | None = None
    ) -> None:
        """Async record fingerprint with TTL.

        Args:
            fingerprint: The fingerprint to record.
            metadata: Optional metadata to store with the entry.
        """
        client = await self.get_async_client()
        key = self._get_key(fingerprint)
        now = time.time()

        # Get existing entry to preserve first_seen and increment count
        existing = await client.get(key)
        if existing:
            try:
                data = json.loads(existing)
                data["last_seen"] = now
                data["count"] = data.get("count", 1) + 1
                if metadata:
                    data["metadata"].update(metadata)
            except (json.JSONDecodeError, KeyError):
                data = {
                    "first_seen": now,
                    "last_seen": now,
                    "count": 1,
                    "metadata": metadata or {},
                }
        else:
            data = {
                "first_seen": now,
                "last_seen": now,
                "count": 1,
                "metadata": metadata or {},
            }

        value = json.dumps(data)
        await client.setex(key, self.default_ttl, value)

    def get(self, fingerprint: str) -> DeduplicationEntry | None:
        """Get entry by fingerprint.

        Args:
            fingerprint: The fingerprint to look up.

        Returns:
            Entry if found, None otherwise.
        """
        key = self._get_key(fingerprint)
        data = self.client.get(key)

        if data is None:
            return None

        try:
            parsed = json.loads(data)
            return DeduplicationEntry(
                fingerprint=fingerprint,
                first_seen=datetime.fromtimestamp(parsed["first_seen"]),
                last_seen=datetime.fromtimestamp(parsed["last_seen"]),
                count=parsed.get("count", 1),
                metadata=parsed.get("metadata", {}),
            )
        except (json.JSONDecodeError, KeyError):
            return None

    async def get_async(self, fingerprint: str) -> DeduplicationEntry | None:
        """Async get entry by fingerprint.

        Args:
            fingerprint: The fingerprint to look up.

        Returns:
            Entry if found, None otherwise.
        """
        client = await self.get_async_client()
        key = self._get_key(fingerprint)
        data = await client.get(key)

        if data is None:
            return None

        try:
            parsed = json.loads(data)
            return DeduplicationEntry(
                fingerprint=fingerprint,
                first_seen=datetime.fromtimestamp(parsed["first_seen"]),
                last_seen=datetime.fromtimestamp(parsed["last_seen"]),
                count=parsed.get("count", 1),
                metadata=parsed.get("metadata", {}),
            )
        except (json.JSONDecodeError, KeyError):
            return None

    def count(self) -> int:
        """Count entries (approximate using SCAN).

        Uses SCAN to iterate through keys without blocking Redis.

        Returns:
            Approximate count of deduplication entries.
        """
        count = 0
        cursor = 0
        pattern = f"{self.key_prefix}*"

        while True:
            cursor, keys = self.client.scan(cursor, match=pattern, count=100)
            count += len(keys)
            if cursor == 0:
                break

        return count

    async def count_async(self) -> int:
        """Async count entries.

        Returns:
            Approximate count of deduplication entries.
        """
        client = await self.get_async_client()
        count = 0
        cursor = 0
        pattern = f"{self.key_prefix}*"

        while True:
            cursor, keys = await client.scan(cursor, match=pattern, count=100)
            count += len(keys)
            if cursor == 0:
                break

        return count

    def cleanup(self, max_age_seconds: int) -> int:
        """Redis handles expiration via TTL, no manual cleanup needed.

        This method is a no-op for Redis as TTL handles expiration automatically.

        Args:
            max_age_seconds: Maximum age (unused for Redis).

        Returns:
            Always returns 0 as Redis handles expiration.
        """
        # Redis handles expiration automatically via TTL
        return 0

    def clear(self) -> None:
        """Clear all deduplication keys.

        Uses SCAN to find and delete all keys with the dedup prefix.
        This is done in batches to avoid blocking Redis.
        """
        pattern = f"{self.key_prefix}*"
        cursor = 0

        while True:
            cursor, keys = self.client.scan(cursor, match=pattern, count=100)
            if keys:
                self.client.delete(*keys)
            if cursor == 0:
                break

    async def clear_async(self) -> None:
        """Async clear all deduplication keys."""
        client = await self.get_async_client()
        pattern = f"{self.key_prefix}*"
        cursor = 0

        while True:
            cursor, keys = await client.scan(cursor, match=pattern, count=100)
            if keys:
                await client.delete(*keys)
            if cursor == 0:
                break

    def health_check(self) -> bool:
        """Check Redis connection health.

        Performs a PING command to verify connectivity.

        Returns:
            True if Redis is reachable, False otherwise.
        """
        try:
            return self.client.ping()
        except Exception:
            return False

    async def health_check_async(self) -> bool:
        """Async check Redis connection health.

        Returns:
            True if Redis is reachable, False otherwise.
        """
        try:
            client = await self.get_async_client()
            return await client.ping()
        except Exception:
            return False

    def get_info(self) -> dict[str, Any]:
        """Get Redis server information.

        Returns:
            Dictionary containing Redis server info.
        """
        try:
            info = self.client.info()
            return {
                "redis_version": info.get("redis_version"),
                "connected_clients": info.get("connected_clients"),
                "used_memory_human": info.get("used_memory_human"),
                "uptime_in_seconds": info.get("uptime_in_seconds"),
                "db0": info.get("db0", {}),
            }
        except Exception as e:
            return {"error": str(e)}

    def set_ttl(self, fingerprint: str, ttl_seconds: int) -> bool:
        """Set custom TTL for a specific fingerprint.

        Args:
            fingerprint: The fingerprint to update.
            ttl_seconds: New TTL in seconds.

        Returns:
            True if TTL was set, False if key doesn't exist.
        """
        key = self._get_key(fingerprint)
        return self.client.expire(key, ttl_seconds)

    def get_ttl(self, fingerprint: str) -> int:
        """Get remaining TTL for a fingerprint.

        Args:
            fingerprint: The fingerprint to check.

        Returns:
            TTL in seconds, -1 if no TTL, -2 if key doesn't exist.
        """
        key = self._get_key(fingerprint)
        return self.client.ttl(key)

    def close(self) -> None:
        """Close all connections and pools.

        Should be called when the store is no longer needed
        to release resources.
        """
        if self._client is not None:
            self._client.close()
            self._client = None

        if self._pool is not None:
            self._pool.disconnect()
            self._pool = None

        # Note: Async client/pool should be closed in async context
        # using close_async() method

    async def close_async(self) -> None:
        """Async close all connections and pools."""
        if self._async_client is not None:
            await self._async_client.close()
            self._async_client = None

        if self._async_pool is not None:
            await self._async_pool.disconnect()
            self._async_pool = None

    def __enter__(self) -> "RedisDeduplicationStore":
        """Context manager entry."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Context manager exit, closes connections."""
        self.close()

    async def __aenter__(self) -> "RedisDeduplicationStore":
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit, closes connections."""
        await self.close_async()


# ============================================================================
# Redis Streams Deduplication Store
# ============================================================================

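# --- Editor's note: illustrative usage sketch, not part of stores.py ---
# The Redis store above relies on SETEX/TTL rather than manual cleanup; a
# minimal async round trip (the URL and TTL values are assumptions) looks like:
import asyncio

async def _demo_redis_dedup() -> None:
    redis_store = RedisDeduplicationStore(
        redis_url="redis://localhost:6379/0", default_ttl=600
    )
    try:
        if not await redis_store.exists_async("alert:fp", window_seconds=600):
            await redis_store.record_async("alert:fp", metadata={"severity": "high"})
    finally:
        await redis_store.close_async()

# asyncio.run(_demo_redis_dedup())
# --- end editor's note ---
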
@dataclass
class DeduplicationMetrics:
    """Metrics for deduplication store operations.

    Attributes:
        hits: Number of duplicate detections (cache hits).
        misses: Number of non-duplicate entries (cache misses).
        records: Number of fingerprints recorded.
        errors: Number of Redis errors encountered.
        fallbacks: Number of times fallback to InMemory was used.
        reconnections: Number of successful reconnections.
    """

    hits: int = 0
    misses: int = 0
    records: int = 0
    errors: int = 0
    fallbacks: int = 0
    reconnections: int = 0

    def to_dict(self) -> dict[str, int]:
        """Convert metrics to dictionary."""
        return {
            "hits": self.hits,
            "misses": self.misses,
            "records": self.records,
            "errors": self.errors,
            "fallbacks": self.fallbacks,
            "reconnections": self.reconnections,
            "total_checks": self.hits + self.misses,
            "hit_rate": round(self.hits / max(1, self.hits + self.misses) * 100, 2),
        }

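# --- Editor's note: worked example for the hit-rate formula above, not part of stores.py ---
metrics = DeduplicationMetrics(hits=30, misses=90, records=90)
assert metrics.to_dict()["hit_rate"] == 25.0  # 30 / (30 + 90) * 100
# --- end editor's note ---
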
class RedisStreamsDeduplicationStore(BaseDeduplicationStore):
|
|
905
|
+
"""Production-ready Redis Streams based deduplication store.
|
|
906
|
+
|
|
907
|
+
Uses Redis Streams for robust distributed deduplication with:
|
|
908
|
+
- Connection pool management with configurable pool size
|
|
909
|
+
- Automatic reconnection with exponential backoff
|
|
910
|
+
- TTL management for stream entries (auto-cleanup)
|
|
911
|
+
- Consumer groups for distributed deduplication
|
|
912
|
+
- Graceful degradation (fallback to InMemory on Redis failure)
|
|
913
|
+
- Health check endpoint support
|
|
914
|
+
- Comprehensive metrics collection
|
|
915
|
+
|
|
916
|
+
Configuration via environment variables:
|
|
917
|
+
TRUTHOUND_DEDUP_REDIS_URL: Redis connection URL (default: redis://localhost:6379/0)
|
|
918
|
+
TRUTHOUND_DEDUP_REDIS_PREFIX: Key prefix (default: truthound:dedup:streams:)
|
|
919
|
+
TRUTHOUND_DEDUP_REDIS_TTL: Default TTL in seconds (default: 3600)
|
|
920
|
+
TRUTHOUND_DEDUP_REDIS_POOL_SIZE: Connection pool size (default: 10)
|
|
921
|
+
TRUTHOUND_DEDUP_REDIS_SOCKET_TIMEOUT: Socket timeout (default: 5.0)
|
|
922
|
+
TRUTHOUND_DEDUP_REDIS_CONNECT_TIMEOUT: Connection timeout (default: 5.0)
|
|
923
|
+
TRUTHOUND_DEDUP_REDIS_MAX_RETRIES: Max retry attempts (default: 3)
|
|
924
|
+
TRUTHOUND_DEDUP_REDIS_RETRY_BASE_DELAY: Base delay for exponential backoff (default: 1.0)
|
|
925
|
+
TRUTHOUND_DEDUP_REDIS_CONSUMER_GROUP: Consumer group name (default: truthound-dedup)
|
|
926
|
+
TRUTHOUND_DEDUP_REDIS_CONSUMER_NAME: Consumer name (default: auto-generated)
|
|
927
|
+
TRUTHOUND_DEDUP_REDIS_STREAM_MAX_LEN: Max stream length (default: 100000)
|
|
928
|
+
TRUTHOUND_DEDUP_FALLBACK_ENABLED: Enable fallback to InMemory (default: true)
|
|
929
|
+
|
|
930
|
+
Example:
|
|
931
|
+
# Basic usage
|
|
932
|
+
store = RedisStreamsDeduplicationStore()
|
|
933
|
+
|
|
934
|
+
# Custom configuration
|
|
935
|
+
store = RedisStreamsDeduplicationStore(
|
|
936
|
+
redis_url="redis://myredis:6379/1",
|
|
937
|
+
default_ttl=7200,
|
|
938
|
+
max_connections=20,
|
|
939
|
+
enable_fallback=True,
|
|
940
|
+
)
|
|
941
|
+
|
|
942
|
+
# With context manager
|
|
943
|
+
async with RedisStreamsDeduplicationStore() as store:
|
|
944
|
+
if not await store.exists_async("fingerprint", 300):
|
|
945
|
+
await store.record_async("fingerprint", {"key": "value"})
|
|
946
|
+
|
|
947
|
+
Note: Requires the 'redis' optional dependency.
|
|
948
|
+
Install with: pip install truthound-dashboard[redis]
|
|
949
|
+
"""
|
|
950
|
+
|
|
951
|
+
# Stream entry field names
|
|
952
|
+
FIELD_FINGERPRINT = "fingerprint"
|
|
953
|
+
FIELD_FIRST_SEEN = "first_seen"
|
|
954
|
+
FIELD_LAST_SEEN = "last_seen"
|
|
955
|
+
FIELD_COUNT = "count"
|
|
956
|
+
FIELD_METADATA = "metadata"
|
|
957
|
+
|
|
958
|
+
def __init__(
|
|
959
|
+
self,
|
|
960
|
+
redis_url: str | None = None,
|
|
961
|
+
key_prefix: str | None = None,
|
|
962
|
+
default_ttl: int | None = None,
|
|
963
|
+
max_connections: int | None = None,
|
|
964
|
+
socket_timeout: float | None = None,
|
|
965
|
+
socket_connect_timeout: float | None = None,
|
|
966
|
+
max_retries: int | None = None,
|
|
967
|
+
retry_base_delay: float | None = None,
|
|
968
|
+
consumer_group: str | None = None,
|
|
969
|
+
consumer_name: str | None = None,
|
|
970
|
+
stream_max_len: int | None = None,
|
|
971
|
+
enable_fallback: bool | None = None,
|
|
972
|
+
logger: Any | None = None,
|
|
973
|
+
) -> None:
|
|
974
|
+
"""Initialize Redis Streams deduplication store.
|
|
975
|
+
|
|
976
|
+
All parameters can be configured via environment variables if not
|
|
977
|
+
explicitly provided.
|
|
978
|
+
|
|
979
|
+
Args:
|
|
980
|
+
redis_url: Redis connection URL.
|
|
981
|
+
key_prefix: Prefix for all Redis keys.
|
|
982
|
+
default_ttl: Default TTL in seconds for entries.
|
|
983
|
+
max_connections: Maximum connections in the pool.
|
|
984
|
+
socket_timeout: Socket timeout in seconds.
|
|
985
|
+
socket_connect_timeout: Connection timeout in seconds.
|
|
986
|
+
max_retries: Maximum retry attempts for reconnection.
|
|
987
|
+
retry_base_delay: Base delay for exponential backoff.
|
|
988
|
+
consumer_group: Consumer group name for stream processing.
|
|
989
|
+
consumer_name: Consumer name (auto-generated if not provided).
|
|
990
|
+
stream_max_len: Maximum stream length (MAXLEN).
|
|
991
|
+
enable_fallback: Enable fallback to InMemory on Redis failure.
|
|
992
|
+
logger: Custom logger instance.
|
|
993
|
+
|
|
994
|
+
Raises:
|
|
995
|
+
ImportError: If redis package is not installed.
|
|
996
|
+
"""
|
|
997
|
+
import logging
|
|
998
|
+
import os
|
|
999
|
+
import uuid
|
|
1000
|
+
|
|
1001
|
+
if not REDIS_AVAILABLE:
|
|
1002
|
+
raise ImportError(
|
|
1003
|
+
"Redis support requires the 'redis' package. "
|
|
1004
|
+
"Install with: pip install truthound-dashboard[redis] "
|
|
1005
|
+
"or pip install redis"
|
|
1006
|
+
)
|
|
1007
|
+
|
|
1008
|
+
# Configuration from environment or parameters
|
|
1009
|
+
self.redis_url = redis_url or os.getenv(
|
|
1010
|
+
"TRUTHOUND_DEDUP_REDIS_URL", "redis://localhost:6379/0"
|
|
1011
|
+
)
|
|
1012
|
+
self.key_prefix = key_prefix or os.getenv(
|
|
1013
|
+
"TRUTHOUND_DEDUP_REDIS_PREFIX", "truthound:dedup:streams:"
|
|
1014
|
+
)
|
|
1015
|
+
self.default_ttl = default_ttl or int(
|
|
1016
|
+
os.getenv("TRUTHOUND_DEDUP_REDIS_TTL", "3600")
|
|
1017
|
+
)
|
|
1018
|
+
self.max_connections = max_connections or int(
|
|
1019
|
+
os.getenv("TRUTHOUND_DEDUP_REDIS_POOL_SIZE", "10")
|
|
1020
|
+
)
|
|
1021
|
+
self.socket_timeout = socket_timeout or float(
|
|
1022
|
+
os.getenv("TRUTHOUND_DEDUP_REDIS_SOCKET_TIMEOUT", "5.0")
|
|
1023
|
+
)
|
|
1024
|
+
self.socket_connect_timeout = socket_connect_timeout or float(
|
|
1025
|
+
os.getenv("TRUTHOUND_DEDUP_REDIS_CONNECT_TIMEOUT", "5.0")
|
|
1026
|
+
)
|
|
1027
|
+
self.max_retries = max_retries or int(
|
|
1028
|
+
os.getenv("TRUTHOUND_DEDUP_REDIS_MAX_RETRIES", "3")
|
|
1029
|
+
)
|
|
1030
|
+
self.retry_base_delay = retry_base_delay or float(
|
|
1031
|
+
os.getenv("TRUTHOUND_DEDUP_REDIS_RETRY_BASE_DELAY", "1.0")
|
|
1032
|
+
)
|
|
1033
|
+
self.consumer_group = consumer_group or os.getenv(
|
|
1034
|
+
"TRUTHOUND_DEDUP_REDIS_CONSUMER_GROUP", "truthound-dedup"
|
|
1035
|
+
)
|
|
1036
|
+
self.consumer_name = consumer_name or os.getenv(
|
|
1037
|
+
"TRUTHOUND_DEDUP_REDIS_CONSUMER_NAME", f"consumer-{uuid.uuid4().hex[:8]}"
|
|
1038
|
+
)
|
|
1039
|
+
self.stream_max_len = stream_max_len or int(
|
|
1040
|
+
os.getenv("TRUTHOUND_DEDUP_REDIS_STREAM_MAX_LEN", "100000")
|
|
1041
|
+
)
|
|
1042
|
+
|
|
1043
|
+
fallback_env = os.getenv("TRUTHOUND_DEDUP_FALLBACK_ENABLED", "true")
|
|
1044
|
+
self.enable_fallback = (
|
|
1045
|
+
enable_fallback
|
|
1046
|
+
if enable_fallback is not None
|
|
1047
|
+
else fallback_env.lower() == "true"
|
|
1048
|
+
)
|
|
1049
|
+
|
|
1050
|
+
# Logger setup
|
|
1051
|
+
self._logger = logger or logging.getLogger(__name__)
|
|
1052
|
+
|
|
1053
|
+
# Connection pool for sync client
|
|
1054
|
+
self._pool: redis.ConnectionPool | None = None
|
|
1055
|
+
self._client: redis.Redis | None = None
|
|
1056
|
+
|
|
1057
|
+
# Connection pool for async client
|
|
1058
|
+
self._async_pool: redis.asyncio.ConnectionPool | None = None
|
|
1059
|
+
self._async_client: redis.asyncio.Redis | None = None
|
|
1060
|
+
|
|
1061
|
+
# Locks for thread-safe initialization
|
|
1062
|
+
self._lock = threading.Lock()
|
|
1063
|
+
self._async_lock: Any = None # Created lazily for asyncio
|
|
1064
|
+
|
|
1065
|
+
# Fallback store for graceful degradation
|
|
1066
|
+
self._fallback_store: InMemoryDeduplicationStore | None = None
|
|
1067
|
+
self._using_fallback = False
|
|
1068
|
+
|
|
1069
|
+
# Connection state tracking
|
|
1070
|
+
self._connected = False
|
|
1071
|
+
self._retry_count = 0
|
|
1072
|
+
self._last_error: Exception | None = None
|
|
1073
|
+
self._last_error_time: float | None = None
|
|
1074
|
+
|
|
1075
|
+
# Metrics
|
|
1076
|
+
self._metrics = DeduplicationMetrics()
|
|
1077
|
+
|
|
1078
|
+
# Index tracking key (for fast lookups)
|
|
1079
|
+
self._index_key = f"{self.key_prefix}index"
|
|
1080
|
+
|
|
1081
|
+
# Stream key
|
|
1082
|
+
self._stream_key = f"{self.key_prefix}stream"
|
|
1083
|
+
|
|
1084
|
+
def _get_key(self, fingerprint: str) -> str:
|
|
1085
|
+
"""Get full Redis key for fingerprint.
|
|
1086
|
+
|
|
1087
|
+
Args:
|
|
1088
|
+
fingerprint: The fingerprint string.
|
|
1089
|
+
|
|
1090
|
+
Returns:
|
|
1091
|
+
Full Redis key with prefix.
|
|
1092
|
+
"""
|
|
1093
|
+
return f"{self.key_prefix}fp:{fingerprint}"
|
|
1094
|
+
|
|
1095
|
+
def _create_pool(self) -> "redis.ConnectionPool":
|
|
1096
|
+
"""Create a connection pool for sync client.
|
|
1097
|
+
|
|
1098
|
+
Returns:
|
|
1099
|
+
Configured connection pool.
|
|
1100
|
+
"""
|
|
1101
|
+
return redis.ConnectionPool.from_url(
|
|
1102
|
+
self.redis_url,
|
|
1103
|
+
max_connections=self.max_connections,
|
|
1104
|
+
socket_timeout=self.socket_timeout,
|
|
1105
|
+
socket_connect_timeout=self.socket_connect_timeout,
|
|
1106
|
+
retry_on_timeout=True,
|
|
1107
|
+
decode_responses=True,
|
|
1108
|
+
)
|
|
1109
|
+
|
|
1110
|
+
async def _create_async_pool(self) -> "redis.asyncio.ConnectionPool":
|
|
1111
|
+
"""Create a connection pool for async client.
|
|
1112
|
+
|
|
1113
|
+
Returns:
|
|
1114
|
+
Configured async connection pool.
|
|
1115
|
+
"""
|
|
1116
|
+
return redis.asyncio.ConnectionPool.from_url(
|
|
1117
|
+
self.redis_url,
|
|
1118
|
+
max_connections=self.max_connections,
|
|
1119
|
+
socket_timeout=self.socket_timeout,
|
|
1120
|
+
socket_connect_timeout=self.socket_connect_timeout,
|
|
1121
|
+
retry_on_timeout=True,
|
|
1122
|
+
decode_responses=True,
|
|
1123
|
+
)
|
|
1124
|
+
|
|
1125
|
+
def _get_fallback_store(self) -> InMemoryDeduplicationStore:
|
|
1126
|
+
"""Get or create fallback in-memory store.
|
|
1127
|
+
|
|
1128
|
+
Returns:
|
|
1129
|
+
InMemoryDeduplicationStore instance.
|
|
1130
|
+
"""
|
|
1131
|
+
if self._fallback_store is None:
|
|
1132
|
+
self._fallback_store = InMemoryDeduplicationStore()
|
|
1133
|
+
return self._fallback_store
|
|
1134
|
+
|
|
1135
|
+
def _calculate_backoff_delay(self) -> float:
|
|
1136
|
+
"""Calculate exponential backoff delay.
|
|
1137
|
+
|
|
1138
|
+
Returns:
|
|
1139
|
+
Delay in seconds.
|
|
1140
|
+
"""
|
|
1141
|
+
import random
|
|
1142
|
+
|
|
1143
|
+
# Exponential backoff with jitter
|
|
1144
|
+
delay = self.retry_base_delay * (2**self._retry_count)
|
|
1145
|
+
# Add jitter (up to 25% of delay)
|
|
1146
|
+
jitter = delay * random.uniform(0, 0.25)
|
|
1147
|
+
return min(delay + jitter, 60.0) # Cap at 60 seconds
|
|
1148
|
+
|
|
1149
|
+
def _handle_redis_error(self, error: Exception, operation: str) -> None:
|
|
1150
|
+
"""Handle Redis errors with logging and metrics.
|
|
1151
|
+
+        Args:
+            error: The exception that occurred.
+            operation: Name of the operation that failed.
+        """
+        self._metrics.errors += 1
+        self._last_error = error
+        self._last_error_time = time.time()
+        self._connected = False
+
+        self._logger.error(
+            f"Redis error during {operation}: {error}",
+            extra={
+                "operation": operation,
+                "error_type": type(error).__name__,
+                "retry_count": self._retry_count,
+            },
+        )
+
+    def _try_reconnect_sync(self) -> bool:
+        """Attempt to reconnect to Redis synchronously.
+
+        Returns:
+            True if reconnection successful, False otherwise.
+        """
+        if self._retry_count >= self.max_retries:
+            self._logger.warning(
+                f"Max retries ({self.max_retries}) reached, using fallback"
+            )
+            return False
+
+        delay = self._calculate_backoff_delay()
+        self._logger.info(
+            f"Attempting Redis reconnection in {delay:.2f}s (attempt {self._retry_count + 1}/{self.max_retries})"
+        )
+
+        time.sleep(delay)
+        self._retry_count += 1
+
+        try:
+            # Close existing connections
+            if self._client:
+                try:
+                    self._client.close()
+                except Exception:
+                    pass
+                self._client = None
+
+            if self._pool:
+                try:
+                    self._pool.disconnect()
+                except Exception:
+                    pass
+                self._pool = None
+
+            # Create new connection
+            self._pool = self._create_pool()
+            self._client = redis.Redis(connection_pool=self._pool)
+
+            # Test connection
+            if self._client.ping():
+                self._connected = True
+                self._retry_count = 0
+                self._using_fallback = False
+                self._metrics.reconnections += 1
+                self._logger.info("Redis reconnection successful")
+                return True
+        except Exception as e:
+            self._logger.warning(f"Reconnection attempt failed: {e}")
+
+        return False
+
+    async def _try_reconnect_async(self) -> bool:
+        """Attempt to reconnect to Redis asynchronously.
+
+        Returns:
+            True if reconnection successful, False otherwise.
+        """
+        import asyncio
+
+        if self._retry_count >= self.max_retries:
+            self._logger.warning(
+                f"Max retries ({self.max_retries}) reached, using fallback"
+            )
+            return False
+
+        delay = self._calculate_backoff_delay()
+        self._logger.info(
+            f"Attempting async Redis reconnection in {delay:.2f}s (attempt {self._retry_count + 1}/{self.max_retries})"
+        )
+
+        await asyncio.sleep(delay)
+        self._retry_count += 1
+
+        try:
+            # Close existing connections
+            if self._async_client:
+                try:
+                    await self._async_client.close()
+                except Exception:
+                    pass
+                self._async_client = None
+
+            if self._async_pool:
+                try:
+                    await self._async_pool.disconnect()
+                except Exception:
+                    pass
+                self._async_pool = None
+
+            # Create new connection
+            self._async_pool = await self._create_async_pool()
+            self._async_client = redis.asyncio.Redis(connection_pool=self._async_pool)
+
+            # Test connection
+            if await self._async_client.ping():
+                self._connected = True
+                self._retry_count = 0
+                self._using_fallback = False
+                self._metrics.reconnections += 1
+                self._logger.info("Async Redis reconnection successful")
+                return True
+        except Exception as e:
+            self._logger.warning(f"Async reconnection attempt failed: {e}")
+
+        return False
+
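Note: both reconnect helpers sleep for a computed delay before retrying. `_calculate_backoff_delay()` is defined earlier in this file, outside this hunk; purely as an illustration, a typical exponential-backoff-with-jitter helper of that shape (names and constants here are assumptions, not the package's actual values) could look like:

    import random

    def _calculate_backoff_delay(retry_count: int, base_delay: float = 0.5, max_delay: float = 30.0) -> float:
        # Exponential growth capped at max_delay, plus up to 25% jitter to avoid thundering herds
        delay = min(base_delay * (2 ** retry_count), max_delay)
        return delay + random.uniform(0, delay * 0.25)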
+    @property
+    def client(self) -> "redis.Redis":
+        """Get sync Redis client with connection pooling.
+
+        Creates the connection pool and client on first access.
+        Handles reconnection on failure.
+
+        Returns:
+            Redis client instance.
+        """
+        if self._client is None or not self._connected:
+            with self._lock:
+                if self._client is None or not self._connected:
+                    try:
+                        self._pool = self._create_pool()
+                        self._client = redis.Redis(connection_pool=self._pool)
+                        # Test connection
+                        self._client.ping()
+                        self._connected = True
+                        self._retry_count = 0
+                        self._logger.debug("Redis sync client connected")
+                    except Exception as e:
+                        self._handle_redis_error(e, "client_init")
+                        raise
+        return self._client
+
+    async def get_async_client(self) -> "redis.asyncio.Redis":
+        """Get async Redis client with connection pooling.
+
+        Creates the async connection pool and client on first access.
+
+        Returns:
+            Async Redis client instance.
+        """
+        import asyncio
+
+        if self._async_lock is None:
+            self._async_lock = asyncio.Lock()
+
+        if self._async_client is None or not self._connected:
+            async with self._async_lock:
+                if self._async_client is None or not self._connected:
+                    try:
+                        self._async_pool = await self._create_async_pool()
+                        self._async_client = redis.asyncio.Redis(
+                            connection_pool=self._async_pool
+                        )
+                        # Test connection
+                        await self._async_client.ping()
+                        self._connected = True
+                        self._retry_count = 0
+                        self._logger.debug("Redis async client connected")
+                    except Exception as e:
+                        self._handle_redis_error(e, "async_client_init")
+                        raise
+        return self._async_client
+
+    async def _ensure_consumer_group(self, client: "redis.asyncio.Redis") -> None:
+        """Ensure consumer group exists for stream.
+
+        Args:
+            client: Redis async client.
+        """
+        try:
+            await client.xgroup_create(
+                self._stream_key,
+                self.consumer_group,
+                id="0",
+                mkstream=True,
+            )
+            self._logger.debug(f"Created consumer group: {self.consumer_group}")
+        except redis.ResponseError as e:
+            if "BUSYGROUP" not in str(e):
+                raise
+            # Group already exists, which is fine
+
+    def _serialize_entry(
+        self,
+        fingerprint: str,
+        first_seen: float,
+        last_seen: float,
+        count: int,
+        metadata: dict[str, Any] | None,
+    ) -> dict[str, str]:
+        """Serialize entry for Redis storage.
+
+        Args:
+            fingerprint: The fingerprint.
+            first_seen: First seen timestamp.
+            last_seen: Last seen timestamp.
+            count: Occurrence count.
+            metadata: Optional metadata.
+
+        Returns:
+            Dictionary suitable for Redis.
+        """
+        return {
+            self.FIELD_FINGERPRINT: fingerprint,
+            self.FIELD_FIRST_SEEN: str(first_seen),
+            self.FIELD_LAST_SEEN: str(last_seen),
+            self.FIELD_COUNT: str(count),
+            self.FIELD_METADATA: json.dumps(metadata or {}),
+        }
+
+    def _deserialize_entry(
+        self, fingerprint: str, data: dict[str, str]
+    ) -> DeduplicationEntry:
+        """Deserialize entry from Redis storage.
+
+        Args:
+            fingerprint: The fingerprint.
+            data: Dictionary from Redis.
+
+        Returns:
+            DeduplicationEntry instance.
+        """
+        metadata = {}
+        if data.get(self.FIELD_METADATA):
+            try:
+                metadata = json.loads(data[self.FIELD_METADATA])
+            except json.JSONDecodeError:
+                pass
+
+        return DeduplicationEntry(
+            fingerprint=fingerprint,
+            first_seen=datetime.fromtimestamp(float(data.get(self.FIELD_FIRST_SEEN, 0))),
+            last_seen=datetime.fromtimestamp(float(data.get(self.FIELD_LAST_SEEN, 0))),
+            count=int(data.get(self.FIELD_COUNT, 1)),
+            metadata=metadata,
+        )
+
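The two helpers above flatten an entry into a string-only mapping so it can live in a Redis hash (HSET/HGETALL) and rebuild it on the way out. A minimal stand-alone sketch of the same round-trip, with shortened field names rather than the class constants used above:

    import json
    import time
    from datetime import datetime

    def serialize(fp: str, first: float, last: float, count: int, meta: dict | None) -> dict[str, str]:
        # Redis hash fields are strings, so numbers and metadata are stringified
        return {"fp": fp, "first": str(first), "last": str(last),
                "count": str(count), "meta": json.dumps(meta or {})}

    def deserialize(data: dict[str, str]) -> dict:
        # Reverse the stringification, tolerating missing fields with defaults
        return {"fingerprint": data.get("fp", ""),
                "first_seen": datetime.fromtimestamp(float(data.get("first", 0))),
                "last_seen": datetime.fromtimestamp(float(data.get("last", 0))),
                "count": int(data.get("count", 1)),
                "metadata": json.loads(data.get("meta", "{}"))}

    now = time.time()
    print(deserialize(serialize("abc123", now, now, 1, {"source": "demo"}))["count"])  # -> 1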
+    def exists(self, fingerprint: str, window_seconds: int) -> bool:
+        """Check if fingerprint exists within window.
+
+        Falls back to InMemory store on Redis failure if enabled.
+
+        Args:
+            fingerprint: The fingerprint to check.
+            window_seconds: Time window in seconds.
+
+        Returns:
+            True if fingerprint exists and is not expired.
+        """
+        # Use fallback if already in fallback mode
+        if self._using_fallback and self.enable_fallback:
+            result = self._get_fallback_store().exists(fingerprint, window_seconds)
+            if result:
+                self._metrics.hits += 1
+            else:
+                self._metrics.misses += 1
+            return result
+
+        try:
+            key = self._get_key(fingerprint)
+            data = self.client.hgetall(key)
+
+            if not data:
+                self._metrics.misses += 1
+                return False
+
+            # Check if expired based on window
+            last_seen = float(data.get(self.FIELD_LAST_SEEN, 0))
+            cutoff = time.time() - window_seconds
+
+            if last_seen >= cutoff:
+                self._metrics.hits += 1
+                return True
+            else:
+                self._metrics.misses += 1
+                return False
+
+        except Exception as e:
+            self._handle_redis_error(e, "exists")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                self._metrics.fallbacks += 1
+                self._logger.warning("Falling back to InMemory store")
+                result = self._get_fallback_store().exists(fingerprint, window_seconds)
+                if result:
+                    self._metrics.hits += 1
+                else:
+                    self._metrics.misses += 1
+                return result
+
+            raise
+
+    async def exists_async(self, fingerprint: str, window_seconds: int) -> bool:
+        """Async check if fingerprint exists within window.
+
+        Falls back to InMemory store on Redis failure if enabled.
+
+        Args:
+            fingerprint: The fingerprint to check.
+            window_seconds: Time window in seconds.
+
+        Returns:
+            True if fingerprint exists and is not expired.
+        """
+        # Use fallback if already in fallback mode
+        if self._using_fallback and self.enable_fallback:
+            result = self._get_fallback_store().exists(fingerprint, window_seconds)
+            if result:
+                self._metrics.hits += 1
+            else:
+                self._metrics.misses += 1
+            return result
+
+        try:
+            client = await self.get_async_client()
+            key = self._get_key(fingerprint)
+            data = await client.hgetall(key)
+
+            if not data:
+                self._metrics.misses += 1
+                return False
+
+            # Check if expired based on window
+            last_seen = float(data.get(self.FIELD_LAST_SEEN, 0))
+            cutoff = time.time() - window_seconds
+
+            if last_seen >= cutoff:
+                self._metrics.hits += 1
+                return True
+            else:
+                self._metrics.misses += 1
+                return False
+
+        except Exception as e:
+            self._handle_redis_error(e, "exists_async")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                self._metrics.fallbacks += 1
+                self._logger.warning("Falling back to InMemory store")
+                result = self._get_fallback_store().exists(fingerprint, window_seconds)
+                if result:
+                    self._metrics.hits += 1
+                else:
+                    self._metrics.misses += 1
+                return result
+
+            raise
+
+    def record(self, fingerprint: str, metadata: dict[str, Any] | None = None) -> None:
+        """Record a fingerprint with automatic TTL and stream logging.
+
+        Args:
+            fingerprint: The fingerprint to record.
+            metadata: Optional metadata to store.
+        """
+        # Use fallback if already in fallback mode
+        if self._using_fallback and self.enable_fallback:
+            self._get_fallback_store().record(fingerprint, metadata)
+            self._metrics.records += 1
+            return
+
+        try:
+            key = self._get_key(fingerprint)
+            now = time.time()
+            client = self.client
+
+            # Use pipeline for atomicity
+            pipe = client.pipeline()
+
+            # Get existing entry
+            existing = client.hgetall(key)
+
+            if existing:
+                # Update existing entry
+                first_seen = float(existing.get(self.FIELD_FIRST_SEEN, now))
+                count = int(existing.get(self.FIELD_COUNT, 0)) + 1
+                old_metadata = {}
+                if existing.get(self.FIELD_METADATA):
+                    try:
+                        old_metadata = json.loads(existing[self.FIELD_METADATA])
+                    except json.JSONDecodeError:
+                        pass
+                if metadata:
+                    old_metadata.update(metadata)
+                final_metadata = old_metadata
+            else:
+                first_seen = now
+                count = 1
+                final_metadata = metadata or {}
+
+            # Store entry as hash
+            entry_data = self._serialize_entry(
+                fingerprint, first_seen, now, count, final_metadata
+            )
+            pipe.hset(key, mapping=entry_data)
+            pipe.expire(key, self.default_ttl)
+
+            # Add to index set for tracking
+            pipe.sadd(self._index_key, fingerprint)
+            pipe.expire(self._index_key, self.default_ttl * 2)
+
+            # Add to stream for audit/replay (with MAXLEN for auto-trimming)
+            stream_entry = {
+                "fingerprint": fingerprint,
+                "timestamp": str(now),
+                "action": "record",
+                "count": str(count),
+            }
+            pipe.xadd(
+                self._stream_key,
+                stream_entry,
+                maxlen=self.stream_max_len,
+                approximate=True,
+            )
+
+            pipe.execute()
+            self._metrics.records += 1
+
+        except Exception as e:
+            self._handle_redis_error(e, "record")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                self._metrics.fallbacks += 1
+                self._logger.warning("Falling back to InMemory store")
+                self._get_fallback_store().record(fingerprint, metadata)
+                self._metrics.records += 1
+                return
+
+            raise
+
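Taken together, `exists()` and `record()` give callers a check-then-record deduplication pattern: look up the fingerprint within a time window, and only notify (and record) on the first occurrence. A hedged usage sketch follows; the constructor arguments, fingerprint derivation, and `send_notification` callback are illustrative assumptions, not the package's actual wiring:

    import hashlib

    store = RedisStreamsDeduplicationStore()  # assumed defaults; real config comes from the caller

    payload = {"rule": "null_ratio", "table": "orders", "severity": "high"}
    fingerprint = hashlib.sha256(repr(sorted(payload.items())).encode()).hexdigest()

    if not store.exists(fingerprint, window_seconds=300):
        store.record(fingerprint, metadata=payload)  # first occurrence in the 5-minute window
        send_notification(payload)                   # hypothetical callback defined elsewhere
    # duplicates inside the window are silently dropped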
+    async def record_async(
+        self, fingerprint: str, metadata: dict[str, Any] | None = None
+    ) -> None:
+        """Async record a fingerprint with automatic TTL and stream logging.
+
+        Args:
+            fingerprint: The fingerprint to record.
+            metadata: Optional metadata to store.
+        """
+        # Use fallback if already in fallback mode
+        if self._using_fallback and self.enable_fallback:
+            self._get_fallback_store().record(fingerprint, metadata)
+            self._metrics.records += 1
+            return
+
+        try:
+            client = await self.get_async_client()
+            key = self._get_key(fingerprint)
+            now = time.time()
+
+            # Ensure consumer group exists
+            await self._ensure_consumer_group(client)
+
+            # Use pipeline for atomicity
+            pipe = client.pipeline()
+
+            # Get existing entry
+            existing = await client.hgetall(key)
+
+            if existing:
+                # Update existing entry
+                first_seen = float(existing.get(self.FIELD_FIRST_SEEN, now))
+                count = int(existing.get(self.FIELD_COUNT, 0)) + 1
+                old_metadata = {}
+                if existing.get(self.FIELD_METADATA):
+                    try:
+                        old_metadata = json.loads(existing[self.FIELD_METADATA])
+                    except json.JSONDecodeError:
+                        pass
+                if metadata:
+                    old_metadata.update(metadata)
+                final_metadata = old_metadata
+            else:
+                first_seen = now
+                count = 1
+                final_metadata = metadata or {}
+
+            # Store entry as hash
+            entry_data = self._serialize_entry(
+                fingerprint, first_seen, now, count, final_metadata
+            )
+            pipe.hset(key, mapping=entry_data)
+            pipe.expire(key, self.default_ttl)
+
+            # Add to index set for tracking
+            pipe.sadd(self._index_key, fingerprint)
+            pipe.expire(self._index_key, self.default_ttl * 2)
+
+            # Add to stream for audit/replay (with MAXLEN for auto-trimming)
+            stream_entry = {
+                "fingerprint": fingerprint,
+                "timestamp": str(now),
+                "action": "record",
+                "count": str(count),
+            }
+            pipe.xadd(
+                self._stream_key,
+                stream_entry,
+                maxlen=self.stream_max_len,
+                approximate=True,
+            )
+
+            await pipe.execute()
+            self._metrics.records += 1
+
+        except Exception as e:
+            self._handle_redis_error(e, "record_async")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                self._metrics.fallbacks += 1
+                self._logger.warning("Falling back to InMemory store")
+                self._get_fallback_store().record(fingerprint, metadata)
+                self._metrics.records += 1
+                return
+
+            raise
+
+    def get(self, fingerprint: str) -> DeduplicationEntry | None:
+        """Get entry by fingerprint.
+
+        Args:
+            fingerprint: The fingerprint to look up.
+
+        Returns:
+            Entry if found, None otherwise.
+        """
+        if self._using_fallback and self.enable_fallback:
+            return self._get_fallback_store().get(fingerprint)
+
+        try:
+            key = self._get_key(fingerprint)
+            data = self.client.hgetall(key)
+
+            if not data:
+                return None
+
+            return self._deserialize_entry(fingerprint, data)
+
+        except Exception as e:
+            self._handle_redis_error(e, "get")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                self._metrics.fallbacks += 1
+                return self._get_fallback_store().get(fingerprint)
+
+            raise
+
+    async def get_async(self, fingerprint: str) -> DeduplicationEntry | None:
+        """Async get entry by fingerprint.
+
+        Args:
+            fingerprint: The fingerprint to look up.
+
+        Returns:
+            Entry if found, None otherwise.
+        """
+        if self._using_fallback and self.enable_fallback:
+            return self._get_fallback_store().get(fingerprint)
+
+        try:
+            client = await self.get_async_client()
+            key = self._get_key(fingerprint)
+            data = await client.hgetall(key)
+
+            if not data:
+                return None
+
+            return self._deserialize_entry(fingerprint, data)
+
+        except Exception as e:
+            self._handle_redis_error(e, "get_async")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                self._metrics.fallbacks += 1
+                return self._get_fallback_store().get(fingerprint)
+
+            raise
+
+    def cleanup(self, max_age_seconds: int) -> int:
+        """Remove expired entries.
+
+        Redis handles TTL automatically, but this method can be used
+        to perform explicit cleanup of old stream entries.
+
+        Args:
+            max_age_seconds: Maximum age of entries to keep.
+
+        Returns:
+            Number of entries removed.
+        """
+        if self._using_fallback and self.enable_fallback:
+            return self._get_fallback_store().cleanup(max_age_seconds)
+
+        try:
+            client = self.client
+            cutoff = time.time() - max_age_seconds
+            removed = 0
+
+            # Get all fingerprints from index
+            fingerprints = client.smembers(self._index_key)
+
+            for fp in fingerprints:
+                key = self._get_key(fp)
+                data = client.hgetall(key)
+
+                if not data:
+                    # Entry expired, remove from index
+                    client.srem(self._index_key, fp)
+                    removed += 1
+                elif float(data.get(self.FIELD_LAST_SEEN, 0)) < cutoff:
+                    # Entry is old, delete it
+                    client.delete(key)
+                    client.srem(self._index_key, fp)
+                    removed += 1
+
+            # Trim stream to remove old entries
+            stream_info = client.xinfo_stream(self._stream_key)
+            if stream_info and stream_info.get("length", 0) > 0:
+                # Get first entry timestamp
+                first_entry = client.xrange(self._stream_key, count=1)
+                if first_entry:
+                    entry_id = first_entry[0][0]
+                    # Stream ID format: timestamp-sequence
+                    entry_ts = int(entry_id.split("-")[0]) / 1000
+                    if entry_ts < cutoff:
+                        # Trim old entries
+                        cutoff_id = f"{int(cutoff * 1000)}-0"
+                        trimmed = client.xtrim(
+                            self._stream_key, minid=cutoff_id, approximate=True
+                        )
+                        removed += trimmed
+
+            return removed
+
+        except Exception as e:
+            self._handle_redis_error(e, "cleanup")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                return self._get_fallback_store().cleanup(max_age_seconds)
+
+            raise
+
+    async def cleanup_async(self, max_age_seconds: int) -> int:
+        """Async remove expired entries.
+
+        Args:
+            max_age_seconds: Maximum age of entries to keep.
+
+        Returns:
+            Number of entries removed.
+        """
+        if self._using_fallback and self.enable_fallback:
+            return self._get_fallback_store().cleanup(max_age_seconds)
+
+        try:
+            client = await self.get_async_client()
+            cutoff = time.time() - max_age_seconds
+            removed = 0
+
+            # Get all fingerprints from index
+            fingerprints = await client.smembers(self._index_key)
+
+            for fp in fingerprints:
+                key = self._get_key(fp)
+                data = await client.hgetall(key)
+
+                if not data:
+                    # Entry expired, remove from index
+                    await client.srem(self._index_key, fp)
+                    removed += 1
+                elif float(data.get(self.FIELD_LAST_SEEN, 0)) < cutoff:
+                    # Entry is old, delete it
+                    await client.delete(key)
+                    await client.srem(self._index_key, fp)
+                    removed += 1
+
+            # Trim stream to remove old entries
+            try:
+                stream_info = await client.xinfo_stream(self._stream_key)
+                if stream_info and stream_info.get("length", 0) > 0:
+                    # Trim old entries
+                    cutoff_id = f"{int(cutoff * 1000)}-0"
+                    trimmed = await client.xtrim(
+                        self._stream_key, minid=cutoff_id, approximate=True
+                    )
+                    removed += trimmed
+            except redis.ResponseError:
+                # Stream might not exist
+                pass
+
+            return removed
+
+        except Exception as e:
+            self._handle_redis_error(e, "cleanup_async")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                return self._get_fallback_store().cleanup(max_age_seconds)
+
+            raise
+
+    def clear(self) -> None:
+        """Clear all deduplication entries."""
+        if self._using_fallback and self.enable_fallback:
+            self._get_fallback_store().clear()
+            return
+
+        try:
+            client = self.client
+
+            # Get all fingerprints from index
+            fingerprints = client.smembers(self._index_key)
+
+            if fingerprints:
+                # Delete all entry keys
+                keys_to_delete = [self._get_key(fp) for fp in fingerprints]
+                client.delete(*keys_to_delete)
+
+            # Delete index
+            client.delete(self._index_key)
+
+            # Delete stream
+            client.delete(self._stream_key)
+
+        except Exception as e:
+            self._handle_redis_error(e, "clear")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                self._get_fallback_store().clear()
+                return
+
+            raise
+
+    async def clear_async(self) -> None:
+        """Async clear all deduplication entries."""
+        if self._using_fallback and self.enable_fallback:
+            self._get_fallback_store().clear()
+            return
+
+        try:
+            client = await self.get_async_client()
+
+            # Get all fingerprints from index
+            fingerprints = await client.smembers(self._index_key)
+
+            if fingerprints:
+                # Delete all entry keys
+                keys_to_delete = [self._get_key(fp) for fp in fingerprints]
+                await client.delete(*keys_to_delete)
+
+            # Delete index
+            await client.delete(self._index_key)
+
+            # Delete stream
+            await client.delete(self._stream_key)
+
+        except Exception as e:
+            self._handle_redis_error(e, "clear_async")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                self._get_fallback_store().clear()
+                return
+
+            raise
+
+    def count(self) -> int:
+        """Get total entry count.
+
+        Returns:
+            Number of deduplication entries.
+        """
+        if self._using_fallback and self.enable_fallback:
+            return self._get_fallback_store().count()
+
+        try:
+            return self.client.scard(self._index_key)
+
+        except Exception as e:
+            self._handle_redis_error(e, "count")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                return self._get_fallback_store().count()
+
+            raise
+
+    async def count_async(self) -> int:
+        """Async get total entry count.
+
+        Returns:
+            Number of deduplication entries.
+        """
+        if self._using_fallback and self.enable_fallback:
+            return self._get_fallback_store().count()
+
+        try:
+            client = await self.get_async_client()
+            return await client.scard(self._index_key)
+
+        except Exception as e:
+            self._handle_redis_error(e, "count_async")
+
+            if self.enable_fallback:
+                self._using_fallback = True
+                return self._get_fallback_store().count()
+
+            raise
+
+    def health_check(self) -> dict[str, Any]:
+        """Perform health check and return status.
+
+        Returns:
+            Dictionary with health status information.
+        """
+        result = {
+            "healthy": False,
+            "connected": self._connected,
+            "using_fallback": self._using_fallback,
+            "redis_url": self._mask_url(self.redis_url),
+            "metrics": self._metrics.to_dict(),
+        }
+
+        if self._using_fallback and self.enable_fallback:
+            result["healthy"] = True
+            result["mode"] = "fallback"
+            result["fallback_entries"] = self._get_fallback_store().count()
+            return result
+
+        try:
+            client = self.client
+            ping_ok = client.ping()
+
+            if ping_ok:
+                result["healthy"] = True
+                result["mode"] = "redis"
+                result["entries"] = self.count()
+
+                # Get stream info
+                try:
+                    stream_info = client.xinfo_stream(self._stream_key)
+                    result["stream"] = {
+                        "length": stream_info.get("length", 0),
+                        "first_entry": stream_info.get("first-entry"),
+                        "last_entry": stream_info.get("last-entry"),
+                    }
+                except redis.ResponseError:
+                    result["stream"] = {"length": 0}
+
+                # Get Redis info
+                info = client.info(section="server")
+                result["redis_info"] = {
+                    "version": info.get("redis_version"),
+                    "uptime_seconds": info.get("uptime_in_seconds"),
+                }
+
+        except Exception as e:
+            result["error"] = str(e)
+            if self._last_error_time:
+                result["last_error_time"] = datetime.fromtimestamp(
+                    self._last_error_time
+                ).isoformat()
+
+        return result
+
+    async def health_check_async(self) -> dict[str, Any]:
+        """Async perform health check and return status.
+
+        Returns:
+            Dictionary with health status information.
+        """
+        result = {
+            "healthy": False,
+            "connected": self._connected,
+            "using_fallback": self._using_fallback,
+            "redis_url": self._mask_url(self.redis_url),
+            "metrics": self._metrics.to_dict(),
+        }
+
+        if self._using_fallback and self.enable_fallback:
+            result["healthy"] = True
+            result["mode"] = "fallback"
+            result["fallback_entries"] = self._get_fallback_store().count()
+            return result
+
+        try:
+            client = await self.get_async_client()
+            ping_ok = await client.ping()
+
+            if ping_ok:
+                result["healthy"] = True
+                result["mode"] = "redis"
+                result["entries"] = await self.count_async()
+
+                # Get stream info
+                try:
+                    stream_info = await client.xinfo_stream(self._stream_key)
+                    result["stream"] = {
+                        "length": stream_info.get("length", 0),
+                        "first_entry": stream_info.get("first-entry"),
+                        "last_entry": stream_info.get("last-entry"),
+                    }
+                except redis.ResponseError:
+                    result["stream"] = {"length": 0}
+
+                # Get Redis info
+                info = await client.info(section="server")
+                result["redis_info"] = {
+                    "version": info.get("redis_version"),
+                    "uptime_seconds": info.get("uptime_in_seconds"),
+                }
+
+        except Exception as e:
+            result["error"] = str(e)
+            if self._last_error_time:
+                result["last_error_time"] = datetime.fromtimestamp(
+                    self._last_error_time
+                ).isoformat()
+
+        return result
+
+    def _mask_url(self, url: str) -> str:
+        """Mask sensitive parts of Redis URL.
+
+        Args:
+            url: Redis URL to mask.
+
+        Returns:
+            Masked URL string.
+        """
+        import re
+
+        # Mask password if present
+        return re.sub(r"://[^:]+:[^@]+@", "://***:***@", url)
+
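For reference, the masking regex above rewrites only the credentials portion of a URL, leaving host, port, and database intact. A tiny self-contained check with an illustrative URL:

    import re

    url = "redis://user:s3cret@cache.internal:6379/0"  # placeholder URL, not a real endpoint
    print(re.sub(r"://[^:]+:[^@]+@", "://***:***@", url))
    # -> redis://***:***@cache.internal:6379/0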
+    def get_metrics(self) -> dict[str, Any]:
+        """Get current metrics.
+
+        Returns:
+            Dictionary with metrics data.
+        """
+        return self._metrics.to_dict()
+
+    def reset_metrics(self) -> None:
+        """Reset all metrics to zero."""
+        self._metrics = DeduplicationMetrics()
+
+    async def read_stream(
+        self,
+        count: int = 100,
+        block_ms: int = 0,
+    ) -> list[dict[str, Any]]:
+        """Read entries from the deduplication stream.
+
+        Useful for audit logging or replaying events.
+
+        Args:
+            count: Maximum number of entries to read.
+            block_ms: Block timeout in milliseconds (0 = no blocking).
+
+        Returns:
+            List of stream entries.
+        """
+        try:
+            client = await self.get_async_client()
+
+            # Ensure consumer group exists
+            await self._ensure_consumer_group(client)
+
+            # Read from stream using consumer group
+            entries = await client.xreadgroup(
+                self.consumer_group,
+                self.consumer_name,
+                {self._stream_key: ">"},
+                count=count,
+                block=block_ms,
+            )
+
+            result = []
+            if entries:
+                for stream_name, messages in entries:
+                    for msg_id, fields in messages:
+                        result.append(
+                            {
+                                "id": msg_id,
+                                "stream": stream_name,
+                                "fields": fields,
+                            }
+                        )
+
+                        # Acknowledge the message
+                        await client.xack(
+                            self._stream_key, self.consumer_group, msg_id
+                        )
+
+            return result
+
+        except Exception as e:
+            self._handle_redis_error(e, "read_stream")
+            return []
+
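Because `read_stream()` goes through a consumer group (XREADGROUP followed by XACK), each audit event is delivered to one consumer and acknowledged as soon as it is collected. A hedged usage sketch; the asyncio wiring and the already-configured `store` instance are assumptions for illustration:

    import asyncio

    async def dump_audit_trail(store: "RedisStreamsDeduplicationStore") -> None:
        # Read up to 50 audit events without blocking; entries are acknowledged inside read_stream()
        events = await store.read_stream(count=50, block_ms=0)
        for event in events:
            print(event["id"], event["fields"])

    # asyncio.run(dump_audit_trail(store))  # 'store' assumed to be constructed elsewhere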
+    async def get_pending_messages(self) -> dict[str, Any]:
+        """Get information about pending messages in consumer group.
+
+        Returns:
+            Dictionary with pending message information.
+        """
+        try:
+            client = await self.get_async_client()
+
+            pending = await client.xpending(self._stream_key, self.consumer_group)
+
+            return {
+                "pending_count": pending.get("pending", 0),
+                "min_id": pending.get("min"),
+                "max_id": pending.get("max"),
+                "consumers": pending.get("consumers", {}),
+            }
+
+        except Exception as e:
+            self._handle_redis_error(e, "get_pending_messages")
+            return {"pending_count": 0, "error": str(e)}
+
+    def close(self) -> None:
+        """Close all connections and pools."""
+        if self._client is not None:
+            try:
+                self._client.close()
+            except Exception:
+                pass
+            self._client = None
+
+        if self._pool is not None:
+            try:
+                self._pool.disconnect()
+            except Exception:
+                pass
+            self._pool = None
+
+        self._connected = False
+
+    async def close_async(self) -> None:
+        """Async close all connections and pools."""
+        if self._async_client is not None:
+            try:
+                await self._async_client.close()
+            except Exception:
+                pass
+            self._async_client = None
+
+        if self._async_pool is not None:
+            try:
+                await self._async_pool.disconnect()
+            except Exception:
+                pass
+            self._async_pool = None
+
+        self._connected = False
+
+    def __enter__(self) -> "RedisStreamsDeduplicationStore":
+        """Context manager entry."""
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Context manager exit, closes connections."""
+        self.close()
+
+    async def __aenter__(self) -> "RedisStreamsDeduplicationStore":
+        """Async context manager entry."""
+        return self
+
+    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Async context manager exit, closes connections."""
+        await self.close_async()
+
+
+# ============================================================================
+# Factory Function
+# ============================================================================
+
+
+class DeduplicationStoreType:
+    """Store type constants."""
+
+    MEMORY = "memory"
+    SQLITE = "sqlite"
+    REDIS = "redis"
+    REDIS_STREAMS = "redis_streams"
+
+
+def create_deduplication_store(
+    store_type: str | None = None,
+    **kwargs: Any,
+) -> BaseDeduplicationStore:
+    """Factory function to create appropriate deduplication store.
+
+    Selects the store type based on configuration or environment variables.
+
+    Environment variables:
+        TRUTHOUND_DEDUP_STORE_TYPE: Store type (memory, sqlite, redis, redis_streams)
+        TRUTHOUND_DEDUP_SQLITE_PATH: SQLite database path
+        TRUTHOUND_DEDUP_REDIS_URL: Redis connection URL (enables redis/redis_streams)
+
+    Args:
+        store_type: Explicit store type override. If None, auto-detects.
+        **kwargs: Additional arguments passed to the store constructor.
+
+    Returns:
+        Configured BaseDeduplicationStore instance.
+
+    Example:
+        # Auto-detect based on environment
+        store = create_deduplication_store()
+
+        # Explicit type
+        store = create_deduplication_store("redis_streams", default_ttl=7200)
+
+        # SQLite with custom path
+        store = create_deduplication_store("sqlite", db_path="/tmp/dedup.db")
+    """
+    import logging
+    import os
+
+    logger = logging.getLogger(__name__)
+
+    # Determine store type
+    if store_type is None:
+        store_type = os.getenv("TRUTHOUND_DEDUP_STORE_TYPE")
+
+    # Auto-detect if still None
+    if store_type is None:
+        redis_url = os.getenv("TRUTHOUND_DEDUP_REDIS_URL")
+        if redis_url and REDIS_AVAILABLE:
+            store_type = DeduplicationStoreType.REDIS_STREAMS
+            logger.info(
+                f"Auto-detected Redis Streams store from TRUTHOUND_DEDUP_REDIS_URL"
+            )
+        elif os.getenv("TRUTHOUND_DEDUP_SQLITE_PATH"):
+            store_type = DeduplicationStoreType.SQLITE
+            logger.info("Auto-detected SQLite store from TRUTHOUND_DEDUP_SQLITE_PATH")
+        else:
+            store_type = DeduplicationStoreType.MEMORY
+            logger.info("Using default InMemory store")
+
+    # Normalize store type
+    store_type = store_type.lower().strip()
+
+    # Create store based on type
+    if store_type == DeduplicationStoreType.MEMORY:
+        logger.info("Creating InMemory deduplication store")
+        return InMemoryDeduplicationStore()
+
+    elif store_type == DeduplicationStoreType.SQLITE:
+        db_path = kwargs.pop("db_path", None) or os.getenv(
+            "TRUTHOUND_DEDUP_SQLITE_PATH", "deduplication.db"
+        )
+        logger.info(f"Creating SQLite deduplication store at {db_path}")
+        return SQLiteDeduplicationStore(db_path=db_path)
+
+    elif store_type == DeduplicationStoreType.REDIS:
+        if not REDIS_AVAILABLE:
+            logger.warning(
+                "Redis not available, falling back to InMemory store. "
+                "Install with: pip install truthound-dashboard[redis]"
+            )
+            return InMemoryDeduplicationStore()
+
+        logger.info("Creating Redis deduplication store (simple)")
+        return RedisDeduplicationStore(**kwargs)
+
+    elif store_type == DeduplicationStoreType.REDIS_STREAMS:
+        if not REDIS_AVAILABLE:
+            logger.warning(
+                "Redis not available, falling back to InMemory store. "
+                "Install with: pip install truthound-dashboard[redis]"
+            )
+            return InMemoryDeduplicationStore()
+
+        logger.info("Creating Redis Streams deduplication store (production)")
+        return RedisStreamsDeduplicationStore(**kwargs)
+
+    else:
+        logger.warning(
+            f"Unknown store type '{store_type}', falling back to InMemory store"
+        )
+        return InMemoryDeduplicationStore()
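The factory keys off environment variables before falling back to the in-memory store, so deployments can switch stores without touching call sites. A minimal sketch of driving the selection from the environment; the URL is a placeholder, and forwarding `default_ttl` through kwargs assumes the Redis Streams store's constructor accepts it (it is not shown in this hunk):

    import os

    os.environ["TRUTHOUND_DEDUP_STORE_TYPE"] = "redis_streams"
    os.environ["TRUTHOUND_DEDUP_REDIS_URL"] = "redis://localhost:6379/0"  # placeholder

    store = create_deduplication_store(default_ttl=3600)  # kwargs are forwarded to the store
    print(store.health_check().get("mode"))               # "redis", or "fallback" if Redis is unreachable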