truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
  162. truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
  164. truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
  166. truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,29 +1,39 @@
1
- """Validation scheduler with notification integration.
1
+ """Validation scheduler with notification integration and maintenance.
2
2
 
3
3
  This module provides scheduled validation execution with automatic
4
- notification dispatch on failures.
4
+ notification dispatch on failures, plus scheduled database maintenance.
5
5
 
6
6
  The scheduler:
7
- 1. Runs scheduled validations based on cron expressions
8
- 2. Triggers notifications on validation failures
9
- 3. Updates schedule run timestamps
7
+ 1. Runs scheduled validations based on flexible trigger types
8
+ 2. Supports cron, interval, data change, composite, event, and webhook triggers
9
+ 3. Triggers notifications on validation failures
10
+ 4. Updates schedule run timestamps
11
+ 5. Runs periodic database maintenance (cleanup, vacuum)
12
+ 6. Provides per-schedule check intervals and priority-based evaluation
13
+ 7. Supports webhook triggers from external data pipelines
10
14
  """
11
15
 
12
16
  from __future__ import annotations
13
17
 
14
18
  import asyncio
19
+ import hashlib
20
+ import hmac
15
21
  import logging
16
- from datetime import datetime
22
+ import uuid
23
+ from datetime import datetime, timedelta
17
24
  from typing import Any
18
25
 
19
26
  from apscheduler.schedulers.asyncio import AsyncIOScheduler
20
27
  from apscheduler.triggers.cron import CronTrigger
28
+ from apscheduler.triggers.interval import IntervalTrigger
21
29
 
22
- from truthound_dashboard.db import Schedule, Source, get_session
30
+ from truthound_dashboard.db import Schedule, Source, TriggerType, get_session
23
31
 
32
+ from .maintenance import get_maintenance_manager
24
33
  from .notifications.dispatcher import create_dispatcher
25
34
  from .services import ValidationService
26
35
  from .truthound_adapter import get_adapter
36
+ from .triggers import TriggerFactory, TriggerContext, TriggerEvaluation
27
37
 
28
38
  logger = logging.getLogger(__name__)
29
39
 
@@ -34,6 +44,23 @@ class ValidationScheduler:
34
44
  Manages scheduled validation jobs using APScheduler and integrates
35
45
  with the notification system to alert on failures.
36
46
 
47
+ Supports multiple trigger types:
48
+ - Cron: Traditional cron expressions
49
+ - Interval: Fixed time intervals
50
+ - DataChange: Profile-based change detection
51
+ - Composite: Combined triggers with AND/OR logic
52
+ - Event: Response to system events
53
+ - Manual: API-only execution
54
+ - Webhook: External webhook triggers
55
+
56
+ Also manages scheduled database maintenance tasks.
57
+
58
+ Features:
59
+ - Per-schedule check intervals (overrides global default)
60
+ - Priority-based trigger evaluation (1=highest, 10=lowest)
61
+ - Cooldown support to prevent rapid re-triggering
62
+ - Trigger monitoring and status tracking
63
+
37
64
  Usage:
38
65
  scheduler = ValidationScheduler()
39
66
  await scheduler.start()
@@ -41,10 +68,48 @@ class ValidationScheduler:
41
68
  await scheduler.stop()
42
69
  """
43
70
 
44
- def __init__(self) -> None:
45
- """Initialize the scheduler."""
71
+ # Default maintenance schedule: daily at 3:00 AM
72
+ DEFAULT_MAINTENANCE_CRON = "0 3 * * *"
73
+ # Alternative: run maintenance every 24 hours
74
+ MAINTENANCE_INTERVAL_HOURS = 24
75
+ # Data change trigger check interval (base interval for checker loop)
76
+ DATA_CHANGE_CHECK_INTERVAL_SECONDS = 60 # 1 minute (reduced for better responsiveness)
77
+ # Default per-schedule check interval
78
+ DEFAULT_SCHEDULE_CHECK_INTERVAL_MINUTES = 5
79
+
80
+ def __init__(
81
+ self,
82
+ *,
83
+ maintenance_enabled: bool = True,
84
+ maintenance_cron: str | None = None,
85
+ data_change_check_interval: int | None = None,
86
+ ) -> None:
87
+ """Initialize the scheduler.
88
+
89
+ Args:
90
+ maintenance_enabled: Whether to enable scheduled maintenance.
91
+ maintenance_cron: Cron expression for maintenance schedule.
92
+ Defaults to daily at 3:00 AM.
93
+ data_change_check_interval: Interval in seconds for the checker loop.
94
+ Individual schedules can have their own check intervals.
95
+ """
46
96
  self._scheduler = AsyncIOScheduler()
47
97
  self._jobs: dict[str, str] = {} # schedule_id -> job_id mapping
98
+ self._maintenance_enabled = maintenance_enabled
99
+ self._maintenance_cron = maintenance_cron or self.DEFAULT_MAINTENANCE_CRON
100
+ self._maintenance_job_id = "system_maintenance"
101
+ self._data_change_job_id = "data_change_check"
102
+ self._data_change_check_interval = (
103
+ data_change_check_interval or self.DATA_CHANGE_CHECK_INTERVAL_SECONDS
104
+ )
105
+
106
+ # Trigger monitoring state
107
+ self._trigger_check_times: dict[str, datetime] = {} # schedule_id -> last_check_at
108
+ self._trigger_trigger_times: dict[str, datetime] = {} # schedule_id -> last_triggered_at
109
+ self._trigger_check_counts: dict[str, int] = {} # schedule_id -> check_count
110
+ self._trigger_trigger_counts: dict[str, int] = {} # schedule_id -> trigger_count
111
+ self._last_checker_run: datetime | None = None
112
+ self._checker_running = False
48
113
 
49
114
  async def start(self) -> None:
50
115
  """Start the scheduler and load existing schedules."""
@@ -52,11 +117,97 @@ class ValidationScheduler:
52
117
  self._scheduler.start()
53
118
  await self._load_schedules()
54
119
 
120
+ # Start maintenance schedule if enabled
121
+ if self._maintenance_enabled:
122
+ self._schedule_maintenance()
123
+
124
+ # Start data change trigger checker
125
+ self._schedule_data_change_checker()
126
+
55
127
  async def stop(self) -> None:
56
128
  """Stop the scheduler."""
57
129
  logger.info("Stopping validation scheduler")
58
130
  self._scheduler.shutdown(wait=False)
59
131
 
132
+ def _schedule_maintenance(self) -> None:
133
+ """Schedule periodic database maintenance."""
134
+ try:
135
+ trigger = CronTrigger.from_crontab(self._maintenance_cron)
136
+ self._scheduler.add_job(
137
+ self._run_maintenance,
138
+ trigger=trigger,
139
+ id=self._maintenance_job_id,
140
+ name="Database Maintenance",
141
+ replace_existing=True,
142
+ )
143
+ logger.info(
144
+ f"Scheduled database maintenance: {self._maintenance_cron}"
145
+ )
146
+ except Exception as e:
147
+ logger.error(f"Failed to schedule maintenance: {e}")
148
+
149
+ def enable_maintenance(self, cron: str | None = None) -> None:
150
+ """Enable scheduled maintenance.
151
+
152
+ Args:
153
+ cron: Optional cron expression override.
154
+ """
155
+ self._maintenance_enabled = True
156
+ if cron:
157
+ self._maintenance_cron = cron
158
+ self._schedule_maintenance()
159
+
160
+ def disable_maintenance(self) -> None:
161
+ """Disable scheduled maintenance."""
162
+ self._maintenance_enabled = False
163
+ try:
164
+ self._scheduler.remove_job(self._maintenance_job_id)
165
+ logger.info("Disabled scheduled maintenance")
166
+ except Exception:
167
+ pass # Job may not exist
168
+
169
+ def get_maintenance_next_run(self) -> datetime | None:
170
+ """Get next scheduled maintenance run time.
171
+
172
+ Returns:
173
+ Next run datetime or None if disabled.
174
+ """
175
+ if not self._maintenance_enabled:
176
+ return None
177
+ try:
178
+ job = self._scheduler.get_job(self._maintenance_job_id)
179
+ if job:
180
+ return job.next_run_time
181
+ except Exception:
182
+ pass
183
+ return None
184
+
185
+ async def _run_maintenance(self) -> None:
186
+ """Execute scheduled database maintenance."""
187
+ logger.info("Running scheduled database maintenance")
188
+
189
+ manager = get_maintenance_manager()
190
+
191
+ if not manager.config.enabled:
192
+ logger.info("Maintenance is disabled in configuration")
193
+ return
194
+
195
+ try:
196
+ report = await manager.run_cleanup()
197
+
198
+ logger.info(
199
+ f"Maintenance completed: {report.total_deleted} records deleted "
200
+ f"in {report.total_duration_ms}ms "
201
+ f"(vacuum: {report.vacuum_performed})"
202
+ )
203
+
204
+ if not report.success:
205
+ failed_tasks = [r.task_name for r in report.results if not r.success]
206
+ logger.warning(f"Some maintenance tasks failed: {failed_tasks}")
207
+
208
+ except Exception as e:
209
+ logger.error(f"Maintenance failed: {e}")
210
+
60
211
  async def _load_schedules(self) -> None:
61
212
  """Load active schedules from database."""
62
213
  async with get_session() as session:
@@ -73,24 +224,75 @@ class ValidationScheduler:
73
224
  def add_schedule(self, schedule: Schedule) -> None:
74
225
  """Add a schedule to the scheduler.
75
226
 
227
+ Supports multiple trigger types:
228
+ - Cron/Interval: Traditional APScheduler triggers
229
+ - DataChange/Composite/Event: Evaluated by periodic checker
230
+
76
231
  Args:
77
232
  schedule: Schedule model to add.
78
233
  """
79
234
  if schedule.id in self._jobs:
80
235
  self.remove_schedule(schedule.id)
81
236
 
237
+ trigger_type = schedule.effective_trigger_type
238
+
239
+ # Manual and event triggers don't need APScheduler jobs
240
+ if trigger_type in (TriggerType.MANUAL, TriggerType.EVENT):
241
+ logger.info(
242
+ f"Schedule {schedule.name} uses {trigger_type.value} trigger - "
243
+ "no APScheduler job needed"
244
+ )
245
+ return
246
+
247
+ # Data change and composite triggers are handled by periodic checker
248
+ if trigger_type in (TriggerType.DATA_CHANGE, TriggerType.COMPOSITE):
249
+ logger.info(
250
+ f"Schedule {schedule.name} uses {trigger_type.value} trigger - "
251
+ "will be checked periodically"
252
+ )
253
+ return
254
+
82
255
  try:
83
- trigger = CronTrigger.from_crontab(schedule.cron_expression)
256
+ # Create APScheduler trigger based on type
257
+ if trigger_type == TriggerType.CRON:
258
+ cron_expr = schedule.effective_cron_expression
259
+ if not cron_expr:
260
+ logger.error(f"Cron schedule {schedule.id} missing expression")
261
+ return
262
+ ap_trigger = CronTrigger.from_crontab(cron_expr)
263
+ trigger_desc = cron_expr
264
+
265
+ elif trigger_type == TriggerType.INTERVAL:
266
+ config = schedule.trigger_config or {}
267
+ seconds = config.get("seconds", 0)
268
+ minutes = config.get("minutes", 0)
269
+ hours = config.get("hours", 0)
270
+ days = config.get("days", 0)
271
+
272
+ total_seconds = seconds + minutes * 60 + hours * 3600 + days * 86400
273
+ if total_seconds <= 0:
274
+ total_seconds = 3600 # Default to 1 hour
275
+
276
+ ap_trigger = IntervalTrigger(seconds=total_seconds)
277
+ trigger_desc = f"every {total_seconds}s"
278
+
279
+ else:
280
+ # Fallback for unknown types - try as cron
281
+ cron_expr = schedule.cron_expression or "0 0 * * *"
282
+ ap_trigger = CronTrigger.from_crontab(cron_expr)
283
+ trigger_desc = cron_expr
284
+
84
285
  job = self._scheduler.add_job(
85
286
  self._run_validation,
86
- trigger=trigger,
287
+ trigger=ap_trigger,
87
288
  args=[schedule.id],
88
289
  id=f"schedule_{schedule.id}",
89
290
  name=f"Validation: {schedule.name}",
90
291
  replace_existing=True,
91
292
  )
92
293
  self._jobs[schedule.id] = job.id
93
- logger.info(f"Added schedule: {schedule.name} ({schedule.cron_expression})")
294
+ logger.info(f"Added schedule: {schedule.name} ({trigger_desc})")
295
+
94
296
  except Exception as e:
95
297
  logger.error(f"Failed to add schedule {schedule.id}: {e}")
96
298
 
@@ -282,6 +484,614 @@ class ValidationScheduler:
282
484
  except Exception:
283
485
  return None
284
486
 
487
+ def _schedule_data_change_checker(self) -> None:
488
+ """Schedule periodic checker for data change and composite triggers."""
489
+ try:
490
+ self._scheduler.add_job(
491
+ self._check_data_change_triggers,
492
+ trigger=IntervalTrigger(seconds=self._data_change_check_interval),
493
+ id=self._data_change_job_id,
494
+ name="Data Change Trigger Checker",
495
+ replace_existing=True,
496
+ )
497
+ logger.info(
498
+ f"Scheduled data change checker: every {self._data_change_check_interval}s"
499
+ )
500
+ except Exception as e:
501
+ logger.error(f"Failed to schedule data change checker: {e}")
502
+
503
+ async def _check_data_change_triggers(self) -> None:
504
+ """Check all data change and composite triggers.
505
+
506
+ This runs periodically to evaluate triggers that can't be
507
+ handled by APScheduler's built-in triggers.
508
+
509
+ Features:
510
+ - Per-schedule check intervals (respects check_interval_minutes)
511
+ - Priority-based evaluation (lower priority number = higher priority)
512
+ - Cooldown support (prevents rapid re-triggering)
513
+ """
514
+ self._checker_running = True
515
+ self._last_checker_run = datetime.utcnow()
516
+ logger.debug("Checking data change triggers")
517
+
518
+ try:
519
+ async with get_session() as session:
520
+ from sqlalchemy import select
521
+
522
+ # Get schedules with data change or composite triggers
523
+ result = await session.execute(
524
+ select(Schedule)
525
+ .where(Schedule.is_active == True)
526
+ .where(
527
+ Schedule.trigger_type.in_([
528
+ TriggerType.DATA_CHANGE.value,
529
+ TriggerType.COMPOSITE.value,
530
+ ])
531
+ )
532
+ )
533
+ schedules = result.scalars().all()
534
+
535
+ if not schedules:
536
+ logger.debug("No data change/composite schedules to check")
537
+ return
538
+
539
+ # Filter schedules that are due for checking
540
+ now = datetime.utcnow()
541
+ schedules_to_check = []
542
+
543
+ for schedule in schedules:
544
+ if self._is_schedule_due_for_check(schedule, now):
545
+ schedules_to_check.append(schedule)
546
+
547
+ if not schedules_to_check:
548
+ logger.debug("No schedules due for check")
549
+ return
550
+
551
+ # Sort by priority (lower number = higher priority)
552
+ schedules_to_check.sort(
553
+ key=lambda s: self._get_schedule_priority(s)
554
+ )
555
+
556
+ logger.info(
557
+ f"Checking {len(schedules_to_check)}/{len(schedules)} "
558
+ "data change/composite schedules (sorted by priority)"
559
+ )
560
+
561
+ for schedule in schedules_to_check:
562
+ await self._evaluate_and_run_if_needed(session, schedule)
563
+ finally:
564
+ self._checker_running = False
565
+
566
+ def _is_schedule_due_for_check(self, schedule: Schedule, now: datetime) -> bool:
567
+ """Check if a schedule is due for evaluation.
568
+
569
+ Args:
570
+ schedule: Schedule to check.
571
+ now: Current timestamp.
572
+
573
+ Returns:
574
+ True if schedule should be checked.
575
+ """
576
+ schedule_id = schedule.id
577
+ last_check = self._trigger_check_times.get(schedule_id)
578
+
579
+ # First check - always due
580
+ if last_check is None:
581
+ return True
582
+
583
+ # Get per-schedule check interval
584
+ config = schedule.trigger_config or {}
585
+ check_interval_minutes = config.get(
586
+ "check_interval_minutes",
587
+ self.DEFAULT_SCHEDULE_CHECK_INTERVAL_MINUTES
588
+ )
589
+
590
+ # Calculate if due
591
+ next_check = last_check + timedelta(minutes=check_interval_minutes)
592
+ return now >= next_check
593
+
594
+ def _get_schedule_priority(self, schedule: Schedule) -> int:
595
+ """Get priority for a schedule (lower = higher priority).
596
+
597
+ Args:
598
+ schedule: Schedule to get priority for.
599
+
600
+ Returns:
601
+ Priority value (1-10, default 5).
602
+ """
603
+ config = schedule.trigger_config or {}
604
+ return config.get("priority", 5)
605
+
606
+ def _is_in_cooldown(self, schedule: Schedule, now: datetime) -> bool:
607
+ """Check if schedule is in cooldown period.
608
+
609
+ Args:
610
+ schedule: Schedule to check.
611
+ now: Current timestamp.
612
+
613
+ Returns:
614
+ True if in cooldown.
615
+ """
616
+ schedule_id = schedule.id
617
+ last_triggered = self._trigger_trigger_times.get(schedule_id)
618
+
619
+ if last_triggered is None:
620
+ return False
621
+
622
+ config = schedule.trigger_config or {}
623
+ cooldown_minutes = config.get("cooldown_minutes", 15)
624
+
625
+ if cooldown_minutes <= 0:
626
+ return False
627
+
628
+ cooldown_end = last_triggered + timedelta(minutes=cooldown_minutes)
629
+ return now < cooldown_end
630
+
631
+ def _get_cooldown_remaining(self, schedule: Schedule, now: datetime) -> int:
632
+ """Get remaining cooldown time in seconds.
633
+
634
+ Args:
635
+ schedule: Schedule to check.
636
+ now: Current timestamp.
637
+
638
+ Returns:
639
+ Remaining cooldown seconds (0 if not in cooldown).
640
+ """
641
+ schedule_id = schedule.id
642
+ last_triggered = self._trigger_trigger_times.get(schedule_id)
643
+
644
+ if last_triggered is None:
645
+ return 0
646
+
647
+ config = schedule.trigger_config or {}
648
+ cooldown_minutes = config.get("cooldown_minutes", 15)
649
+
650
+ if cooldown_minutes <= 0:
651
+ return 0
652
+
653
+ cooldown_end = last_triggered + timedelta(minutes=cooldown_minutes)
654
+ remaining = (cooldown_end - now).total_seconds()
655
+ return max(0, int(remaining))
656
+
657
+ async def _evaluate_and_run_if_needed(
658
+ self, session: Any, schedule: Schedule
659
+ ) -> None:
660
+ """Evaluate a schedule's trigger and run validation if needed.
661
+
662
+ Args:
663
+ session: Database session.
664
+ schedule: Schedule to evaluate.
665
+ """
666
+ schedule_id = schedule.id
667
+ now = datetime.utcnow()
668
+
669
+ # Update check tracking
670
+ self._trigger_check_times[schedule_id] = now
671
+ self._trigger_check_counts[schedule_id] = (
672
+ self._trigger_check_counts.get(schedule_id, 0) + 1
673
+ )
674
+
675
+ try:
676
+ # Check cooldown first
677
+ if self._is_in_cooldown(schedule, now):
678
+ remaining = self._get_cooldown_remaining(schedule, now)
679
+ logger.debug(
680
+ f"Schedule {schedule.name} in cooldown ({remaining}s remaining)"
681
+ )
682
+ return
683
+
684
+ # Get profile data for data change triggers
685
+ profile_data = None
686
+ baseline_profile = None
687
+
688
+ if schedule.trigger_type == TriggerType.DATA_CHANGE.value:
689
+ # Check if auto_profile is enabled
690
+ config = schedule.trigger_config or {}
691
+ if config.get("auto_profile", True):
692
+ # Run a fresh profile before comparison
693
+ await self._run_profile_if_needed(session, schedule.source_id)
694
+
695
+ profile_data, baseline_profile = await self._get_profile_data(
696
+ session, schedule.source_id
697
+ )
698
+
699
+ # Evaluate trigger
700
+ evaluation = await TriggerFactory.evaluate_schedule(
701
+ schedule,
702
+ profile_data=profile_data,
703
+ baseline_profile=baseline_profile,
704
+ )
705
+
706
+ # Update schedule with evaluation result
707
+ schedule.update_trigger_result(evaluation.to_dict())
708
+
709
+ if evaluation.should_trigger:
710
+ logger.info(
711
+ f"Trigger fired for schedule {schedule.name}: {evaluation.reason}"
712
+ )
713
+ # Update trigger tracking
714
+ self._trigger_trigger_times[schedule_id] = now
715
+ self._trigger_trigger_counts[schedule_id] = (
716
+ self._trigger_trigger_counts.get(schedule_id, 0) + 1
717
+ )
718
+ # Run validation in background
719
+ asyncio.create_task(self._run_validation(schedule.id))
720
+ else:
721
+ logger.debug(
722
+ f"Trigger not fired for schedule {schedule.name}: {evaluation.reason}"
723
+ )
724
+
725
+ await session.commit()
726
+
727
+ except Exception as e:
728
+ logger.error(f"Error evaluating schedule {schedule.id}: {e}")
729
+
730
+ async def _run_profile_if_needed(
731
+ self, session: Any, source_id: str
732
+ ) -> None:
733
+ """Run a profile for a source if needed for data change detection.
734
+
735
+ Args:
736
+ session: Database session.
737
+ source_id: Source ID to profile.
738
+ """
739
+ from sqlalchemy import select
740
+ from truthound_dashboard.db import Profile
741
+
742
+ try:
743
+ # Check if we have a recent profile (within last check interval)
744
+ result = await session.execute(
745
+ select(Profile)
746
+ .where(Profile.source_id == source_id)
747
+ .order_by(Profile.created_at.desc())
748
+ .limit(1)
749
+ )
750
+ latest_profile = result.scalar_one_or_none()
751
+
752
+ # Skip if recent profile exists (within 1 minute)
753
+ if latest_profile:
754
+ profile_age = datetime.utcnow() - latest_profile.created_at
755
+ if profile_age.total_seconds() < 60:
756
+ logger.debug(f"Recent profile exists for source {source_id}")
757
+ return
758
+
759
+ # Run profile using adapter
760
+ adapter = get_adapter()
761
+ result = await session.execute(
762
+ select(Source).where(Source.id == source_id)
763
+ )
764
+ source = result.scalar_one_or_none()
765
+
766
+ if source and source.connection_string:
767
+ logger.debug(f"Running auto-profile for source {source_id}")
768
+ await adapter.profile(source.connection_string)
769
+
770
+ except Exception as e:
771
+ logger.warning(f"Auto-profile failed for source {source_id}: {e}")
772
+
773
async def _get_profile_data(
    self, session: Any, source_id: str
) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
    """Fetch the newest profile and the one before it for a source.

    The two most recently created profiles are loaded; the newest is the
    "current" profile and the older one is the "baseline". Missing entries
    are reported as ``None``. If the lookup raises, the error is logged and
    ``(None, None)`` is returned.

    Args:
        session: Database session.
        source_id: Source ID.

    Returns:
        Tuple of (current_profile, baseline_profile).
    """
    from sqlalchemy import select
    from truthound_dashboard.db import Profile

    try:
        latest_two = (
            select(Profile)
            .where(Profile.source_id == source_id)
            .order_by(Profile.created_at.desc())
            .limit(2)
        )
        rows = (await session.execute(latest_two)).scalars().all()

        # rows is ordered newest-first and holds at most two entries.
        current = rows[0].profile_json if len(rows) >= 1 else None
        baseline = rows[1].profile_json if len(rows) >= 2 else None
        return current, baseline

    except Exception as e:
        logger.error(f"Error getting profile data for source {source_id}: {e}")
        return None, None
808
+
809
async def trigger_event(
    self,
    event_type: str,
    source_id: str | None = None,
    event_data: dict[str, Any] | None = None,
) -> list[str]:
    """Trigger event-based schedules.

    Called when system events occur (e.g., schema change, drift detected).
    Every active schedule whose trigger type is EVENT is evaluated against
    the event; for each one that should fire, a validation run is started
    as a background task and its ID is collected.

    Args:
        event_type: Type of event (e.g., "schema_changed", "drift_detected").
        source_id: Optional source ID related to the event.
        event_data: Additional event data, merged into the event payload.

    Returns:
        List of schedule IDs that were triggered.
    """
    triggered_schedules: list[str] = []

    # The event loop only keeps weak references to tasks, so a task whose
    # result is discarded can be garbage-collected before it finishes.
    # Hold strong references here until each task completes.
    pending_tasks = getattr(self, "_pending_validation_tasks", None)
    if pending_tasks is None:
        pending_tasks = set()
        self._pending_validation_tasks = pending_tasks

    async with get_session() as session:
        from sqlalchemy import select

        # Get event trigger schedules
        result = await session.execute(
            select(Schedule)
            .where(Schedule.is_active == True)  # noqa: E712 (SQL expression)
            .where(Schedule.trigger_type == TriggerType.EVENT.value)
        )
        schedules = result.scalars().all()

        # NOTE(review): keys in event_data are unpacked last, so a "type" or
        # "source_id" key inside event_data overrides the explicit
        # arguments - confirm this precedence is intended.
        full_event_data = {
            "type": event_type,
            "source_id": source_id,
            **(event_data or {}),
        }

        for schedule in schedules:
            evaluation = await TriggerFactory.evaluate_schedule(
                schedule,
                event_data=full_event_data,
            )
            if not evaluation.should_trigger:
                continue

            logger.info(
                f"Event '{event_type}' triggered schedule {schedule.name}"
            )
            task = asyncio.create_task(self._run_validation(schedule.id))
            pending_tasks.add(task)
            task.add_done_callback(pending_tasks.discard)
            triggered_schedules.append(schedule.id)

    return triggered_schedules
860
+
861
async def trigger_webhook(
    self,
    source: str,
    event_type: str = "data_updated",
    payload: dict[str, Any] | None = None,
    schedule_id: str | None = None,
    source_id: str | None = None,
    signature: str | None = None,
) -> dict[str, Any]:
    """Process incoming webhook trigger.

    Loads the active WEBHOOK schedules (optionally narrowed by schedule or
    source ID), skips those still in cooldown, and evaluates the rest. The
    outcome of the signature check is passed to the trigger evaluation as
    ``signature_valid``; this method does not reject a request on its own.

    Args:
        source: Source identifier (e.g., "airflow", "dagster").
        event_type: Type of event.
        payload: Additional payload data.
        schedule_id: Specific schedule to trigger (optional).
        source_id: Data source ID to filter (optional).
        signature: HMAC signature for verification (optional).

    Returns:
        Result dictionary with the keys "accepted", "triggered_schedules",
        "message" and "request_id".
    """
    request_id = str(uuid.uuid4())[:8]
    triggered_schedules: list[str] = []
    now = datetime.utcnow()

    logger.info(f"Webhook received from '{source}' (request_id={request_id})")

    # The event loop only keeps weak references to tasks, so a task whose
    # result is discarded can be garbage-collected before it finishes.
    # Hold strong references here until each task completes.
    pending_tasks = getattr(self, "_pending_validation_tasks", None)
    if pending_tasks is None:
        pending_tasks = set()
        self._pending_validation_tasks = pending_tasks

    async with get_session() as session:
        from sqlalchemy import select

        # Build query for webhook triggers
        query = (
            select(Schedule)
            .where(Schedule.is_active == True)  # noqa: E712 (SQL expression)
            .where(Schedule.trigger_type == TriggerType.WEBHOOK.value)
        )

        # Filter by specific schedule if provided
        if schedule_id:
            query = query.where(Schedule.id == schedule_id)

        # Filter by source ID if provided
        if source_id:
            query = query.where(Schedule.source_id == source_id)

        result = await session.execute(query)
        schedules = result.scalars().all()

        for schedule in schedules:
            config = schedule.trigger_config or {}
            webhook_secret = config.get("webhook_secret")
            require_signature = config.get("require_signature", False)

            # Fail closed: a schedule that demands a signature but has no
            # secret to check it against must not be reported as verified.
            signature_valid = True
            if require_signature:
                if webhook_secret:
                    signature_valid = self._verify_webhook_signature(
                        payload or {},
                        signature,
                        webhook_secret,
                    )
                else:
                    logger.warning(
                        f"Schedule {schedule.name} requires a webhook "
                        f"signature but has no webhook_secret configured"
                    )
                    signature_valid = False

            # Check cooldown
            if self._is_in_cooldown(schedule, now):
                logger.debug(
                    f"Schedule {schedule.name} in cooldown, skipping webhook"
                )
                continue

            # Evaluate webhook trigger
            evaluation = await TriggerFactory.evaluate_schedule(
                schedule,
                custom_data={
                    "webhook_data": {
                        "source": source,
                        "event_type": event_type,
                        "payload": payload or {},
                        "signature_valid": signature_valid,
                    }
                },
            )

            if not evaluation.should_trigger:
                continue

            logger.info(
                f"Webhook triggered schedule {schedule.name} "
                f"(request_id={request_id})"
            )
            # Update trigger tracking
            self._trigger_trigger_times[schedule.id] = now
            self._trigger_trigger_counts[schedule.id] = (
                self._trigger_trigger_counts.get(schedule.id, 0) + 1
            )
            task = asyncio.create_task(self._run_validation(schedule.id))
            pending_tasks.add(task)
            task.add_done_callback(pending_tasks.discard)
            triggered_schedules.append(schedule.id)

    return {
        "accepted": True,
        "triggered_schedules": triggered_schedules,
        "message": (
            f"Triggered {len(triggered_schedules)} schedule(s)"
            if triggered_schedules
            else "No matching schedules triggered"
        ),
        "request_id": request_id,
    }
967
+
968
+ def _verify_webhook_signature(
969
+ self,
970
+ payload: dict[str, Any],
971
+ signature: str | None,
972
+ secret: str,
973
+ ) -> bool:
974
+ """Verify webhook HMAC signature.
975
+
976
+ Args:
977
+ payload: Request payload.
978
+ signature: Provided signature (X-Webhook-Signature header).
979
+ secret: Webhook secret key.
980
+
981
+ Returns:
982
+ True if signature is valid.
983
+ """
984
+ if not signature:
985
+ return False
986
+
987
+ try:
988
+ import json
989
+ payload_bytes = json.dumps(payload, sort_keys=True).encode()
990
+ expected = hmac.new(
991
+ secret.encode(),
992
+ payload_bytes,
993
+ hashlib.sha256,
994
+ ).hexdigest()
995
+ return hmac.compare_digest(signature, expected)
996
+ except Exception as e:
997
+ logger.warning(f"Webhook signature verification failed: {e}")
998
+ return False
999
+
1000
def get_trigger_monitoring_status(self) -> dict[str, Any]:
    """Summarise the state of trigger monitoring.

    The two "last hour" figures count how many entries in the per-schedule
    last-check and last-trigger timestamp maps fall within the past hour.

    Returns:
        Dictionary with monitoring stats and schedule statuses.
    """
    cutoff = datetime.utcnow() - timedelta(hours=1)

    def _entries_since_cutoff(timestamps: dict) -> int:
        # Number of recorded timestamps that are not older than the cutoff.
        return len([ts for ts in timestamps.values() if ts >= cutoff])

    recent_checks = _entries_since_cutoff(self._trigger_check_times)
    recent_triggers = _entries_since_cutoff(self._trigger_trigger_times)

    if self._last_checker_run:
        last_run_iso = self._last_checker_run.isoformat()
    else:
        last_run_iso = None

    return {
        "checker_running": self._checker_running,
        "checker_interval_seconds": self._data_change_check_interval,
        "last_checker_run_at": last_run_iso,
        "total_schedules_tracked": len(self._trigger_check_times),
        "checks_last_hour": recent_checks,
        "triggers_last_hour": recent_triggers,
    }
1030
+
1031
async def get_trigger_check_statuses(self) -> list[dict[str, Any]]:
    """Report per-schedule trigger tracking details.

    Covers every active schedule whose trigger type is DATA_CHANGE,
    COMPOSITE or WEBHOOK, combining the stored schedule with the in-memory
    check/trigger timestamps and counters kept on this instance.

    Returns:
        List of trigger status dictionaries.
    """
    now = datetime.utcnow()
    statuses = []

    async with get_session() as session:
        from sqlalchemy import select

        tracked_types = [
            TriggerType.DATA_CHANGE.value,
            TriggerType.COMPOSITE.value,
            TriggerType.WEBHOOK.value,
        ]
        query = (
            select(Schedule)
            .where(Schedule.is_active == True)
            .where(Schedule.trigger_type.in_(tracked_types))
        )
        rows = await session.execute(query)

        for schedule in rows.scalars().all():
            sid = schedule.id
            trigger_config = schedule.trigger_config or {}

            checked_at = self._trigger_check_times.get(sid)
            fired_at = self._trigger_trigger_times.get(sid)
            interval_minutes = trigger_config.get(
                "check_interval_minutes",
                self.DEFAULT_SCHEDULE_CHECK_INTERVAL_MINUTES,
            )

            # The next check is only known once a first check was recorded.
            if checked_at:
                due_at = checked_at + timedelta(minutes=interval_minutes)
            else:
                due_at = None

            entry = {
                "schedule_id": sid,
                "schedule_name": schedule.name,
                "trigger_type": schedule.trigger_type,
                "last_check_at": checked_at.isoformat() if checked_at else None,
                "next_check_at": due_at.isoformat() if due_at else None,
                "last_triggered_at": fired_at.isoformat() if fired_at else None,
                "check_count": self._trigger_check_counts.get(sid, 0),
                "trigger_count": self._trigger_trigger_counts.get(sid, 0),
                "is_due_for_check": self._is_schedule_due_for_check(schedule, now),
                "priority": self._get_schedule_priority(schedule),
                "cooldown_remaining_seconds": self._get_cooldown_remaining(
                    schedule, now
                ),
            }
            statuses.append(entry)

    # Ascending by the value returned from _get_schedule_priority.
    return sorted(statuses, key=lambda entry: entry["priority"])
1094
+
285
1095
 
286
1096
  # Singleton instance
287
1097
  _scheduler: ValidationScheduler | None = None