truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/METADATA +147 -23
  162. truthound_dashboard-1.4.1.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
  164. truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
  166. truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,858 @@
1
+ """Schema evolution detection service.
2
+
3
+ This module provides functionality for detecting and tracking
4
+ schema changes over time, enabling schema evolution monitoring.
5
+
6
+ Features:
7
+ - Automatic schema change detection
8
+ - Breaking change identification
9
+ - Version history tracking
10
+ - Notification integration for schema changes
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import hashlib
16
+ import json
17
+ import logging
18
+ from collections.abc import Sequence
19
+ from datetime import datetime
20
+ from typing import Any
21
+
22
+ from sqlalchemy import func, select
23
+ from sqlalchemy.ext.asyncio import AsyncSession
24
+
25
+ from truthound_dashboard.db import (
26
+ BaseRepository,
27
+ Schema,
28
+ Source,
29
+ )
30
+ from truthound_dashboard.db.models import SchemaChange, SchemaVersion
31
+ from truthound_dashboard.schemas.schema_evolution import (
32
+ SchemaChangeResponse,
33
+ SchemaChangeSeverity,
34
+ SchemaChangeType,
35
+ SchemaEvolutionResponse,
36
+ SchemaEvolutionSummary,
37
+ SchemaVersionResponse,
38
+ SchemaVersionSummary,
39
+ )
40
+
41
+ logger = logging.getLogger(__name__)
42
+
43
+
44
class SchemaVersionRepository(BaseRepository[SchemaVersion]):
    """Data-access helpers for persisted ``SchemaVersion`` rows."""

    model = SchemaVersion

    async def get_latest_for_source(self, source_id: str) -> SchemaVersion | None:
        """Fetch the highest-numbered schema version of a source.

        Args:
            source_id: Source ID.

        Returns:
            The version with the greatest ``version_number``, or None when
            the source has no recorded versions.
        """
        newest_first = (
            select(SchemaVersion)
            .where(SchemaVersion.source_id == source_id)
            .order_by(SchemaVersion.version_number.desc())
            .limit(1)
        )
        rows = await self.session.execute(newest_first)
        return rows.scalar_one_or_none()

    async def get_for_source(
        self,
        source_id: str,
        *,
        limit: int = 20,
        offset: int = 0,
    ) -> Sequence[SchemaVersion]:
        """List a source's schema versions, newest version number first.

        Args:
            source_id: Source ID.
            limit: Maximum number of versions to return.
            offset: Number of versions to skip.

        Returns:
            Sequence of schema versions.
        """
        belongs_to_source = SchemaVersion.source_id == source_id
        newest_first = SchemaVersion.version_number.desc()
        return await self.list(
            offset=offset,
            limit=limit,
            filters=[belongs_to_source],
            order_by=[newest_first],
        )

    async def get_next_version_number(self, source_id: str) -> int:
        """Compute the version number the next snapshot should receive.

        Args:
            source_id: Source ID.

        Returns:
            One more than the current maximum version number (1 if the
            source has no previous versions).
        """
        max_query = select(func.max(SchemaVersion.version_number)).where(
            SchemaVersion.source_id == source_id
        )
        current_max = (await self.session.execute(max_query)).scalar_one_or_none()
        # MAX() over zero rows yields NULL, hence the fallback to 0.
        return (current_max or 0) + 1

    async def get_by_hash(
        self, source_id: str, schema_hash: str
    ) -> SchemaVersion | None:
        """Look up a version of a source by its schema hash.

        Args:
            source_id: Source ID.
            schema_hash: Schema hash.

        Returns:
            A matching schema version, or None when no version of the
            source carries this hash.
        """
        by_hash = (
            select(SchemaVersion)
            .where(SchemaVersion.source_id == source_id)
            .where(SchemaVersion.schema_hash == schema_hash)
            .limit(1)
        )
        rows = await self.session.execute(by_hash)
        return rows.scalar_one_or_none()
126
+
127
+
128
class SchemaChangeRepository(BaseRepository[SchemaChange]):
    """Data-access helpers for persisted ``SchemaChange`` rows."""

    model = SchemaChange

    async def get_for_source(
        self,
        source_id: str,
        *,
        limit: int = 50,
        offset: int = 0,
    ) -> Sequence[SchemaChange]:
        """List the schema changes of a source, most recently created first.

        Args:
            source_id: Source ID.
            limit: Maximum number of changes to return.
            offset: Number of changes to skip.

        Returns:
            Sequence of schema changes.
        """
        belongs_to_source = SchemaChange.source_id == source_id
        most_recent_first = SchemaChange.created_at.desc()
        return await self.list(
            offset=offset,
            limit=limit,
            filters=[belongs_to_source],
            order_by=[most_recent_first],
        )

    async def get_for_version(
        self, to_version_id: str
    ) -> Sequence[SchemaChange]:
        """List the changes recorded for the transition into a version.

        Args:
            to_version_id: Target version ID.

        Returns:
            Sequence of changes, most recently created first.
        """
        targets_version = SchemaChange.to_version_id == to_version_id
        most_recent_first = SchemaChange.created_at.desc()
        return await self.list(
            filters=[targets_version],
            order_by=[most_recent_first],
        )

    async def count_breaking_changes(self, source_id: str) -> int:
        """Count the changes of a source whose severity is "breaking".

        Args:
            source_id: Source ID.

        Returns:
            Count of breaking changes.
        """
        count_query = (
            select(func.count(SchemaChange.id))
            .where(SchemaChange.source_id == source_id)
            .where(SchemaChange.severity == "breaking")
        )
        rows = await self.session.execute(count_query)
        return rows.scalar_one() or 0
188
+
189
+
190
+ class SchemaEvolutionService:
191
+ """Service for schema evolution detection and tracking.
192
+
193
+ This service handles:
194
+ - Schema version creation and tracking
195
+ - Change detection between versions
196
+ - Breaking change identification
197
+ - Automatic notification dispatch for schema changes
198
+
199
+ Notifications are sent automatically when:
200
+ - Any schema changes are detected (event_type: schema_changed)
201
+ - Breaking changes are detected (triggers breaking_schema_change rules)
202
+ """
203
+
204
def __init__(
    self,
    session: AsyncSession,
    *,
    enable_notifications: bool = True,
):
    """Set up the service around a database session.

    Args:
        session: Database session.
        enable_notifications: Whether to send notifications on schema changes.
    """
    self._enable_notifications = enable_notifications
    self.session = session
    # Both repositories are bound to the same session object.
    self.version_repo = SchemaVersionRepository(session)
    self.change_repo = SchemaChangeRepository(session)
220
+
221
+ def _compute_schema_hash(self, schema_json: dict[str, Any]) -> str:
222
+ """Compute deterministic hash of schema structure.
223
+
224
+ Args:
225
+ schema_json: Schema JSON to hash.
226
+
227
+ Returns:
228
+ SHA256 hash string.
229
+ """
230
+ # Extract and normalize columns for consistent hashing
231
+ columns = schema_json.get("columns", {})
232
+ normalized = {}
233
+ for col_name, col_def in sorted(columns.items()):
234
+ # Only hash structural properties
235
+ normalized[col_name] = {
236
+ "dtype": col_def.get("dtype"),
237
+ "nullable": col_def.get("nullable"),
238
+ }
239
+
240
+ # Create deterministic JSON string
241
+ json_str = json.dumps(normalized, sort_keys=True)
242
+ return hashlib.sha256(json_str.encode()).hexdigest()
243
+
244
+ def _extract_column_snapshot(
245
+ self, schema_json: dict[str, Any]
246
+ ) -> dict[str, Any]:
247
+ """Extract column definitions for snapshot.
248
+
249
+ Args:
250
+ schema_json: Full schema JSON.
251
+
252
+ Returns:
253
+ Column definitions dictionary.
254
+ """
255
+ columns = schema_json.get("columns", {})
256
+ snapshot = {}
257
+ for col_name, col_def in columns.items():
258
+ snapshot[col_name] = {
259
+ "dtype": col_def.get("dtype"),
260
+ "nullable": col_def.get("nullable"),
261
+ "unique": col_def.get("unique"),
262
+ }
263
+ return snapshot
264
+
265
# Type compatibility matrix: (old_type, new_type) -> is_compatible
# Compatible means data can be safely converted without loss
# _is_type_compatible looks pairs up here after passing both names through
# _normalize_type, and falls back to TYPE_WIDTH_ORDER for pairs not listed.
TYPE_COMPATIBILITY_MATRIX: dict[tuple[str, str], bool] = {
    # Safe widening conversions
    ("int8", "int16"): True,
    ("int8", "int32"): True,
    ("int8", "int64"): True,
    ("int16", "int32"): True,
    ("int16", "int64"): True,
    ("int32", "int64"): True,
    ("float32", "float64"): True,
    ("int8", "float32"): True,
    ("int8", "float64"): True,
    ("int16", "float32"): True,
    ("int16", "float64"): True,
    ("int32", "float64"): True,
    # String conversions (anything can become string)
    ("int8", "string"): True,
    ("int16", "string"): True,
    ("int32", "string"): True,
    ("int64", "string"): True,
    ("float32", "string"): True,
    ("float64", "string"): True,
    ("boolean", "string"): True,
    ("date", "string"): True,
    ("datetime", "string"): True,
    # Same type aliases
    # NOTE(review): _normalize_type already maps "integer", "float", "str"
    # and "bool" to their canonical names, so these alias pairs look
    # unreachable from _is_type_compatible - confirm before relying on them.
    ("integer", "int64"): True,
    ("int64", "integer"): True,
    ("float", "float64"): True,
    ("float64", "float"): True,
    ("str", "string"): True,
    ("string", "str"): True,
    ("bool", "boolean"): True,
    ("boolean", "bool"): True,
}

# Types that can be widened (ordered by width)
# A larger rank is treated as "wider" by _is_type_compatible; types that are
# absent from this table (e.g. "boolean", "date") have no rank at all.
TYPE_WIDTH_ORDER: dict[str, int] = {
    "int8": 1, "int16": 2, "int32": 3, "int64": 4, "integer": 4,
    "float32": 5, "float64": 6, "float": 6,
    "string": 10, "str": 10, "text": 10,
}
308
+
309
+ def _normalize_type(self, dtype: str | None) -> str:
310
+ """Normalize type name for comparison.
311
+
312
+ Args:
313
+ dtype: Data type string.
314
+
315
+ Returns:
316
+ Normalized type name.
317
+ """
318
+ if not dtype:
319
+ return "unknown"
320
+
321
+ dtype_lower = dtype.lower().strip()
322
+
323
+ # Normalize common aliases
324
+ type_aliases = {
325
+ "int": "int64",
326
+ "integer": "int64",
327
+ "bigint": "int64",
328
+ "smallint": "int16",
329
+ "tinyint": "int8",
330
+ "float": "float64",
331
+ "double": "float64",
332
+ "real": "float32",
333
+ "str": "string",
334
+ "text": "string",
335
+ "varchar": "string",
336
+ "char": "string",
337
+ "bool": "boolean",
338
+ "timestamp": "datetime",
339
+ "timestamptz": "datetime",
340
+ }
341
+
342
+ return type_aliases.get(dtype_lower, dtype_lower)
343
+
344
+ def _is_type_compatible(self, old_type: str, new_type: str) -> bool:
345
+ """Check if type change is backward compatible (widening).
346
+
347
+ Args:
348
+ old_type: Old data type.
349
+ new_type: New data type.
350
+
351
+ Returns:
352
+ True if change is compatible (widening conversion).
353
+ """
354
+ old_norm = self._normalize_type(old_type)
355
+ new_norm = self._normalize_type(new_type)
356
+
357
+ # Same type is always compatible
358
+ if old_norm == new_norm:
359
+ return True
360
+
361
+ # Check explicit compatibility matrix
362
+ if (old_norm, new_norm) in self.TYPE_COMPATIBILITY_MATRIX:
363
+ return self.TYPE_COMPATIBILITY_MATRIX[(old_norm, new_norm)]
364
+
365
+ # Check width-based compatibility
366
+ old_width = self.TYPE_WIDTH_ORDER.get(old_norm)
367
+ new_width = self.TYPE_WIDTH_ORDER.get(new_norm)
368
+
369
+ if old_width is not None and new_width is not None:
370
+ # Widening is compatible, narrowing is not
371
+ return new_width >= old_width
372
+
373
+ return False
374
+
375
def _get_type_change_severity(
    self, old_type: str, new_type: str
) -> SchemaChangeSeverity:
    """Classify how disruptive a column type change is.

    Args:
        old_type: Old data type.
        new_type: New data type.

    Returns:
        ``NON_BREAKING`` when both names normalize to the same type or the
        change passes ``_is_type_compatible``; ``BREAKING`` otherwise.
    """
    unchanged = self._normalize_type(old_type) == self._normalize_type(new_type)
    if unchanged or self._is_type_compatible(old_type, new_type):
        return SchemaChangeSeverity.NON_BREAKING

    # Narrowing or otherwise incompatible conversion
    return SchemaChangeSeverity.BREAKING
400
+
401
def _detect_column_changes(
    self,
    old_columns: dict[str, Any],
    new_columns: dict[str, Any],
) -> list[dict[str, Any]]:
    """Detect changes between two column snapshots.

    Changes are reported in a deterministic order: added columns, then
    removed columns, then per-column changes of the columns present in
    both snapshots (type, nullability, unique constraint), each group
    sorted by column name.

    Snapshots produced by ``_extract_column_snapshot`` always contain the
    "nullable" and "unique" keys, with None standing for "not specified".
    A stored None is therefore treated like a missing key: nullable
    defaults to True and unique defaults to False. Without this, an
    unspecified value would be reported as a change or as a non-nullable
    column.

    Args:
        old_columns: Previous column definitions.
        new_columns: New column definitions.

    Returns:
        List of detected changes; each entry is a dict with the keys
        "change_type", "column_name", "old_value", "new_value", "severity"
        and "details".
    """

    def read_flag(col_def: dict[str, Any], key: str, default: bool) -> Any:
        """Read a column flag, falling back to default for missing/None."""
        value = col_def.get(key)
        return default if value is None else value

    changes: list[dict[str, Any]] = []
    old_names = set(old_columns)
    new_names = set(new_columns)

    # Added columns (non-breaking if nullable, warning if not nullable)
    for col_name in sorted(new_names - old_names, key=str):
        col_def = new_columns[col_name]
        is_nullable = read_flag(col_def, "nullable", True)
        severity = (
            SchemaChangeSeverity.NON_BREAKING
            if is_nullable
            else SchemaChangeSeverity.WARNING
        )
        changes.append({
            "change_type": SchemaChangeType.COLUMN_ADDED.value,
            "column_name": col_name,
            "old_value": None,
            "new_value": col_def.get("dtype"),
            "severity": severity.value,
            "details": {
                "nullable": is_nullable,
                "reason": (
                    "New column is nullable"
                    if is_nullable
                    else "New non-nullable column may require default value"
                ),
            },
        })

    # Removed columns (breaking change)
    for col_name in sorted(old_names - new_names, key=str):
        col_def = old_columns[col_name]
        changes.append({
            "change_type": SchemaChangeType.COLUMN_REMOVED.value,
            "column_name": col_name,
            "old_value": col_def.get("dtype"),
            "new_value": None,
            "severity": SchemaChangeSeverity.BREAKING.value,
            "details": {
                "reason": "Existing column removed - queries may fail",
            },
        })

    # Check changes in common columns
    for col_name in sorted(old_names & new_names, key=str):
        old_def = old_columns[col_name]
        new_def = new_columns[col_name]

        old_type = old_def.get("dtype")
        new_type = new_def.get("dtype")

        # Type changes with compatibility analysis
        if old_type != new_type:
            severity = self._get_type_change_severity(old_type, new_type)
            is_compatible = self._is_type_compatible(old_type, new_type)

            changes.append({
                "change_type": SchemaChangeType.TYPE_CHANGED.value,
                "column_name": col_name,
                "old_value": old_type,
                "new_value": new_type,
                "severity": severity.value,
                "details": {
                    "is_compatible": is_compatible,
                    "old_type_normalized": self._normalize_type(old_type),
                    "new_type_normalized": self._normalize_type(new_type),
                    "reason": (
                        "Compatible type widening"
                        if is_compatible
                        else "Incompatible type change - data may be lost"
                    ),
                },
            })

        # Nullable changes
        old_nullable = read_flag(old_def, "nullable", True)
        new_nullable = read_flag(new_def, "nullable", True)

        if old_nullable != new_nullable:
            # nullable -> non-nullable is breaking
            # non-nullable -> nullable is safe
            if old_nullable and not new_nullable:
                severity = SchemaChangeSeverity.BREAKING
                reason = "Column became non-nullable - existing nulls will cause errors"
            else:
                severity = SchemaChangeSeverity.NON_BREAKING
                reason = "Column became nullable - no impact on existing data"

            changes.append({
                "change_type": SchemaChangeType.NULLABLE_CHANGED.value,
                "column_name": col_name,
                "old_value": old_nullable,
                "new_value": new_nullable,
                "severity": severity.value,
                "details": {
                    "reason": reason,
                },
            })

        # Unique constraint changes
        old_unique = read_flag(old_def, "unique", False)
        new_unique = read_flag(new_def, "unique", False)

        if old_unique != new_unique:
            if not old_unique and new_unique:
                severity = SchemaChangeSeverity.WARNING
                reason = "Unique constraint added - duplicates will be rejected"
            else:
                severity = SchemaChangeSeverity.NON_BREAKING
                reason = "Unique constraint removed"

            changes.append({
                "change_type": SchemaChangeType.CONSTRAINT_CHANGED.value,
                "column_name": col_name,
                "old_value": f"unique={old_unique}",
                "new_value": f"unique={new_unique}",
                "severity": severity.value,
                "details": {
                    "constraint_type": "unique",
                    "reason": reason,
                },
            })

    return changes
539
+
540
async def create_version(
    self,
    source_id: str,
    schema: Schema,
) -> tuple[SchemaVersion, list[SchemaChange]]:
    """Snapshot a schema as a new version and record what changed.

    When a version of the source with the same schema hash is already
    stored, that version is returned with an empty change list and nothing
    is written. Otherwise a new version is created, the differences from
    the previous latest version (if any) are stored as change records, and
    the session is committed.

    NOTE(review): the hash lookup is not limited to the latest version, so
    a schema that goes back to a previously recorded structure appears to
    return that older version without recording any change - confirm this
    is intended.

    Args:
        source_id: Source ID.
        schema: Schema record.

    Returns:
        Tuple of (new version, list of changes).
    """
    payload = schema.schema_json or {}
    digest = self._compute_schema_hash(payload)

    # Nothing to do when this exact structure is already on record.
    already_stored = await self.version_repo.get_by_hash(source_id, digest)
    if already_stored:
        return already_stored, []

    # The current latest version is the baseline for change detection.
    baseline = await self.version_repo.get_latest_for_source(source_id)
    version_number = await self.version_repo.get_next_version_number(source_id)

    snapshot = self._extract_column_snapshot(payload)

    created = await self.version_repo.create(
        source_id=source_id,
        schema_id=schema.id,
        version_number=version_number,
        schema_hash=digest,
        column_snapshot=snapshot,
    )

    recorded: list[SchemaChange] = []
    if baseline:
        differences = self._detect_column_changes(
            baseline.column_snapshot,
            snapshot,
        )
        for fields in differences:
            record = await self.change_repo.create(
                source_id=source_id,
                from_version_id=baseline.id,
                to_version_id=created.id,
                **fields,
            )
            recorded.append(record)

    await self.session.commit()
    return created, recorded
598
+
599
async def _send_schema_change_notification(
    self,
    source: Source,
    from_version: int | None,
    to_version: int,
    total_changes: int,
    breaking_changes: int,
    changes: list[SchemaChange],
) -> None:
    """Dispatch a best-effort notification about detected schema changes.

    Does nothing when notifications were disabled at construction time.
    Any exception raised while building or sending the notification is
    logged as a warning and not propagated.

    Args:
        source: Source record.
        from_version: Previous version number (None if first version).
        to_version: New version number.
        total_changes: Total number of changes.
        breaking_changes: Number of breaking changes.
        changes: List of change records.
    """
    if not self._enable_notifications:
        return

    try:
        # Imported lazily to avoid circular imports
        from truthound_dashboard.core.notifications.dispatcher import (
            create_dispatcher,
        )

        dispatcher = create_dispatcher(self.session)

        # Only the first 10 changes are included in the notification body
        change_details = []
        for record in changes[:10]:
            change_details.append(
                {
                    "type": record.change_type,
                    "column": record.column_name,
                    "old_value": record.old_value,
                    "new_value": record.new_value,
                    "breaking": record.is_breaking,
                }
            )

        results = await dispatcher.notify_schema_changed(
            source_id=source.id,
            source_name=source.name,
            from_version=from_version,
            to_version=to_version,
            total_changes=total_changes,
            breaking_changes=breaking_changes,
            changes=change_details,
        )

        if results:
            successful = sum(1 for outcome in results if outcome.success)
            logger.info(
                f"Schema change notification sent: {successful}/{len(results)} "
                f"channels (source={source.name}, changes={total_changes})"
            )
    except Exception as e:
        logger.warning(f"Failed to send schema change notification: {e}")
659
+
660
async def detect_changes(
    self,
    source: Source,
    schema: Schema,
    *,
    notify: bool = True,
) -> SchemaEvolutionResponse:
    """Record the current schema of a source and report what changed.

    A notification is sent when at least one change was recorded and
    ``notify`` is true (and notifications are enabled on the service).

    Args:
        source: Source record.
        schema: Current schema.
        notify: Whether to send notification for this detection.

    Returns:
        Evolution detection response.
    """
    new_version, changes = await self.create_version(source.id, schema)

    # The second-newest stored version, if there is one, is reported as
    # the version the source moved away from.
    recent = await self.version_repo.get_for_source(source.id, limit=2)
    from_version = None
    if len(recent) > 1:
        from_version = recent[1].version_number

    # Convert the stored change records to response models
    change_responses = []
    for record in changes:
        change_responses.append(
            SchemaChangeResponse(
                id=record.id,
                source_id=record.source_id,
                from_version_id=record.from_version_id,
                to_version_id=record.to_version_id,
                change_type=SchemaChangeType(record.change_type),
                column_name=record.column_name,
                old_value=record.old_value,
                new_value=record.new_value,
                severity=SchemaChangeSeverity(record.severity),
                description=record.description,
                created_at=record.created_at,
            )
        )

    change_count = len(changes)
    breaking_count = len([record for record in changes if record.is_breaking])

    if changes and notify:
        await self._send_schema_change_notification(
            source=source,
            from_version=from_version,
            to_version=new_version.version_number,
            total_changes=change_count,
            breaking_changes=breaking_count,
            changes=changes,
        )

    return SchemaEvolutionResponse(
        source_id=source.id,
        source_name=source.name,
        from_version=from_version,
        to_version=new_version.version_number,
        has_changes=change_count > 0,
        total_changes=change_count,
        breaking_changes=breaking_count,
        changes=change_responses,
        detected_at=datetime.utcnow(),
    )
728
+
729
async def get_version_history(
    self,
    source_id: str,
    *,
    limit: int = 20,
    offset: int = 0,
) -> list[SchemaVersionSummary]:
    """List summaries of the schema versions stored for a source.

    Args:
        source_id: Source ID.
        limit: Maximum number of versions requested from the repository.
        offset: Number of versions to skip.

    Returns:
        One summary per version returned by the repository, in the
        repository's order.
    """
    stored_versions = await self.version_repo.get_for_source(
        source_id, limit=limit, offset=offset
    )

    summaries = []
    for stored in stored_versions:
        summary = SchemaVersionSummary(
            id=stored.id,
            version_number=stored.version_number,
            column_count=stored.column_count,
            created_at=stored.created_at,
        )
        summaries.append(summary)
    return summaries
758
+
759
async def get_version(self, version_id: str) -> SchemaVersionResponse | None:
    """Look up one schema version by its ID.

    Args:
        version_id: Version ID.

    Returns:
        The version as a response model, or None when the repository
        lookup yields nothing.
    """
    record = await self.version_repo.get_by_id(version_id)
    if record:
        return SchemaVersionResponse(
            id=record.id,
            source_id=record.source_id,
            schema_id=record.schema_id,
            version_number=record.version_number,
            column_count=record.column_count,
            columns=record.column_names,
            schema_hash=record.schema_hash,
            column_snapshot=record.column_snapshot,
            created_at=record.created_at,
            updated_at=record.updated_at,
        )
    return None
784
+
785
async def get_changes(
    self,
    source_id: str,
    *,
    limit: int = 50,
    offset: int = 0,
) -> list[SchemaChangeResponse]:
    """List the schema changes recorded for a source.

    Args:
        source_id: Source ID.
        limit: Maximum number of changes requested from the repository.
        offset: Number of changes to skip.

    Returns:
        One response model per change returned by the repository, in the
        repository's order.
    """
    stored_changes = await self.change_repo.get_for_source(
        source_id, limit=limit, offset=offset
    )

    responses = []
    for stored in stored_changes:
        # Stored type/severity values are converted back to their enums.
        response = SchemaChangeResponse(
            id=stored.id,
            source_id=stored.source_id,
            from_version_id=stored.from_version_id,
            to_version_id=stored.to_version_id,
            change_type=SchemaChangeType(stored.change_type),
            column_name=stored.column_name,
            old_value=stored.old_value,
            new_value=stored.new_value,
            severity=SchemaChangeSeverity(stored.severity),
            description=stored.description,
            created_at=stored.created_at,
        )
        responses.append(response)
    return responses
821
+
822
async def get_evolution_summary(
    self, source_id: str
) -> SchemaEvolutionSummary:
    """Summarise the schema evolution of a source.

    Args:
        source_id: Source ID.

    Returns:
        Summary holding the current version number, version and change
        totals, the breaking-change count and the timestamp of the first
        change returned by the repository (None when there are none).
    """
    # Only the first version returned is needed; its number doubles as the
    # version total.
    # NOTE(review): assumes newest-first ordering and gap-free version
    # numbering - confirm against the repository.
    newest = await self.version_repo.get_for_source(source_id, limit=1)
    latest_version = None
    if newest:
        latest_version = newest[0]

    total_versions = latest_version.version_number if latest_version else 0

    # NOTE(review): total_changes is the size of a fetch limited to 1000
    # rows, while breaking_changes comes from a dedicated count call, so the
    # two may disagree for sources with more changes than that - confirm.
    fetched_changes = await self.change_repo.get_for_source(source_id, limit=1000)
    total_changes = len(fetched_changes)
    breaking_changes = await self.change_repo.count_breaking_changes(source_id)

    # NOTE(review): the first row is presumably the most recent - confirm.
    last_change_at = fetched_changes[0].created_at if fetched_changes else None

    current_version = 0
    if latest_version:
        current_version = latest_version.version_number

    return SchemaEvolutionSummary(
        source_id=source_id,
        current_version=current_version,
        total_versions=total_versions,
        total_changes=total_changes,
        breaking_changes=breaking_changes,
        last_change_at=last_change_at,
    )