truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
  162. truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
  164. truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
  166. truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,601 @@
1
+ """Profile comparison service.
2
+
3
+ This module provides functionality for comparing profiles
4
+ over time, including time-series trends and version comparison.
5
+
6
+ Features:
7
+ - Profile history listing
8
+ - Two-profile comparison with statistical significance tests
9
+ - Latest comparison (current vs previous)
10
+ - Time-series trend analysis with significance testing
11
+ - Column-level trend tracking
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Sequence
17
+ from datetime import datetime, timedelta
18
+ from typing import Any
19
+
20
+ from sqlalchemy import select
21
+ from sqlalchemy.ext.asyncio import AsyncSession
22
+
23
+ from truthound_dashboard.db import Profile, Source
24
+ from truthound_dashboard.core.services import ProfileRepository
25
+ from truthound_dashboard.core.statistics import (
26
+ StatisticalTestResult,
27
+ comprehensive_comparison,
28
+ trend_significance_test,
29
+ SignificanceLevel,
30
+ )
31
+ from truthound_dashboard.schemas.profile_comparison import (
32
+ ColumnComparison,
33
+ ColumnTrend,
34
+ LatestComparisonResponse,
35
+ ProfileComparisonResponse,
36
+ ProfileSummary,
37
+ ProfileTrendPoint,
38
+ ProfileTrendResponse,
39
+ TrendDirection,
40
+ )
41
+
42
+
43
+ def _parse_percentage(value: str | None) -> float:
44
+ """Parse percentage string to float.
45
+
46
+ Args:
47
+ value: Percentage string like "25.5%".
48
+
49
+ Returns:
50
+ Float value (0.0-100.0).
51
+ """
52
+ if not value:
53
+ return 0.0
54
+ try:
55
+ return float(value.replace("%", ""))
56
+ except (ValueError, AttributeError):
57
+ return 0.0
58
+
59
+
60
+ def _calculate_change(baseline: float, current: float) -> tuple[float, float | None]:
61
+ """Calculate absolute and percentage change.
62
+
63
+ Args:
64
+ baseline: Baseline value.
65
+ current: Current value.
66
+
67
+ Returns:
68
+ Tuple of (absolute_change, percentage_change).
69
+ """
70
+ change = current - baseline
71
+ if baseline != 0:
72
+ change_pct = (change / baseline) * 100
73
+ else:
74
+ change_pct = None
75
+ return change, change_pct
76
+
77
+
78
def _determine_trend(change: float, threshold: float = 0.1) -> TrendDirection:
    """Classify a change as stable, rising or falling.

    Args:
        change: Change value (absolute or percentage, as chosen by the caller).
        threshold: Magnitude below which the change counts as stable.

    Returns:
        TrendDirection.STABLE when abs(change) is below the threshold,
        otherwise UP for a positive change and DOWN for anything else.
    """
    if abs(change) < threshold:
        return TrendDirection.STABLE
    if change > 0:
        return TrendDirection.UP
    return TrendDirection.DOWN
91
+
92
+
93
+ class ProfileComparisonService:
94
+ """Service for profile comparison and trend analysis."""
95
+
96
def __init__(self, session: AsyncSession):
    """Bind the service to a database session.

    Args:
        session: Database session; it is stored on the service and also
            used to construct the profile repository.
    """
    self.session = session
    # All profile lookups in this service go through this repository.
    self.profile_repo = ProfileRepository(session)
104
+
105
async def list_profiles(
    self,
    source_id: str,
    *,
    limit: int = 20,
    offset: int = 0,
) -> list[ProfileSummary]:
    """Return a page of profile summaries for one source.

    Args:
        source_id: Source ID.
        limit: Maximum number of profiles to return.
        offset: Number of profiles to skip.

    Returns:
        One ProfileSummary per stored profile, in the order the repository
        returns them. Missing row, column and size figures are reported as 0.
    """
    rows = await self.profile_repo.get_for_source(
        source_id, limit=limit, offset=offset
    )

    summaries: list[ProfileSummary] = []
    for row in rows:
        summaries.append(
            ProfileSummary(
                id=row.id,
                source_id=row.source_id,
                row_count=row.row_count or 0,
                column_count=row.column_count or 0,
                size_bytes=row.size_bytes or 0,
                created_at=row.created_at,
            )
        )
    return summaries
136
+
137
def _compare_columns(
    self,
    baseline_cols: list[dict[str, Any]],
    current_cols: list[dict[str, Any]],
    significance_threshold: float = 0.1,
    use_statistical_test: bool = True,
) -> list[ColumnComparison]:
    """Compare column statistics between two profiles.

    Only columns present in both profiles (matched on their "name" entry)
    are compared. For each of them a comparison is emitted for null_pct and
    unique_pct, for distinct_count when both sides provide it, and for each
    of mean/std/min/max that both sides provide as numeric values.

    Results follow the baseline's column order so that repeated calls on the
    same data return the comparisons in the same order.

    Args:
        baseline_cols: Baseline column profiles.
        current_cols: Current column profiles.
        significance_threshold: Threshold for significant change, as a
            fraction (0.1 means 10 percentage points / 10 percent).
        use_statistical_test: Whether to run a statistical test on the
            columns' "samples" entries, when both sides have them, and let
            its verdict override the threshold check for null_pct.

    Returns:
        List of column comparisons.
    """
    comparisons: list[ColumnComparison] = []

    # The threshold is a fraction while every compared figure is a percentage.
    threshold_pct = significance_threshold * 100

    # Create lookup by column name
    baseline_map = {c.get("name"): c for c in baseline_cols}
    current_map = {c.get("name"): c for c in current_cols}

    # Walk the baseline order instead of a set intersection: set iteration
    # order is not stable across runs, which made the output order vary.
    common_cols = [name for name in baseline_map if name in current_map]

    for col_name in common_cols:
        baseline = baseline_map[col_name]
        current = current_map[col_name]

        # Compare null_pct
        baseline_null = _parse_percentage(baseline.get("null_pct"))
        current_null = _parse_percentage(current.get("null_pct"))
        null_change, null_change_pct = _calculate_change(baseline_null, current_null)
        is_null_significant = abs(null_change) >= threshold_pct

        # Statistical test details for null_pct
        stat_test_result = None
        if use_statistical_test:
            # Use sample data if available for statistical test
            baseline_samples = baseline.get("samples", [])
            current_samples = current.get("samples", [])
            if baseline_samples and current_samples:
                stat_test_result = self._run_statistical_test(
                    baseline_samples, current_samples
                )
                # A failed test yields an empty dict, in which case the
                # threshold-based verdict stands.
                is_null_significant = stat_test_result.get(
                    "is_significant", is_null_significant
                )

        comparisons.append(
            ColumnComparison(
                column=col_name,
                metric="null_pct",
                baseline_value=baseline_null,
                current_value=current_null,
                change=null_change,
                change_pct=null_change_pct,
                is_significant=is_null_significant,
                trend=_determine_trend(null_change, threshold_pct),
                statistical_test=stat_test_result,
            )
        )

        # Compare unique_pct
        baseline_unique = _parse_percentage(baseline.get("unique_pct"))
        current_unique = _parse_percentage(current.get("unique_pct"))
        unique_change, unique_change_pct = _calculate_change(
            baseline_unique, current_unique
        )

        comparisons.append(
            ColumnComparison(
                column=col_name,
                metric="unique_pct",
                baseline_value=baseline_unique,
                current_value=current_unique,
                change=unique_change,
                change_pct=unique_change_pct,
                is_significant=abs(unique_change) >= threshold_pct,
                trend=_determine_trend(unique_change, threshold_pct),
            )
        )

        # Compare distinct_count if available
        baseline_distinct = baseline.get("distinct_count")
        current_distinct = current.get("distinct_count")
        if baseline_distinct is not None and current_distinct is not None:
            try:
                distinct_change, distinct_change_pct = _calculate_change(
                    float(baseline_distinct), float(current_distinct)
                )
            except (ValueError, TypeError):
                # A non-numeric count skips this metric, the same way the
                # numeric statistics below are skipped, instead of aborting
                # the whole comparison.
                pass
            else:
                is_distinct_significant = (
                    distinct_change_pct is not None
                    and abs(distinct_change_pct) >= threshold_pct
                )

                comparisons.append(
                    ColumnComparison(
                        column=col_name,
                        metric="distinct_count",
                        baseline_value=baseline_distinct,
                        current_value=current_distinct,
                        change=distinct_change,
                        change_pct=distinct_change_pct,
                        is_significant=is_distinct_significant,
                        # No percentage exists for a zero baseline; that case
                        # is reported as stable.
                        trend=_determine_trend(
                            distinct_change_pct or 0, threshold_pct
                        ),
                    )
                )

        # Compare numeric statistics if available (mean, std, min, max)
        for stat_name in ["mean", "std", "min", "max"]:
            baseline_val = baseline.get(stat_name)
            current_val = current.get(stat_name)
            if baseline_val is None or current_val is None:
                continue
            try:
                b_val = float(baseline_val)
                c_val = float(current_val)
                change, change_pct = _calculate_change(b_val, c_val)
                is_sig = change_pct is not None and abs(change_pct) >= threshold_pct

                comparisons.append(
                    ColumnComparison(
                        column=col_name,
                        metric=stat_name,
                        baseline_value=b_val,
                        current_value=c_val,
                        change=change,
                        change_pct=change_pct,
                        is_significant=is_sig,
                        trend=_determine_trend(change_pct or 0, threshold_pct),
                    )
                )
            except (ValueError, TypeError):
                # Statistics that are not numeric are left out of the result.
                continue

    return comparisons
280
+
281
def _run_statistical_test(
    self,
    baseline_values: list[float],
    current_values: list[float],
) -> dict[str, Any]:
    """Run statistical significance test on sample data.

    The p-value and the effect size are both taken from the test named by
    the comparison's recommended_test (the t-test when that name contains
    "t-test", otherwise Mann-Whitney). The other test's effect size is used
    only when the recommended one does not provide any.

    Args:
        baseline_values: Baseline sample values.
        current_values: Current sample values.

    Returns:
        Dictionary with the keys "test_name", "p_value", "is_significant",
        "effect_size" and "interpretation", or an empty dictionary when the
        test could not be run.
    """
    import logging

    try:
        result = comprehensive_comparison(baseline_values, current_values)
        if "t-test" in result.recommended_test.lower():
            chosen, other = result.t_test, result.mann_whitney
        else:
            chosen, other = result.mann_whitney, result.t_test

        # Compare against None explicitly: an effect size of 0.0 is a valid
        # result and must not fall through to the other test.
        effect_size = chosen.effect_size
        if effect_size is None:
            effect_size = getattr(other, "effect_size", None)

        return {
            "test_name": result.recommended_test,
            "p_value": chosen.p_value,
            "is_significant": result.overall_significant,
            "effect_size": effect_size,
            "interpretation": result.summary,
        }
    except Exception:
        # Best effort: the caller falls back to its threshold check when it
        # gets an empty result, so record the failure and carry on.
        logging.getLogger(__name__).debug(
            "Statistical comparison failed", exc_info=True
        )
        return {}
306
+
307
async def compare_profiles(
    self,
    source: Source,
    baseline_profile_id: str,
    current_profile_id: str,
    *,
    significance_threshold: float = 0.1,
) -> ProfileComparisonResponse:
    """Compare two stored profiles of a source.

    Args:
        source: Source record; its id and name are copied into the response.
        baseline_profile_id: ID of the profile to compare from.
        current_profile_id: ID of the profile to compare to.
        significance_threshold: Threshold for significant changes.

    Returns:
        Profile comparison response with row/column count changes, the
        per-column comparisons and a short summary.

    Raises:
        ValueError: If either profile cannot be loaded.
    """
    baseline = await self.profile_repo.get_by_id(baseline_profile_id)
    current = await self.profile_repo.get_by_id(current_profile_id)
    if not (baseline and current):
        raise ValueError("One or both profiles not found")

    # Row counts: missing values count as zero.
    rows_before = baseline.row_count or 0
    rows_after = current.row_count or 0
    row_delta, row_delta_pct = _calculate_change(
        float(rows_before), float(rows_after)
    )

    # Column counts: only the absolute difference is reported.
    column_total_before = baseline.column_count or 0
    column_total_after = current.column_count or 0
    column_delta = column_total_after - column_total_before

    # Column details come from a "columns" attribute when the profile has a
    # non-empty one, otherwise from the "columns" entry of profile_json.
    baseline_columns = getattr(baseline, "columns", [])
    current_columns = getattr(current, "columns", [])
    if not baseline_columns and baseline.profile_json:
        baseline_columns = baseline.profile_json.get("columns", [])
    if not current_columns and current.profile_json:
        current_columns = current.profile_json.get("columns", [])

    comparisons = self._compare_columns(
        baseline_columns, current_columns, significance_threshold
    )
    significant_total = len([c for c in comparisons if c.is_significant])

    elapsed = current.created_at - baseline.created_at
    summary = {
        "baseline_date": baseline.created_at.isoformat(),
        "current_date": current.created_at.isoformat(),
        "time_diff_hours": elapsed.total_seconds() / 3600,
        # Several comparisons exist per column, so count distinct names.
        "columns_compared": len({c.column for c in comparisons}),
    }

    return ProfileComparisonResponse(
        source_id=source.id,
        source_name=source.name,
        baseline_profile_id=baseline_profile_id,
        current_profile_id=current_profile_id,
        baseline_timestamp=baseline.created_at,
        current_timestamp=current.created_at,
        row_count_change=int(row_delta),
        # No percentage exists for a zero baseline; it is reported as 0.0.
        row_count_change_pct=row_delta_pct or 0.0,
        column_count_change=column_delta,
        column_comparisons=comparisons,
        significant_changes=significant_total,
        summary=summary,
        compared_at=datetime.utcnow(),
    )
386
+
387
async def get_latest_comparison(
    self,
    source: Source,
) -> LatestComparisonResponse:
    """Compare the most recent profile of a source with the one before it.

    Args:
        source: Source record.

    Returns:
        Latest comparison response. has_previous is False and comparison is
        None when fewer than two profiles are available.
    """
    recent = await self.profile_repo.get_for_source(source.id, limit=2)

    if len(recent) >= 2:
        # Index 0 is treated as the current profile and index 1 as the
        # baseline; assumes the repository returns newest first - TODO confirm.
        current, baseline = recent[0], recent[1]
        comparison = await self.compare_profiles(source, baseline.id, current.id)
        return LatestComparisonResponse(
            source_id=source.id,
            has_previous=True,
            comparison=comparison,
        )

    return LatestComparisonResponse(
        source_id=source.id,
        has_previous=False,
        comparison=None,
    )
421
+
422
+ def _parse_period(self, period: str) -> timedelta:
423
+ """Parse period string to timedelta.
424
+
425
+ Args:
426
+ period: Period string like "30d", "7d", "90d".
427
+
428
+ Returns:
429
+ Timedelta object.
430
+ """
431
+ if period.endswith("d"):
432
+ days = int(period[:-1])
433
+ return timedelta(days=days)
434
+ elif period.endswith("w"):
435
+ weeks = int(period[:-1])
436
+ return timedelta(weeks=weeks)
437
+ elif period.endswith("h"):
438
+ hours = int(period[:-1])
439
+ return timedelta(hours=hours)
440
+ else:
441
+ # Default to days
442
+ return timedelta(days=int(period))
443
+
444
async def get_profile_trend(
    self,
    source: Source,
    *,
    period: str = "30d",
    granularity: str = "daily",
) -> ProfileTrendResponse:
    """Get time-series profile trends.

    Loads up to 1000 profiles of the source, keeps those created within the
    requested period and builds one trend point per profile, null_pct trends
    for the columns with the highest average null percentage, and an overall
    row-count trend.

    Args:
        source: Source record.
        period: Time period (e.g., "7d", "30d", "90d").
        granularity: Data granularity (hourly, daily, weekly). It is echoed
            back in the response; this method does not bucket the points.

    Returns:
        Profile trend response.
    """
    import logging

    max_column_trends = 10

    # Calculate time range
    # NOTE(review): created_at is compared with a naive datetime.utcnow();
    # presumably profiles store naive UTC timestamps - confirm.
    period_delta = self._parse_period(period)
    start_time = datetime.utcnow() - period_delta

    # Get profiles within period, oldest first
    profiles = await self.profile_repo.get_for_source(source.id, limit=1000)
    profiles = sorted(
        (p for p in profiles if p.created_at >= start_time),
        key=lambda p: p.created_at,
    )

    # Build trend points and, in the same pass, collect every column's
    # chronological (timestamp, null_pct) samples.
    data_points: list[ProfileTrendPoint] = []
    null_series: dict[Any, list[tuple[datetime, float]]] = {}
    for profile in profiles:
        columns = profile.columns if hasattr(profile, "columns") else []
        if not columns and profile.profile_json:
            columns = profile.profile_json.get("columns", [])
        # Guard against a profile whose column list is None.
        columns = columns or []

        # Calculate averages
        null_pcts = [_parse_percentage(c.get("null_pct")) for c in columns]
        unique_pcts = [_parse_percentage(c.get("unique_pct")) for c in columns]

        avg_null = sum(null_pcts) / len(null_pcts) if null_pcts else 0.0
        avg_unique = sum(unique_pcts) / len(unique_pcts) if unique_pcts else 0.0

        data_points.append(
            ProfileTrendPoint(
                timestamp=profile.created_at,
                profile_id=profile.id,
                row_count=profile.row_count or 0,
                column_count=profile.column_count or 0,
                avg_null_pct=round(avg_null, 2),
                avg_unique_pct=round(avg_unique, 2),
                size_bytes=profile.size_bytes or 0,
            )
        )

        for col, null_pct in zip(columns, null_pcts):
            name = col.get("name")
            if name is None:
                # Columns without a name cannot be tracked across profiles.
                continue
            null_series.setdefault(name, []).append((profile.created_at, null_pct))

    # Build column trends
    column_trends: list[ColumnTrend] = []
    if len(profiles) >= 2:
        # A trend needs at least two samples. Rank the remaining columns by
        # average null percentage (name as tie-break) so the selection is
        # the same on every call instead of depending on set ordering.
        ranked = sorted(
            (
                (name, series)
                for name, series in null_series.items()
                if len(series) >= 2
            ),
            key=lambda item: (
                -(sum(v for _, v in item[1]) / len(item[1])),
                str(item[0]),
            ),
        )

        for col_name, null_values in ranked[:max_column_trends]:
            observed = [v for _, v in null_values]
            first_val = observed[0]
            last_val = observed[-1]
            change = last_val - first_val
            # A zero starting point has no relative change; report 0.
            change_pct = (change / first_val * 100) if first_val != 0 else 0

            column_trends.append(
                ColumnTrend(
                    column=col_name,
                    metric="null_pct",
                    values=null_values,
                    trend_direction=_determine_trend(change, 1.0),
                    change_pct=round(change_pct, 2),
                    min_value=min(observed),
                    max_value=max(observed),
                    avg_value=sum(observed) / len(observed),
                )
            )

    # Determine overall row count trend (first vs last point, 5% threshold)
    row_count_trend = TrendDirection.STABLE
    if len(data_points) >= 2:
        first_rows = data_points[0].row_count
        last_rows = data_points[-1].row_count
        if first_rows > 0:
            row_pct_change = (last_rows - first_rows) / first_rows * 100
            row_count_trend = _determine_trend(row_pct_change, 5.0)

    # The significance test is only attempted with three or more points.
    row_count_significance = None
    if len(data_points) >= 3:
        row_counts = [p.row_count for p in data_points]
        try:
            trend_result = trend_significance_test(row_counts)
            row_count_significance = {
                "p_value": round(trend_result.p_value, 4),
                "is_significant": trend_result.is_significant,
                "slope": trend_result.effect_size,
                "interpretation": trend_result.interpretation,
            }
        except Exception:
            # Best effort: the response is still useful without the
            # significance details, so record the failure and carry on.
            logging.getLogger(__name__).debug(
                "Row count trend significance test failed", exc_info=True
            )

    # Build summary
    summary: dict[str, Any] = {
        "start_date": start_time.isoformat(),
        "end_date": datetime.utcnow().isoformat(),
        "profile_count": len(data_points),
    }

    if data_points:
        all_rows = [p.row_count for p in data_points]
        summary["min_rows"] = min(all_rows)
        summary["max_rows"] = max(all_rows)
        summary["avg_rows"] = int(sum(all_rows) / len(all_rows))

    # Add trend significance info
    if row_count_significance:
        summary["row_count_trend_significance"] = row_count_significance

    return ProfileTrendResponse(
        source_id=source.id,
        source_name=source.name,
        period=period,
        granularity=granularity,
        data_points=data_points,
        column_trends=column_trends,
        total_profiles=len(data_points),
        row_count_trend=row_count_trend,
        summary=summary,
    )