truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +645 -23
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +15 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,472 @@
|
|
|
1
|
+
"""Storage action implementations.
|
|
2
|
+
|
|
3
|
+
Provides actions for storing validation results to various backends:
|
|
4
|
+
- Local filesystem
|
|
5
|
+
- Amazon S3
|
|
6
|
+
- Google Cloud Storage
|
|
7
|
+
|
|
8
|
+
These actions persist validation results for historical analysis,
|
|
9
|
+
compliance, and audit purposes.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from truthound_dashboard.core.interfaces.actions import (
|
|
22
|
+
ActionConfig,
|
|
23
|
+
ActionContext,
|
|
24
|
+
ActionResult,
|
|
25
|
+
ActionStatus,
|
|
26
|
+
BaseAction,
|
|
27
|
+
NotifyCondition,
|
|
28
|
+
register_action,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# =============================================================================
|
|
35
|
+
# File Storage Action
|
|
36
|
+
# =============================================================================
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class FileStorageConfig(ActionConfig):
    """Settings for the local-filesystem storage action.

    Attributes:
        base_path: Directory under which result files are written.
        file_format: Output format; the storage action handles "json",
            "csv" and "parquet".
        include_issues: Keep the detailed issue list in the stored payload.
        create_dirs: Create the base directory when it does not exist.
        filename_template: ``str.format`` template for the filename. The
            storage action supplies ``checkpoint_name``, ``run_id``,
            ``format``, ``date`` and ``timestamp``.
        compress: Compress output files. NOTE(review): declared here, but
            the storage action in this module does not read it.
    """

    base_path: str = "./validation_results"
    file_format: str = "json"
    include_issues: bool = True
    create_dirs: bool = True
    filename_template: str = "{checkpoint_name}_{run_id}.{format}"
    compress: bool = False

    def __post_init__(self):
        # Fall back to a fixed default name when none was supplied.
        if not self.name:
            self.name = "file_storage"
        # Any caller-supplied notify_on is overridden with ALWAYS.
        self.notify_on = NotifyCondition.ALWAYS
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@register_action("file_storage")
class FileStorageAction(BaseAction):
    """Store validation results to the local filesystem.

    The checkpoint result is serialized as JSON, CSV, or Parquet (Parquet
    via Polars, imported lazily) depending on ``file_format``.

    Example:
        action = FileStorageAction(
            base_path="/data/validations",
            file_format="json",
        )
    """

    def __init__(
        self,
        base_path: str = "./validation_results",
        file_format: str = "json",
        config: FileStorageConfig | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the action from explicit arguments or a prepared config.

        Args:
            base_path: Target directory; used only when ``config`` is None.
            file_format: Output format; used only when ``config`` is None.
            config: A ``FileStorageConfig`` or a dict of its fields. When
                given, ``base_path``, ``file_format`` and ``kwargs`` are
                ignored.
            **kwargs: Additional ``FileStorageConfig`` fields, used only
                when ``config`` is None.
        """
        if config is None:
            config = FileStorageConfig(
                base_path=base_path,
                file_format=file_format,
                **kwargs,
            )
        elif isinstance(config, dict):
            config = FileStorageConfig(**config)

        super().__init__(config)
        self._storage_config: FileStorageConfig = config

    @property
    def action_type(self) -> str:
        """Return the action category, always ``"storage"``."""
        return "storage"

    def _do_execute(self, context: ActionContext) -> ActionResult:
        """Write ``context.checkpoint_result`` to a file.

        Args:
            context: Execution context carrying the checkpoint result.

        Returns:
            An ``ActionResult`` with status SUCCESS and the written path and
            format in ``details``, or status FAILURE carrying the error text
            when path resolution, directory creation, or writing fails.
        """
        result = context.checkpoint_result
        cfg = self._storage_config

        try:
            # Path resolution lives inside the try so that a bad template or
            # an unwritable directory is reported as a FAILURE result, the
            # same way a failed write is.
            file_path = self._resolve_file_path(result)

            # Build data to store
            data = result.to_dict()
            if not cfg.include_issues:
                data.pop("issues", None)

            # Write based on format
            if cfg.file_format == "json":
                with open(file_path, "w", encoding="utf-8") as f:
                    json.dump(data, f, indent=2, default=str)
            elif cfg.file_format == "csv":
                self._write_csv(file_path, data)
            elif cfg.file_format == "parquet":
                self._write_parquet(file_path, data)
            else:
                raise ValueError(f"Unsupported format: {cfg.file_format}")

            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.SUCCESS,
                message=f"Result stored to {file_path}",
                details={"file_path": str(file_path), "format": cfg.file_format},
            )
        except Exception as e:
            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.FAILURE,
                message=f"Failed to store result: {str(e)}",
                error=str(e),
            )

    def _resolve_file_path(self, result: Any) -> Path:
        """Render the filename template and return the full target path.

        When ``create_dirs`` is set, the base directory and the parent
        directory of the rendered filename are created, so templates that
        contain sub-directories (e.g. ``"{date}/{run_id}.{format}"``) work.
        """
        cfg = self._storage_config
        # Take one clock reading so {date} and {timestamp} cannot disagree
        # when the call straddles midnight.
        now = datetime.now()
        filename = cfg.filename_template.format(
            checkpoint_name=result.checkpoint_name.replace(" ", "_"),
            run_id=result.run_id,
            format=cfg.file_format,
            date=now.strftime("%Y%m%d"),
            timestamp=now.strftime("%Y%m%d_%H%M%S"),
        )

        base_path = Path(cfg.base_path)
        file_path = base_path / filename
        if cfg.create_dirs:
            base_path.mkdir(parents=True, exist_ok=True)
            file_path.parent.mkdir(parents=True, exist_ok=True)
        return file_path

    def _write_csv(self, path: Path, data: dict[str, Any]) -> None:
        """Write data as CSV.

        One row is written per issue. When there are no issues (or they
        were removed from ``data``), a single summary row is written.

        Raises:
            KeyError: If ``data`` lacks one of the required top-level keys.
        """
        import csv

        # Columns shared by issue rows and the summary row.
        common = {
            "run_id": data["run_id"],
            "checkpoint_name": data["checkpoint_name"],
            "source_name": data["source_name"],
            "status": data["status"],
        }

        # Treat a missing or None issue list as "no issues".
        issues = data.get("issues") or []
        if issues:
            # Flatten issues for CSV
            rows = [
                {
                    **common,
                    "column": issue.get("column", ""),
                    "issue_type": issue.get("issue_type", ""),
                    "count": issue.get("count", 0),
                    "severity": issue.get("severity", ""),
                }
                for issue in issues
            ]
        else:
            # Write summary if no issues
            rows = [
                {
                    **common,
                    "row_count": data["row_count"],
                    "issue_count": data["issue_count"],
                }
            ]

        with open(path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=list(rows[0]))
            writer.writeheader()
            writer.writerows(rows)

    def _write_parquet(self, path: Path, data: dict[str, Any]) -> None:
        """Write data as Parquet using Polars.

        With issues present, the issue records form the frame and the run
        identifiers are added as constant columns. Otherwise a single
        summary row is written.
        """
        import polars as pl

        # Treat a missing or None issue list as "no issues".
        issues = data.get("issues") or []
        if issues:
            df = pl.DataFrame(issues)
            df = df.with_columns([
                pl.lit(data["run_id"]).alias("run_id"),
                pl.lit(data["checkpoint_name"]).alias("checkpoint_name"),
                pl.lit(data["source_name"]).alias("source_name"),
            ])
        else:
            df = pl.DataFrame({
                "run_id": [data["run_id"]],
                "checkpoint_name": [data["checkpoint_name"]],
                "source_name": [data["source_name"]],
                "status": [data["status"]],
                "row_count": [data["row_count"]],
                "issue_count": [data["issue_count"]],
            })

        df.write_parquet(path)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# =============================================================================
|
|
218
|
+
# S3 Storage Action
|
|
219
|
+
# =============================================================================
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
@dataclass
class S3StorageConfig(ActionConfig):
    """Settings for the Amazon S3 storage action.

    Attributes:
        bucket: Name of the destination S3 bucket.
        prefix: Leading part of every object key.
        region: AWS region passed to the S3 client.
        access_key_id: AWS access key. When unset, the client is created
            without explicit credentials.
        secret_access_key: AWS secret key; passed to the client only when
            ``access_key_id`` is set.
        file_format: Output format; used as the object key's extension.
        include_issues: Keep the detailed issue list in the stored payload.
    """

    bucket: str = ""
    prefix: str = "validations"
    region: str = "us-east-1"
    access_key_id: str | None = None
    secret_access_key: str | None = None
    file_format: str = "json"
    include_issues: bool = True

    def __post_init__(self):
        # Fall back to a fixed default name when none was supplied.
        if not self.name:
            self.name = "s3_storage"
        # Any caller-supplied notify_on is overridden with ALWAYS.
        self.notify_on = NotifyCondition.ALWAYS
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
@register_action("s3_storage")
class S3StorageAction(BaseAction):
    """Store validation results to Amazon S3.

    Uploads the checkpoint result as a JSON object to an S3 bucket.
    Uses boto3, which is imported lazily when the action executes.

    Example:
        action = S3StorageAction(
            bucket="my-validations",
            prefix="data-quality/daily",
        )
    """

    def __init__(
        self,
        bucket: str = "",
        prefix: str = "validations",
        config: S3StorageConfig | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the action from explicit arguments or a prepared config.

        Args:
            bucket: Destination bucket; used only when ``config`` is None.
            prefix: Key prefix; used only when ``config`` is None.
            config: An ``S3StorageConfig`` or a dict of its fields. When
                given, ``bucket``, ``prefix`` and ``kwargs`` are ignored.
            **kwargs: Additional ``S3StorageConfig`` fields, used only when
                ``config`` is None.
        """
        if config is None:
            config = S3StorageConfig(
                bucket=bucket,
                prefix=prefix,
                **kwargs,
            )
        elif isinstance(config, dict):
            config = S3StorageConfig(**config)

        super().__init__(config)
        self._s3_config: S3StorageConfig = config

    @property
    def action_type(self) -> str:
        """Return the action category, always ``"storage"``."""
        return "storage"

    def _failure(self, message: str, error: str) -> ActionResult:
        """Build a FAILURE ``ActionResult`` for this action."""
        return ActionResult(
            action_name=self.name,
            action_type=self.action_type,
            status=ActionStatus.FAILURE,
            message=message,
            error=error,
        )

    def _build_key(self, result: Any) -> str:
        """Return the object key ``<prefix>/<YYYY>/<MM>/<DD>/<name>_<run_id>.<ext>``.

        Leading and trailing slashes on the prefix are dropped, and an empty
        prefix is omitted, so the key never starts with ``/`` and never
        contains an accidental ``//``.
        """
        cfg = self._s3_config
        date_path = datetime.now().strftime("%Y/%m/%d")
        filename = f"{result.checkpoint_name}_{result.run_id}.{cfg.file_format}"
        prefix = (cfg.prefix or "").strip("/")
        return "/".join(part for part in (prefix, date_path, filename) if part)

    def _do_execute(self, context: ActionContext) -> ActionResult:
        """Upload ``context.checkpoint_result`` to S3 as JSON.

        Args:
            context: Execution context carrying the checkpoint result.

        Returns:
            An ``ActionResult`` with status SUCCESS and the bucket and key in
            ``details``, or status FAILURE when boto3 is missing, no bucket
            is configured, or the upload raises.
        """
        try:
            import boto3
        except ImportError:
            return self._failure(
                "boto3 not installed. Install with: pip install boto3",
                "ImportError: boto3",
            )

        cfg = self._s3_config
        # The bucket defaults to "", so reject it up front with a clear
        # message instead of creating a client and failing on the request.
        if not cfg.bucket:
            return self._failure(
                "Failed to store to S3: bucket is not configured",
                "ValueError: empty bucket name",
            )

        result = context.checkpoint_result

        try:
            # Build S3 key
            key = self._build_key(result)

            # Create S3 client; explicit credentials only when configured.
            client_kwargs: dict[str, Any] = {"region_name": cfg.region}
            if cfg.access_key_id:
                client_kwargs["aws_access_key_id"] = cfg.access_key_id
                client_kwargs["aws_secret_access_key"] = cfg.secret_access_key

            s3 = boto3.client("s3", **client_kwargs)

            # Build data
            data = result.to_dict()
            if not cfg.include_issues:
                data.pop("issues", None)

            # Serialize
            # NOTE(review): the payload is always JSON; file_format only
            # affects the key's extension. Confirm whether other formats
            # should be rejected or implemented here.
            body = json.dumps(data, indent=2, default=str)

            # Upload
            s3.put_object(
                Bucket=cfg.bucket,
                Key=key,
                Body=body.encode("utf-8"),
                ContentType="application/json",
            )

            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.SUCCESS,
                message=f"Result stored to s3://{cfg.bucket}/{key}",
                details={"bucket": cfg.bucket, "key": key},
            )
        except Exception as e:
            return self._failure(f"Failed to store to S3: {str(e)}", str(e))
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
# =============================================================================
|
|
349
|
+
# GCS Storage Action
|
|
350
|
+
# =============================================================================
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
@dataclass
class GCSStorageConfig(ActionConfig):
    """Settings for the Google Cloud Storage action.

    Attributes:
        bucket: Name of the destination GCS bucket.
        prefix: Leading part of every object name.
        project: GCP project ID (optional).
        credentials_path: Path to a service account JSON file. When unset,
            the client is created without an explicit key file.
        file_format: Output format; used as the object name's extension.
        include_issues: Keep the detailed issue list in the stored payload.
    """

    bucket: str = ""
    prefix: str = "validations"
    project: str | None = None
    credentials_path: str | None = None
    file_format: str = "json"
    include_issues: bool = True

    def __post_init__(self):
        # Fall back to a fixed default name when none was supplied.
        if not self.name:
            self.name = "gcs_storage"
        # Any caller-supplied notify_on is overridden with ALWAYS.
        self.notify_on = NotifyCondition.ALWAYS
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
@register_action("gcs_storage")
class GCSStorageAction(BaseAction):
    """Store validation results to Google Cloud Storage.

    Uploads the checkpoint result as a JSON object to a GCS bucket.
    Uses google-cloud-storage, imported lazily when the action executes.

    Example:
        action = GCSStorageAction(
            bucket="my-validations",
            prefix="data-quality/daily",
        )
    """

    def __init__(
        self,
        bucket: str = "",
        prefix: str = "validations",
        config: GCSStorageConfig | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the action from explicit arguments or a prepared config.

        Args:
            bucket: Destination bucket; used only when ``config`` is None.
            prefix: Object prefix; used only when ``config`` is None.
            config: A ``GCSStorageConfig`` or a dict of its fields. When
                given, ``bucket``, ``prefix`` and ``kwargs`` are ignored.
            **kwargs: Additional ``GCSStorageConfig`` fields, used only when
                ``config`` is None.
        """
        if config is None:
            config = GCSStorageConfig(
                bucket=bucket,
                prefix=prefix,
                **kwargs,
            )
        elif isinstance(config, dict):
            config = GCSStorageConfig(**config)

        super().__init__(config)
        self._gcs_config: GCSStorageConfig = config

    @property
    def action_type(self) -> str:
        """Return the action category, always ``"storage"``."""
        return "storage"

    def _failure(self, message: str, error: str) -> ActionResult:
        """Build a FAILURE ``ActionResult`` for this action."""
        return ActionResult(
            action_name=self.name,
            action_type=self.action_type,
            status=ActionStatus.FAILURE,
            message=message,
            error=error,
        )

    def _build_blob_name(self, result: Any) -> str:
        """Return the object name ``<prefix>/<YYYY>/<MM>/<DD>/<name>_<run_id>.<ext>``.

        Leading and trailing slashes on the prefix are dropped, and an empty
        prefix is omitted, so the name never starts with ``/`` and never
        contains an accidental ``//``.
        """
        cfg = self._gcs_config
        date_path = datetime.now().strftime("%Y/%m/%d")
        filename = f"{result.checkpoint_name}_{result.run_id}.{cfg.file_format}"
        prefix = (cfg.prefix or "").strip("/")
        return "/".join(part for part in (prefix, date_path, filename) if part)

    def _do_execute(self, context: ActionContext) -> ActionResult:
        """Upload ``context.checkpoint_result`` to GCS as JSON.

        Args:
            context: Execution context carrying the checkpoint result.

        Returns:
            An ``ActionResult`` with status SUCCESS and the bucket and blob
            name in ``details``, or status FAILURE when
            google-cloud-storage is missing, no bucket is configured, or
            the upload raises.
        """
        try:
            from google.cloud import storage
        except ImportError:
            return self._failure(
                "google-cloud-storage not installed. Install with: pip install google-cloud-storage",
                "ImportError: google-cloud-storage",
            )

        cfg = self._gcs_config
        # The bucket defaults to "", so reject it up front with a clear
        # message instead of creating a client and failing on the request.
        if not cfg.bucket:
            return self._failure(
                "Failed to store to GCS: bucket is not configured",
                "ValueError: empty bucket name",
            )

        result = context.checkpoint_result

        try:
            # Build GCS path
            blob_name = self._build_blob_name(result)

            # Create GCS client
            if cfg.credentials_path:
                # Forward an explicitly configured project so it is not
                # silently dropped; when omitted, the client falls back to
                # the project recorded in the key file.
                extra: dict[str, Any] = {"project": cfg.project} if cfg.project else {}
                client = storage.Client.from_service_account_json(
                    cfg.credentials_path,
                    **extra,
                )
            else:
                client = storage.Client(project=cfg.project)

            bucket = client.bucket(cfg.bucket)
            blob = bucket.blob(blob_name)

            # Build data
            data = result.to_dict()
            if not cfg.include_issues:
                data.pop("issues", None)

            # Upload
            # NOTE(review): the payload is always JSON; file_format only
            # affects the object name's extension. Confirm whether other
            # formats should be rejected or implemented here.
            blob.upload_from_string(
                json.dumps(data, indent=2, default=str),
                content_type="application/json",
            )

            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.SUCCESS,
                message=f"Result stored to gs://{cfg.bucket}/{blob_name}",
                details={"bucket": cfg.bucket, "blob": blob_name},
            )
        except Exception as e:
            return self._failure(f"Failed to store to GCS: {str(e)}", str(e))
|