truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. truthound_dashboard/api/alerts.py +75 -86
  2. truthound_dashboard/api/anomaly.py +7 -13
  3. truthound_dashboard/api/cross_alerts.py +38 -52
  4. truthound_dashboard/api/drift.py +49 -59
  5. truthound_dashboard/api/drift_monitor.py +234 -79
  6. truthound_dashboard/api/enterprise_sampling.py +498 -0
  7. truthound_dashboard/api/history.py +57 -5
  8. truthound_dashboard/api/lineage.py +3 -48
  9. truthound_dashboard/api/maintenance.py +104 -49
  10. truthound_dashboard/api/mask.py +1 -2
  11. truthound_dashboard/api/middleware.py +2 -1
  12. truthound_dashboard/api/model_monitoring.py +435 -311
  13. truthound_dashboard/api/notifications.py +227 -191
  14. truthound_dashboard/api/notifications_advanced.py +21 -20
  15. truthound_dashboard/api/observability.py +586 -0
  16. truthound_dashboard/api/plugins.py +2 -433
  17. truthound_dashboard/api/profile.py +199 -37
  18. truthound_dashboard/api/quality_reporter.py +701 -0
  19. truthound_dashboard/api/reports.py +7 -16
  20. truthound_dashboard/api/router.py +66 -0
  21. truthound_dashboard/api/rule_suggestions.py +5 -5
  22. truthound_dashboard/api/scan.py +17 -19
  23. truthound_dashboard/api/schedules.py +85 -50
  24. truthound_dashboard/api/schema_evolution.py +6 -6
  25. truthound_dashboard/api/schema_watcher.py +667 -0
  26. truthound_dashboard/api/sources.py +98 -27
  27. truthound_dashboard/api/tiering.py +1323 -0
  28. truthound_dashboard/api/triggers.py +14 -11
  29. truthound_dashboard/api/validations.py +12 -11
  30. truthound_dashboard/api/versioning.py +1 -6
  31. truthound_dashboard/core/__init__.py +129 -3
  32. truthound_dashboard/core/actions/__init__.py +62 -0
  33. truthound_dashboard/core/actions/custom.py +426 -0
  34. truthound_dashboard/core/actions/notifications.py +910 -0
  35. truthound_dashboard/core/actions/storage.py +472 -0
  36. truthound_dashboard/core/actions/webhook.py +281 -0
  37. truthound_dashboard/core/anomaly.py +262 -67
  38. truthound_dashboard/core/anomaly_explainer.py +4 -3
  39. truthound_dashboard/core/backends/__init__.py +67 -0
  40. truthound_dashboard/core/backends/base.py +299 -0
  41. truthound_dashboard/core/backends/errors.py +191 -0
  42. truthound_dashboard/core/backends/factory.py +423 -0
  43. truthound_dashboard/core/backends/mock_backend.py +451 -0
  44. truthound_dashboard/core/backends/truthound_backend.py +718 -0
  45. truthound_dashboard/core/checkpoint/__init__.py +87 -0
  46. truthound_dashboard/core/checkpoint/adapters.py +814 -0
  47. truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
  48. truthound_dashboard/core/checkpoint/runner.py +270 -0
  49. truthound_dashboard/core/connections.py +645 -23
  50. truthound_dashboard/core/converters/__init__.py +14 -0
  51. truthound_dashboard/core/converters/truthound.py +620 -0
  52. truthound_dashboard/core/cross_alerts.py +540 -320
  53. truthound_dashboard/core/datasource_factory.py +1672 -0
  54. truthound_dashboard/core/drift_monitor.py +216 -20
  55. truthound_dashboard/core/enterprise_sampling.py +1291 -0
  56. truthound_dashboard/core/interfaces/__init__.py +225 -0
  57. truthound_dashboard/core/interfaces/actions.py +652 -0
  58. truthound_dashboard/core/interfaces/base.py +247 -0
  59. truthound_dashboard/core/interfaces/checkpoint.py +676 -0
  60. truthound_dashboard/core/interfaces/protocols.py +664 -0
  61. truthound_dashboard/core/interfaces/reporters.py +650 -0
  62. truthound_dashboard/core/interfaces/routing.py +646 -0
  63. truthound_dashboard/core/interfaces/triggers.py +619 -0
  64. truthound_dashboard/core/lineage.py +407 -71
  65. truthound_dashboard/core/model_monitoring.py +431 -3
  66. truthound_dashboard/core/notifications/base.py +4 -0
  67. truthound_dashboard/core/notifications/channels.py +501 -1203
  68. truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
  69. truthound_dashboard/core/notifications/deduplication/service.py +131 -348
  70. truthound_dashboard/core/notifications/dispatcher.py +202 -11
  71. truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
  72. truthound_dashboard/core/notifications/escalation/engine.py +168 -358
  73. truthound_dashboard/core/notifications/routing/__init__.py +88 -128
  74. truthound_dashboard/core/notifications/routing/engine.py +90 -317
  75. truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
  76. truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
  77. truthound_dashboard/core/notifications/throttling/builder.py +117 -255
  78. truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
  79. truthound_dashboard/core/phase5/collaboration.py +1 -1
  80. truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
  81. truthound_dashboard/core/quality_reporter.py +1359 -0
  82. truthound_dashboard/core/report_history.py +0 -6
  83. truthound_dashboard/core/reporters/__init__.py +175 -14
  84. truthound_dashboard/core/reporters/adapters.py +943 -0
  85. truthound_dashboard/core/reporters/base.py +0 -3
  86. truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
  87. truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
  88. truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
  89. truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
  90. truthound_dashboard/core/reporters/compat.py +266 -0
  91. truthound_dashboard/core/reporters/csv_reporter.py +2 -35
  92. truthound_dashboard/core/reporters/factory.py +526 -0
  93. truthound_dashboard/core/reporters/interfaces.py +745 -0
  94. truthound_dashboard/core/reporters/registry.py +1 -10
  95. truthound_dashboard/core/scheduler.py +165 -0
  96. truthound_dashboard/core/schema_evolution.py +3 -3
  97. truthound_dashboard/core/schema_watcher.py +1528 -0
  98. truthound_dashboard/core/services.py +595 -76
  99. truthound_dashboard/core/store_manager.py +810 -0
  100. truthound_dashboard/core/streaming_anomaly.py +169 -4
  101. truthound_dashboard/core/tiering.py +1309 -0
  102. truthound_dashboard/core/triggers/evaluators.py +178 -8
  103. truthound_dashboard/core/truthound_adapter.py +2620 -197
  104. truthound_dashboard/core/unified_alerts.py +23 -20
  105. truthound_dashboard/db/__init__.py +8 -0
  106. truthound_dashboard/db/database.py +8 -2
  107. truthound_dashboard/db/models.py +944 -25
  108. truthound_dashboard/db/repository.py +2 -0
  109. truthound_dashboard/main.py +15 -0
  110. truthound_dashboard/schemas/__init__.py +177 -16
  111. truthound_dashboard/schemas/base.py +44 -23
  112. truthound_dashboard/schemas/collaboration.py +19 -6
  113. truthound_dashboard/schemas/cross_alerts.py +19 -3
  114. truthound_dashboard/schemas/drift.py +61 -55
  115. truthound_dashboard/schemas/drift_monitor.py +67 -23
  116. truthound_dashboard/schemas/enterprise_sampling.py +653 -0
  117. truthound_dashboard/schemas/lineage.py +0 -33
  118. truthound_dashboard/schemas/mask.py +10 -8
  119. truthound_dashboard/schemas/model_monitoring.py +89 -10
  120. truthound_dashboard/schemas/notifications_advanced.py +13 -0
  121. truthound_dashboard/schemas/observability.py +453 -0
  122. truthound_dashboard/schemas/plugins.py +0 -280
  123. truthound_dashboard/schemas/profile.py +154 -247
  124. truthound_dashboard/schemas/quality_reporter.py +403 -0
  125. truthound_dashboard/schemas/reports.py +2 -2
  126. truthound_dashboard/schemas/rule_suggestion.py +8 -1
  127. truthound_dashboard/schemas/scan.py +4 -24
  128. truthound_dashboard/schemas/schedule.py +11 -3
  129. truthound_dashboard/schemas/schema_watcher.py +727 -0
  130. truthound_dashboard/schemas/source.py +17 -2
  131. truthound_dashboard/schemas/tiering.py +822 -0
  132. truthound_dashboard/schemas/triggers.py +16 -0
  133. truthound_dashboard/schemas/unified_alerts.py +7 -0
  134. truthound_dashboard/schemas/validation.py +0 -13
  135. truthound_dashboard/schemas/validators/base.py +41 -21
  136. truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
  137. truthound_dashboard/schemas/validators/localization_validators.py +273 -0
  138. truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
  139. truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
  140. truthound_dashboard/schemas/validators/referential_validators.py +312 -0
  141. truthound_dashboard/schemas/validators/registry.py +93 -8
  142. truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
  143. truthound_dashboard/schemas/versioning.py +1 -6
  144. truthound_dashboard/static/index.html +2 -2
  145. truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
  146. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
  147. truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
  148. truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
  149. truthound_dashboard/core/plugins/hooks/manager.py +0 -403
  150. truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
  151. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
  152. truthound_dashboard/core/reporters/junit_reporter.py +0 -233
  153. truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
  154. truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
  155. truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
  156. truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
  157. truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
  158. truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
  159. truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
  160. truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
  161. truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
  162. truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
  163. truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
  164. truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
  165. truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
  166. truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
  167. truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
  168. truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
  169. truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
  170. truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
  171. truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
  172. truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
  173. truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
  174. truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
  175. truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
  176. truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
  177. truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
  178. truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
  179. truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
  180. truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
  181. truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
  182. truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
  183. truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
  184. truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
  185. truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
  186. truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
  187. truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
  188. truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
  189. truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
  190. truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
  191. truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
  192. truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
  193. truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
  194. truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
  195. truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
  196. truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
  197. truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
  198. truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
  199. truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
  200. truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
  201. truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
  202. truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
  203. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
  204. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
  205. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,472 @@
1
+ """Storage action implementations.
2
+
3
+ Provides actions for storing validation results to various backends:
4
+ - Local filesystem
5
+ - Amazon S3
6
+ - Google Cloud Storage
7
+
8
+ These actions persist validation results for historical analysis,
9
+ compliance, and audit purposes.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import logging
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ from truthound_dashboard.core.interfaces.actions import (
22
+ ActionConfig,
23
+ ActionContext,
24
+ ActionResult,
25
+ ActionStatus,
26
+ BaseAction,
27
+ NotifyCondition,
28
+ register_action,
29
+ )
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ # =============================================================================
35
+ # File Storage Action
36
+ # =============================================================================
37
+
38
+
39
@dataclass
class FileStorageConfig(ActionConfig):
    """Settings consumed by the local-filesystem storage action.

    Attributes:
        base_path: Directory under which result files are written.
        file_format: Serialization format ("json", "csv" or "parquet").
        include_issues: Keep the detailed "issues" entry in the stored data.
        create_dirs: Create ``base_path`` (including parents) when missing.
        filename_template: ``str.format`` template used to build the file
            name.
        compress: Compress output files. NOTE(review): declared here, but
            FileStorageAction in this module never reads it.
    """

    base_path: str = "./validation_results"
    file_format: str = "json"
    include_issues: bool = True
    create_dirs: bool = True
    filename_template: str = "{checkpoint_name}_{run_id}.{format}"
    compress: bool = False

    def __post_init__(self):
        # Fall back to a fixed action name when none (or an empty one) is set.
        if not self.name:
            self.name = "file_storage"
        # Any supplied notify_on value is replaced with ALWAYS.
        self.notify_on = NotifyCondition.ALWAYS
62
+
63
+
64
@register_action("file_storage")
class FileStorageAction(BaseAction):
    """Store validation results on the local filesystem.

    Results are written as JSON, CSV, or Parquet files under the configured
    base directory. Parquet output requires ``polars``, which is imported
    lazily only when that format is selected.

    Example:
        action = FileStorageAction(
            base_path="/data/validations",
            file_format="json",
        )
    """

    def __init__(
        self,
        base_path: str = "./validation_results",
        file_format: str = "json",
        config: FileStorageConfig | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Build the action from explicit arguments or a ready-made config.

        Args:
            base_path: Target directory; used only when ``config`` is None.
            file_format: Output format; used only when ``config`` is None.
            config: A FileStorageConfig, or a dict of its fields. When given,
                ``base_path``, ``file_format`` and ``kwargs`` are ignored.
            **kwargs: Extra FileStorageConfig fields, forwarded when
                ``config`` is None.
        """
        if config is None:
            config = FileStorageConfig(
                base_path=base_path,
                file_format=file_format,
                **kwargs,
            )
        elif isinstance(config, dict):
            config = FileStorageConfig(**config)

        super().__init__(config)
        self._storage_config: FileStorageConfig = config

    @property
    def action_type(self) -> str:
        """Return the action category, always ``"storage"``."""
        return "storage"

    def _do_execute(self, context: ActionContext) -> ActionResult:
        """Write the checkpoint result to a file.

        Args:
            context: Execution context whose ``checkpoint_result`` is stored.

        Returns:
            An ActionResult with SUCCESS and the written path in ``details``,
            or FAILURE carrying the error text. Path construction and
            directory creation are covered by the same error handling as the
            write itself, so a bad filename template or an uncreatable
            directory is reported as FAILURE rather than raised.
        """
        result = context.checkpoint_result
        cfg = self._storage_config

        try:
            # Take one clock reading so {date} and {timestamp} always agree.
            now = datetime.now()
            filename = cfg.filename_template.format(
                checkpoint_name=result.checkpoint_name.replace(" ", "_"),
                run_id=result.run_id,
                format=cfg.file_format,
                date=now.strftime("%Y%m%d"),
                timestamp=now.strftime("%Y%m%d_%H%M%S"),
            )

            base_path = Path(cfg.base_path)
            if cfg.create_dirs:
                base_path.mkdir(parents=True, exist_ok=True)
            file_path = base_path / filename

            # Build data to store
            data = result.to_dict()
            if not cfg.include_issues:
                data.pop("issues", None)

            # Write based on format
            if cfg.file_format == "json":
                with open(file_path, "w", encoding="utf-8") as f:
                    json.dump(data, f, indent=2, default=str)
            elif cfg.file_format == "csv":
                self._write_csv(file_path, data)
            elif cfg.file_format == "parquet":
                self._write_parquet(file_path, data)
            else:
                raise ValueError(f"Unsupported format: {cfg.file_format}")

            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.SUCCESS,
                message=f"Result stored to {file_path}",
                details={"file_path": str(file_path), "format": cfg.file_format},
            )
        except Exception as e:
            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.FAILURE,
                message=f"Failed to store result: {str(e)}",
                error=str(e),
            )

    def _write_csv(self, path: Path, data: dict[str, Any]) -> None:
        """Write the result as CSV.

        Emits one row per issue, or a single summary row (with ``row_count``
        and ``issue_count``) when the result carries no issues.

        Args:
            path: Destination file.
            data: Result dictionary; must contain ``run_id``,
                ``checkpoint_name``, ``source_name`` and ``status``.
        """
        import csv

        common = {
            "run_id": data["run_id"],
            "checkpoint_name": data["checkpoint_name"],
            "source_name": data["source_name"],
            "status": data["status"],
        }

        # A missing key and an explicit None are both treated as "no issues".
        issues = data.get("issues") or []
        if issues:
            rows = [
                {
                    **common,
                    "column": issue.get("column", ""),
                    "issue_type": issue.get("issue_type", ""),
                    "count": issue.get("count", 0),
                    "severity": issue.get("severity", ""),
                }
                for issue in issues
            ]
        else:
            rows = [
                {
                    **common,
                    "row_count": data["row_count"],
                    "issue_count": data["issue_count"],
                }
            ]

        with open(path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=list(rows[0]))
            writer.writeheader()
            writer.writerows(rows)

    def _write_parquet(self, path: Path, data: dict[str, Any]) -> None:
        """Write the result as Parquet using Polars.

        With issues present, the frame holds the issue records plus
        ``run_id``, ``checkpoint_name`` and ``source_name`` columns; otherwise
        it holds a single summary row.

        Args:
            path: Destination file.
            data: Result dictionary.
        """
        import polars as pl

        issues = data.get("issues", [])
        if issues:
            df = pl.DataFrame(issues)
            df = df.with_columns([
                pl.lit(data["run_id"]).alias("run_id"),
                pl.lit(data["checkpoint_name"]).alias("checkpoint_name"),
                pl.lit(data["source_name"]).alias("source_name"),
            ])
        else:
            df = pl.DataFrame({
                "run_id": [data["run_id"]],
                "checkpoint_name": [data["checkpoint_name"]],
                "source_name": [data["source_name"]],
                "status": [data["status"]],
                "row_count": [data["row_count"]],
                "issue_count": [data["issue_count"]],
            })

        df.write_parquet(path)
215
+
216
+
217
+ # =============================================================================
218
+ # S3 Storage Action
219
+ # =============================================================================
220
+
221
+
222
@dataclass
class S3StorageConfig(ActionConfig):
    """Settings consumed by the Amazon S3 storage action.

    Attributes:
        bucket: Name of the target S3 bucket.
        prefix: Key prefix placed in front of every stored object.
        region: AWS region passed to the S3 client.
        access_key_id: AWS access key (optional, uses env/IAM if not set).
        secret_access_key: AWS secret key paired with ``access_key_id``.
        file_format: Output format.
        include_issues: Keep the detailed "issues" entry in the stored data.
    """

    bucket: str = ""
    prefix: str = "validations"
    region: str = "us-east-1"
    access_key_id: str | None = None
    secret_access_key: str | None = None
    file_format: str = "json"
    include_issues: bool = True

    def __post_init__(self):
        # Fall back to a fixed action name when none (or an empty one) is set.
        if not self.name:
            self.name = "s3_storage"
        # Any supplied notify_on value is replaced with ALWAYS.
        self.notify_on = NotifyCondition.ALWAYS
247
+
248
+
249
@register_action("s3_storage")
class S3StorageAction(BaseAction):
    """Store validation results in Amazon S3.

    Uploads each result as one object using boto3, which is imported lazily
    at execution time.

    NOTE: the payload is always serialized as JSON and uploaded with
    content type ``application/json``; ``file_format`` only determines the
    extension used in the object key.

    Example:
        action = S3StorageAction(
            bucket="my-validations",
            prefix="data-quality/daily",
        )
    """

    def __init__(
        self,
        bucket: str = "",
        prefix: str = "validations",
        config: S3StorageConfig | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Build the action from explicit arguments or a ready-made config.

        Args:
            bucket: Target bucket; used only when ``config`` is None.
            prefix: Key prefix; used only when ``config`` is None.
            config: An S3StorageConfig, or a dict of its fields. When given,
                ``bucket``, ``prefix`` and ``kwargs`` are ignored.
            **kwargs: Extra S3StorageConfig fields, forwarded when ``config``
                is None.
        """
        if config is None:
            config = S3StorageConfig(
                bucket=bucket,
                prefix=prefix,
                **kwargs,
            )
        elif isinstance(config, dict):
            config = S3StorageConfig(**config)

        super().__init__(config)
        self._s3_config: S3StorageConfig = config

    @property
    def action_type(self) -> str:
        """Return the action category, always ``"storage"``."""
        return "storage"

    def _do_execute(self, context: ActionContext) -> ActionResult:
        """Upload the checkpoint result to S3.

        The object key has the form
        ``<prefix>/<YYYY>/<MM>/<DD>/<checkpoint_name>_<run_id>.<file_format>``.
        Leading and trailing slashes on the prefix are dropped, and an empty
        prefix is omitted, so the key never starts with ``/`` or contains an
        empty path segment introduced by the prefix.

        Args:
            context: Execution context whose ``checkpoint_result`` is stored.

        Returns:
            An ActionResult with SUCCESS and the bucket/key in ``details``,
            or FAILURE when boto3 is missing, no bucket is configured, or
            the upload raises.
        """
        try:
            import boto3
        except ImportError:
            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.FAILURE,
                message="boto3 not installed. Install with: pip install boto3",
                error="ImportError: boto3",
            )

        cfg = self._s3_config

        # Fail fast with a clear message instead of an opaque client error.
        if not cfg.bucket:
            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.FAILURE,
                message="Failed to store to S3: no bucket configured",
                error="ValueError: bucket is required",
            )

        result = context.checkpoint_result

        # Build S3 key, skipping an empty prefix and stray slashes.
        date_path = datetime.now().strftime("%Y/%m/%d")
        filename = f"{result.checkpoint_name}_{result.run_id}.{cfg.file_format}"
        prefix = (cfg.prefix or "").strip("/")
        key = "/".join(part for part in (prefix, date_path, filename) if part)

        try:
            # Explicit credentials are optional; without them boto3 uses its
            # own default credential resolution.
            client_kwargs: dict[str, Any] = {"region_name": cfg.region}
            if cfg.access_key_id:
                client_kwargs["aws_access_key_id"] = cfg.access_key_id
                client_kwargs["aws_secret_access_key"] = cfg.secret_access_key

            s3 = boto3.client("s3", **client_kwargs)

            # Build data
            data = result.to_dict()
            if not cfg.include_issues:
                data.pop("issues", None)

            # Serialize
            body = json.dumps(data, indent=2, default=str)

            # Upload
            s3.put_object(
                Bucket=cfg.bucket,
                Key=key,
                Body=body.encode("utf-8"),
                ContentType="application/json",
            )

            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.SUCCESS,
                message=f"Result stored to s3://{cfg.bucket}/{key}",
                details={"bucket": cfg.bucket, "key": key},
            )
        except Exception as e:
            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.FAILURE,
                message=f"Failed to store to S3: {str(e)}",
                error=str(e),
            )
346
+
347
+
348
+ # =============================================================================
349
+ # GCS Storage Action
350
+ # =============================================================================
351
+
352
+
353
@dataclass
class GCSStorageConfig(ActionConfig):
    """Settings consumed by the Google Cloud Storage action.

    Attributes:
        bucket: Name of the target GCS bucket.
        prefix: Prefix placed in front of every stored object name.
        project: GCP project ID.
        credentials_path: Path to a service account JSON file.
        file_format: Output format.
        include_issues: Keep the detailed "issues" entry in the stored data.
    """

    bucket: str = ""
    prefix: str = "validations"
    project: str | None = None
    credentials_path: str | None = None
    file_format: str = "json"
    include_issues: bool = True

    def __post_init__(self):
        # Fall back to a fixed action name when none (or an empty one) is set.
        if not self.name:
            self.name = "gcs_storage"
        # Any supplied notify_on value is replaced with ALWAYS.
        self.notify_on = NotifyCondition.ALWAYS
376
+
377
+
378
@register_action("gcs_storage")
class GCSStorageAction(BaseAction):
    """Store validation results in Google Cloud Storage.

    Uploads each result as one blob using google-cloud-storage, which is
    imported lazily at execution time.

    NOTE: the payload is always serialized as JSON and uploaded with
    content type ``application/json``; ``file_format`` only determines the
    extension used in the blob name.

    Example:
        action = GCSStorageAction(
            bucket="my-validations",
            prefix="data-quality/daily",
        )
    """

    def __init__(
        self,
        bucket: str = "",
        prefix: str = "validations",
        config: GCSStorageConfig | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Build the action from explicit arguments or a ready-made config.

        Args:
            bucket: Target bucket; used only when ``config`` is None.
            prefix: Object prefix; used only when ``config`` is None.
            config: A GCSStorageConfig, or a dict of its fields. When given,
                ``bucket``, ``prefix`` and ``kwargs`` are ignored.
            **kwargs: Extra GCSStorageConfig fields, forwarded when
                ``config`` is None.
        """
        if config is None:
            config = GCSStorageConfig(
                bucket=bucket,
                prefix=prefix,
                **kwargs,
            )
        elif isinstance(config, dict):
            config = GCSStorageConfig(**config)

        super().__init__(config)
        self._gcs_config: GCSStorageConfig = config

    @property
    def action_type(self) -> str:
        """Return the action category, always ``"storage"``."""
        return "storage"

    def _do_execute(self, context: ActionContext) -> ActionResult:
        """Upload the checkpoint result to GCS.

        The blob name has the form
        ``<prefix>/<YYYY>/<MM>/<DD>/<checkpoint_name>_<run_id>.<file_format>``.
        Leading and trailing slashes on the prefix are dropped, and an empty
        prefix is omitted, so the name never starts with ``/`` or contains an
        empty path segment introduced by the prefix.

        Args:
            context: Execution context whose ``checkpoint_result`` is stored.

        Returns:
            An ActionResult with SUCCESS and the bucket/blob in ``details``,
            or FAILURE when google-cloud-storage is missing, no bucket is
            configured, or the upload raises.
        """
        try:
            from google.cloud import storage
        except ImportError:
            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.FAILURE,
                message="google-cloud-storage not installed. Install with: pip install google-cloud-storage",
                error="ImportError: google-cloud-storage",
            )

        cfg = self._gcs_config

        # Fail fast with a clear message instead of an opaque client error.
        if not cfg.bucket:
            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.FAILURE,
                message="Failed to store to GCS: no bucket configured",
                error="ValueError: bucket is required",
            )

        result = context.checkpoint_result

        # Build GCS path, skipping an empty prefix and stray slashes.
        date_path = datetime.now().strftime("%Y/%m/%d")
        filename = f"{result.checkpoint_name}_{result.run_id}.{cfg.file_format}"
        prefix = (cfg.prefix or "").strip("/")
        blob_name = "/".join(part for part in (prefix, date_path, filename) if part)

        try:
            # A service account file takes precedence; `project` is only
            # passed when no credentials file is configured.
            if cfg.credentials_path:
                client = storage.Client.from_service_account_json(
                    cfg.credentials_path
                )
            else:
                client = storage.Client(project=cfg.project)

            bucket = client.bucket(cfg.bucket)
            blob = bucket.blob(blob_name)

            # Build data
            data = result.to_dict()
            if not cfg.include_issues:
                data.pop("issues", None)

            # Upload
            blob.upload_from_string(
                json.dumps(data, indent=2, default=str),
                content_type="application/json",
            )

            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.SUCCESS,
                message=f"Result stored to gs://{cfg.bucket}/{blob_name}",
                details={"bucket": cfg.bucket, "blob": blob_name},
            )
        except Exception as e:
            return ActionResult(
                action_name=self.name,
                action_type=self.action_type,
                status=ActionStatus.FAILURE,
                message=f"Failed to store to GCS: {str(e)}",
                error=str(e),
            )