sql-code-graph 1.43.3__tar.gz → 1.44.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/PKG-INFO +1 -1
  2. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/pyproject.toml +1 -1
  3. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/__init__.py +1 -1
  4. sql_code_graph-1.44.0/src/sqlcg/snowflake/ground_truth_csv.py +347 -0
  5. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/.gitignore +0 -0
  6. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/README.md +0 -0
  7. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/__main__.py +0 -0
  8. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/__init__.py +0 -0
  9. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/__init__.py +0 -0
  10. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/analyze.py +0 -0
  11. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/catalog.py +0 -0
  12. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/db.py +0 -0
  13. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/find.py +0 -0
  14. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/gain.py +0 -0
  15. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/git.py +0 -0
  16. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/index.py +0 -0
  17. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/install.py +0 -0
  18. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/mcp.py +0 -0
  19. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/observe.py +0 -0
  20. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/reindex.py +0 -0
  21. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/report.py +0 -0
  22. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/uninstall.py +0 -0
  23. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/viz.py +0 -0
  24. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/commands/watch.py +0 -0
  25. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/coverage.py +0 -0
  26. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/cli/main.py +0 -0
  27. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/__init__.py +0 -0
  28. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/config.py +0 -0
  29. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/duckdb_backend.py +0 -0
  30. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/freshness.py +0 -0
  31. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/graph_db.py +0 -0
  32. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/jobs.py +0 -0
  33. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/noise_match.py +0 -0
  34. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/queries.cypher +0 -0
  35. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/queries.py +0 -0
  36. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/queries.sql +0 -0
  37. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/schema.cypher +0 -0
  38. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/core/schema.py +0 -0
  39. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/indexer/__init__.py +0 -0
  40. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/indexer/dbt_adapter.py +0 -0
  41. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/indexer/error_classify.py +0 -0
  42. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/indexer/git_delta.py +0 -0
  43. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/indexer/indexer.py +0 -0
  44. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/indexer/pool.py +0 -0
  45. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/indexer/walker.py +0 -0
  46. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/indexer/watcher.py +0 -0
  47. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/lineage/__init__.py +0 -0
  48. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/lineage/aggregator.py +0 -0
  49. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/lineage/schema_resolver.py +0 -0
  50. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/lineage/temp_collapse.py +0 -0
  51. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/metrics/__init__.py +0 -0
  52. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/metrics/store.py +0 -0
  53. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/observe/__init__.py +0 -0
  54. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/observe/clearing.py +0 -0
  55. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/observe/decision.py +0 -0
  56. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/observe/normalize.py +0 -0
  57. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/observe/recall.py +0 -0
  58. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/parsers/__init__.py +0 -0
  59. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/parsers/ansi_parser.py +0 -0
  60. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/parsers/base.py +0 -0
  61. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/parsers/bigquery_parser.py +0 -0
  62. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/parsers/dynamic_name.py +0 -0
  63. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/parsers/postgres_parser.py +0 -0
  64. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/parsers/registry.py +0 -0
  65. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/parsers/snowflake_parser.py +0 -0
  66. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/parsers/tsql_parser.py +0 -0
  67. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/__init__.py +0 -0
  68. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/control.py +0 -0
  69. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/exceptions.py +0 -0
  70. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/models.py +0 -0
  71. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/noise_filter.py +0 -0
  72. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/read_client.py +0 -0
  73. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/selfheal.py +0 -0
  74. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/server.py +0 -0
  75. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/skill.py +0 -0
  76. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/tools.py +0 -0
  77. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/server/writer.py +0 -0
  78. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/snowflake/__init__.py +0 -0
  79. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/snowflake/config.py +0 -0
  80. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/snowflake/connection.py +0 -0
  81. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/snowflake/exports.py +0 -0
  82. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/snowflake/ground_truth_cache.py +0 -0
  83. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/snowflake/oracle_exports.py +0 -0
  84. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/utils/__init__.py +0 -0
  85. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/utils/hashing.py +0 -0
  86. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/utils/ignore.py +0 -0
  87. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/utils/logging.py +0 -0
  88. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/viz/__init__.py +0 -0
  89. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/viz/assets/force-graph.min.js +0 -0
  90. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/viz/assets/template.html +0 -0
  91. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/viz/data.py +0 -0
  92. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/viz/render.py +0 -0
  93. {sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/viz/tags.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.43.3
3
+ Version: 1.44.0
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sql-code-graph"
7
- version = "1.43.3"
7
+ version = "1.44.0"
8
8
  description = "SQL code graph analyzer and lineage tracer"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.12"
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.43.3"
3
+ __version__ = "1.44.0"
4
4
 
5
5
  __all__ = ["__version__"]
@@ -0,0 +1,347 @@
1
+ """Ground-truth export reader — reads a pre-aggregated counts file from ``.sqlcg/``.
2
+
3
+ Allows a user who lacks ``IMPORTED PRIVILEGES on DATABASE SNOWFLAKE`` (or
4
+ Enterprise edition) to consume the 5 aggregate panel counts produced by an
5
+ access-holder's ``gain --from-snowflake --json`` run. The access-holder
6
+ writes the file via ``gain --from-snowflake --export-ground-truth <path>``
7
+ (PR3); this reader consumes it in the ``gain --from-snowflake`` precedence
8
+ chain (PR2).
9
+
10
+ **Contract A**: the export carries the canonical aggregate SQL's OUTPUT —
11
+ zero re-computation locally. This module does NOT port the triple-FLATTEN
12
+ aggregate logic from ``oracle_exports.py``.
13
+
14
+ Supported formats (BQ-3: ``.json`` preferred when both exist):
15
+
16
+ * ``.json`` — the exact shape written by ``write_ground_truth_export`` and
17
+ mirrored by ``ground_truth_cache.py``::
18
+
19
+ {
20
+ "row": {
21
+ "production_object_volume": 2668,
22
+ "production_edge_volume": ...,
23
+ "total_write_queries": ...,
24
+ "basesources_write_queries": ...,
25
+ "non_catalog_write_targets": ...
26
+ },
27
+ "window_days": 30,
28
+ "captured_at": "2026-06-16T14:00:00+00:00",
29
+ "scope": "MY_DWH"
30
+ }
31
+
32
+ * ``.csv`` — single data row, headers case-folded, delimiter sniffed
33
+ (comma or semicolon), extra columns ignored::
34
+
35
+ scope,window_days,captured_at,production_object_volume,...
36
+ MY_DWH,30,2026-06-16T14:00:00+00:00,2668,...
37
+
38
+ Imports: csv, io, json, pathlib — stdlib only (plus project-local sqlcg helpers). No
39
+ ``snowflake.connector``, no DuckDB.
40
+
41
+ Plan: plan/sprints/ws2_csv_ground_truth_ingest.md §PR1.
42
+ """
43
+
44
+ from __future__ import annotations
45
+
46
+ import csv
47
+ import io
48
+ import json
49
+ from pathlib import Path
50
+
51
+ from sqlcg.core.config import get_db_path
52
+ from sqlcg.snowflake.ground_truth_cache import _normalize_scope
53
+
54
# Names of the five aggregate counts carried in an export's ``row`` mapping.
# They are the columns produced by build_aggregates_sql
# (oracle_exports.py:499-507) and read by the panel/gain renderers.
_CANONICAL_ROW_KEYS: frozenset[str] = frozenset(
    (
        "production_object_volume",
        "production_edge_volume",
        "total_write_queries",
        "basesources_write_queries",
        "non_catalog_write_targets",
    )
)

# Every column a CSV export has to provide (matched after lower-casing the
# header): the five count columns plus scope, window_days and captured_at,
# i.e. eight names in total.
_REQUIRED_COLS: frozenset[str] = _CANONICAL_ROW_KEYS.union(
    ("scope", "window_days", "captured_at")
)
71
+
72
+
73
def _csv_export_path() -> tuple[Path, Path]:
    """Locate the two candidate ground-truth export files.

    Both file names are resolved inside the directory that holds the path
    returned by ``get_db_path()`` (the ``.sqlcg/`` directory next to
    ``graph.db``), following the ``_get_cache_path`` convention in ``gain.py``.

    Returns:
        ``(json_path, csv_path)``.  The JSON path comes first because it is
        the preferred file when both exist (BQ-3 resolved decision).
    """
    sqlcg_dir = get_db_path().parent
    json_path = sqlcg_dir / "ground_truth_export.json"
    csv_path = sqlcg_dir / "ground_truth_export.csv"
    return json_path, csv_path
85
+
86
+
87
def _sniff_delimiter(header_line: str) -> str:
    """Guess the CSV delimiter by counting candidates in the header line.

    A semicolon is chosen only when it occurs strictly more often than a
    comma; ties, single-column headers and empty strings all yield ``","``.
    Same idiom as ``sqlcg.cli.commands.catalog._sniff_delimiter``.

    Returns:
        ``";"`` or ``","``.
    """
    semicolons = header_line.count(";")
    if semicolons > header_line.count(","):
        return ";"
    return ","
97
+
98
+
99
def _parse_csv(path: Path) -> dict:
    """Parse a single-row ground-truth CSV export.

    Header names are stripped and lower-cased before matching, the delimiter
    is sniffed from the first line, and columns beyond the required ones are
    ignored.  Only the first data row is used.

    Args:
        path: Path to the ``.csv`` file.

    Returns:
        A ``read_cache``-shaped dict ``{row, window_days, captured_at, scope}``.

    Raises:
        ValueError: if the file is empty, cannot be parsed as CSV, is missing
            a required column, has no data rows, has a data row shorter than
            the header, has a non-numeric count or ``window_days``, or has an
            empty ``captured_at``.
    """
    # ``utf-8-sig`` drops a leading byte-order mark when present and otherwise
    # decodes like ``utf-8``.  With plain ``utf-8`` the BOM stays glued to the
    # first header name, which is then reported as a missing column.
    raw = path.read_text(encoding="utf-8-sig", errors="replace")
    lines = raw.splitlines()
    if not lines:
        raise ValueError(f"Ground-truth CSV export is empty: {path}")

    delimiter = _sniff_delimiter(lines[0])
    reader = csv.DictReader(io.StringIO(raw), delimiter=delimiter)
    try:
        fieldnames = reader.fieldnames
        # Only the first data row matters; later rows are never read.
        first_row = next(reader, None)
    except csv.Error as exc:
        # Keep the documented contract: malformed input surfaces as ValueError.
        raise ValueError(
            f"Ground-truth CSV export could not be parsed as CSV: {path}"
        ) from exc

    if fieldnames is None:
        raise ValueError(f"Could not read header from ground-truth CSV export: {path}")

    # Case-fold field names for robust header detection; the map goes from the
    # folded name back to the header spelling used as the DictReader key.
    field_map: dict[str, str] = {name.strip().lower(): name for name in fieldnames}
    missing = _REQUIRED_COLS - set(field_map)
    if missing:
        raise ValueError(
            f"Ground-truth CSV export missing required columns {sorted(missing)}: {path}"
        )

    if first_row is None:
        raise ValueError(f"Ground-truth CSV export has no data rows: {path}")

    # DictReader fills cells that a short row does not supply with None, so
    # ``.get(col, "")`` would still hand back None and ``.strip()`` would fail
    # with AttributeError.  Reject short rows explicitly instead.
    absent = sorted(col for col in _REQUIRED_COLS if first_row.get(field_map[col]) is None)
    if absent:
        raise ValueError(
            f"Ground-truth CSV export: data row has no value for columns {absent}: {path}"
        )

    # Parse the 5 canonical int counts.  Sorted iteration keeps the reported
    # column deterministic when more than one value is bad.
    row: dict[str, int] = {}
    for key in sorted(_CANONICAL_ROW_KEYS):
        raw_val = first_row[field_map[key]].strip()
        try:
            row[key] = int(raw_val)
        except ValueError as exc:
            raise ValueError(
                f"Ground-truth CSV export: non-numeric value {raw_val!r} for column {key!r}: {path}"
            ) from exc

    # Parse window_days as int.
    raw_window = first_row[field_map["window_days"]].strip()
    try:
        window_days = int(raw_window)
    except ValueError as exc:
        raise ValueError(
            f"Ground-truth CSV export: non-numeric window_days {raw_window!r}: {path}"
        ) from exc

    captured_at = first_row[field_map["captured_at"]].strip()
    if not captured_at:
        raise ValueError(f"Ground-truth CSV export: empty captured_at: {path}")

    # A blank scope cell means account-wide (both parts None).  Otherwise the
    # text before the first "/" is the database and the remainder, if any, is
    # the schema; empty parts are passed on as None.
    scope_raw = first_row[field_map["scope"]].strip()
    database_part, _, schema_part = scope_raw.partition("/")
    scope = _normalize_scope(database_part or None, schema_part or None)

    return {
        "row": row,
        "window_days": window_days,
        "captured_at": captured_at,
        "scope": scope,
    }
183
+
184
+
185
def _parse_json(path: Path) -> dict:
    """Parse a JSON ground-truth export file.

    The JSON format mirrors ``ground_truth_cache.py``'s cache schema exactly.

    Args:
        path: Path to the ``.json`` file.

    Returns:
        A ``read_cache``-shaped dict ``{row, window_days, captured_at, scope}``.

    Raises:
        ValueError: if the file cannot be read or is not valid JSON, is not a
            JSON object, is missing a required key, or holds a count or
            ``window_days`` that is not a finite number (booleans, ``NaN``
            and ``Infinity`` are rejected), or has an invalid ``captured_at``.
    """

    def _as_int(value):
        # bool is an int subclass, so int(True) would quietly become 1; a JSON
        # true/false is not a count.
        if isinstance(value, bool):
            raise TypeError("boolean is not a numeric count")
        return int(value)

    # int() raises OverflowError (not ValueError) for float infinity, which
    # json.loads produces for the non-standard ``Infinity`` literal.
    coercion_errors = (ValueError, TypeError, OverflowError)

    try:
        with open(path, "rb") as f:
            entry = json.loads(f.read())
    except (OSError, ValueError, RecursionError) as exc:
        # ValueError covers JSONDecodeError and UnicodeDecodeError; unreadable
        # files and pathologically nested documents get the same treatment.
        raise ValueError(f"Ground-truth JSON export is not valid JSON: {path}") from exc

    if not isinstance(entry, dict):
        raise ValueError(f"Ground-truth JSON export is not a JSON object: {path}")

    for key in ("row", "window_days", "captured_at", "scope"):
        if key not in entry:
            raise ValueError(f"Ground-truth JSON export missing required key {key!r}: {path}")

    inner = entry["row"]
    if not isinstance(inner, dict):
        raise ValueError(f"Ground-truth JSON export: 'row' must be a JSON object: {path}")

    missing_row_keys = _CANONICAL_ROW_KEYS - set(inner)
    if missing_row_keys:
        raise ValueError(
            f"Ground-truth JSON export: 'row' missing keys {sorted(missing_row_keys)}: {path}"
        )

    # Coerce to int (the JSON spec allows float; guard non-numeric values).
    # Sorted iteration keeps the reported key deterministic.
    row: dict[str, int] = {}
    for key in sorted(_CANONICAL_ROW_KEYS):
        val = inner[key]
        try:
            row[key] = _as_int(val)
        except coercion_errors as exc:
            raise ValueError(
                f"Ground-truth JSON export: non-numeric value {val!r} for row key {key!r}: {path}"
            ) from exc

    # window_days coerce to int.
    try:
        window_days = _as_int(entry["window_days"])
    except coercion_errors as exc:
        raise ValueError(
            f"Ground-truth JSON export: non-numeric window_days {entry['window_days']!r}: {path}"
        ) from exc

    captured_at = entry["captured_at"]
    if not isinstance(captured_at, str) or not captured_at.strip():
        raise ValueError(f"Ground-truth JSON export: invalid captured_at: {path}")

    # A non-empty scope string is split at the first "/" into database and
    # optional schema and re-normalised; anything else (null, empty string,
    # non-string) is treated as account-wide.
    scope_raw = entry.get("scope")
    if isinstance(scope_raw, str) and scope_raw:
        database_part, _, schema_part = scope_raw.partition("/")
        scope = _normalize_scope(database_part or None, schema_part or None)
    else:
        scope = None

    return {
        "row": row,
        "window_days": window_days,
        "captured_at": captured_at,
        "scope": scope,
    }
262
+
263
+
264
+ def read_csv_export(path: Path) -> dict | None:
265
+ """Read and validate a ground-truth export file (JSON or CSV).
266
+
267
+ Prefers the ``.json`` sibling when ``path`` does not exist but the
268
+ sibling does — but callers are encouraged to pass the resolved path
269
+ from ``_csv_export_path()`` directly.
270
+
271
+ BQ-3 (resolved): when both ``.json`` and ``.csv`` exist at the SAME
272
+ stem, the caller (``_csv_export_path`` → gain.py PR2 wiring) resolves
273
+ preference BEFORE calling this function by passing the JSON path first.
274
+ This function routes on the file's suffix.
275
+
276
+ Args:
277
+ path: Full path to the export file. The suffix must be ``.json``
278
+ or ``.csv``; if the file is absent, ``None`` is returned.
279
+
280
+ Returns:
281
+ A ``read_cache``-shaped dict ``{row, window_days, captured_at, scope}``
282
+ with all 5 canonical row keys present as ``int``\\s and ``scope``
283
+ normalised to ``None`` (account-wide), ``"MY_DWH"`` (database), or
284
+ ``"MY_DWH/SCHEMA"`` (database+schema). Returns ``None`` if the file
285
+ does not exist.
286
+
287
+ Raises:
288
+ ValueError: if the file exists but is malformed — empty, missing
289
+ required columns/keys, non-numeric counts. The message always
290
+ contains the file path so callers can surface it.
291
+ """
292
+ if not path.exists():
293
+ return None
294
+
295
+ suffix = path.suffix.lower()
296
+ if suffix == ".json":
297
+ return _parse_json(path)
298
+ if suffix == ".csv":
299
+ return _parse_csv(path)
300
+
301
+ # Unknown extension — treat as CSV (best-effort).
302
+ return _parse_csv(path)
303
+
304
+
305
def write_ground_truth_export(
    path: Path,
    row: dict,
    window_days: int,
    captured_at: str,
    database: str | None = None,
    schema: str | None = None,
) -> None:
    """Write a ground-truth export JSON file that ``read_csv_export`` can read back.

    Intended for the live-success block in ``gain.py`` (PR3) when
    ``--export-ground-truth`` is set, and modelled on ``write_cache`` in
    ``ground_truth_cache.py``: the payload goes to a temp file next to
    ``path`` (``path`` with its suffix replaced by ``.json.tmp``) and is then
    moved over ``path`` with ``Path.replace`` so a partial file is never left
    at the destination.

    The document has the shape ``_parse_json`` expects: ``row``,
    ``window_days``, ``captured_at`` and ``scope``.  Entries of ``row`` whose
    key is not one of the canonical count names are dropped.

    Any exception raised while creating the directory, writing the temp file
    or moving it is swallowed after a best-effort removal of the temp file,
    so the function returns ``None`` whether or not the export was written.
    Errors from converting the counts or serialising the document happen
    before that guarded section and do propagate.

    Args:
        path: Destination path for the export JSON file.
        row: The aggregate counts dict (5 canonical keys, all ``int``).
        window_days: Lookback window in days.
        captured_at: ISO-8601 string timestamp of the probe.
        database: Optional database scope (already B5-validated by caller).
        schema: Optional schema scope (already B5-validated by caller).
    """
    # Keep only the canonical counts, in the order the caller supplied them.
    counts: dict = {}
    for name, value in row.items():
        if name in _CANONICAL_ROW_KEYS:
            counts[name] = int(value)

    document: dict = {
        "row": counts,
        "window_days": int(window_days),
        "captured_at": captured_at,
        "scope": _normalize_scope(database, schema),
    }
    serialized = json.dumps(document, indent=2)

    scratch = path.with_suffix(".json.tmp")
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        scratch.write_text(serialized, encoding="utf-8")
        scratch.replace(path)
    except Exception:
        # Best-effort write: drop the temp file and report nothing.
        try:
            scratch.unlink(missing_ok=True)
        except Exception:
            pass