PyPI - sql-code-graph - Versions diffs - 1.43.3__tar.gz → 1.44.0__tar.gz - Mend

sql-code-graph 1.43.3.tar.gz → 1.44.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

{sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sql-code-graph
-Version: 1.43.3
+Version: 1.44.0
 Summary: SQL code graph analyzer and lineage tracer
 Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
 Project-URL: Repository, https://github.com/Warhorze/sql-code-graph

{sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "sql-code-graph"
-version = "1.43.3"
+version = "1.44.0"
 description = "SQL code graph analyzer and lineage tracer"
 readme = "README.md"
 requires-python = ">=3.12"

{sql_code_graph-1.43.3 → sql_code_graph-1.44.0}/src/sqlcg/__init__.py RENAMED Viewed

@@ -1,5 +1,5 @@
 """SQL Code Graph - SQL lineage and dependency analysis tool."""
-__version__ = "1.43.3"
+__version__ = "1.44.0"
 __all__ = ["__version__"]

sql_code_graph-1.44.0/src/sqlcg/snowflake/ground_truth_csv.py ADDED Viewed

@@ -0,0 +1,347 @@
+"""Ground-truth export reader — reads a pre-aggregated counts file from ``.sqlcg/``.
+Allows a user who lacks ``IMPORTED PRIVILEGES on DATABASE SNOWFLAKE`` (or
+Enterprise edition) to consume the 5 aggregate panel counts produced by an
+access-holder's ``gain --from-snowflake --json`` run.  The access-holder
+writes the file via ``gain --from-snowflake --export-ground-truth <path>``
+(PR3); this reader consumes it in the ``gain --from-snowflake`` precedence
+chain (PR2).
+**Contract A**: the export carries the canonical aggregate SQL's OUTPUT —
+zero re-computation locally.  This module does NOT port the triple-FLATTEN
+aggregate logic from ``oracle_exports.py``.
+Supported formats (BQ-3: ``.json`` preferred when both exist):
+* ``.json`` — the exact shape written by ``write_ground_truth_export`` and
+  mirrored by ``ground_truth_cache.py``::
+      {
+          "row": {
+              "production_object_volume": 2668,
+              "production_edge_volume": ...,
+              "total_write_queries": ...,
+              "basesources_write_queries": ...,
+              "non_catalog_write_targets": ...
+          },
+          "window_days": 30,
+          "captured_at": "2026-06-16T14:00:00+00:00",
+          "scope": "MY_DWH"
+      }
+* ``.csv`` — single data row, headers case-folded, delimiter sniffed
+  (comma or semicolon), extra columns ignored::
+      scope,window_days,captured_at,production_object_volume,...
+      MY_DWH,30,2026-06-16T14:00:00+00:00,2668,...
+Imports: csv, io, json, pathlib from the stdlib, plus the in-package helpers
+``sqlcg.core.config`` and ``sqlcg.snowflake.ground_truth_cache``.  No
+``snowflake.connector``, no DuckDB.
+Plan: plan/sprints/ws2_csv_ground_truth_ingest.md §PR1.
+"""
+from __future__ import annotations
+import csv
+import io
+import json
+from pathlib import Path
+from sqlcg.core.config import get_db_path
+from sqlcg.snowflake.ground_truth_cache import _normalize_scope
+# Names of the five aggregate counts carried in an export's "row" payload.
+# Per the original note they are produced by build_aggregates_sql
+# (oracle_exports.py:499-507) and consumed by the panel/gain renderers.
+_CANONICAL_ROW_KEYS: frozenset[str] = frozenset(
+    (
+        "production_object_volume",
+        "production_edge_volume",
+        "total_write_queries",
+        "basesources_write_queries",
+        "non_catalog_write_targets",
+    )
+)
+# Columns a CSV export must provide (matched after lower-casing the header):
+# the five counts plus scope, window_days and captured_at — eight in total.
+_REQUIRED_COLS: frozenset[str] = frozenset(
+    (*_CANONICAL_ROW_KEYS, "scope", "window_days", "captured_at")
+)
+def _csv_export_path() -> tuple[Path, Path]:
+    """Locate the two candidate ground-truth export files.
+    Both candidates sit in the directory that holds the database file
+    reported by ``get_db_path()``.
+    Returns:
+        ``(json_path, csv_path)``.  The JSON path comes first because the
+        JSON export is the preferred one when both files exist (BQ-3).
+    """
+    export_dir = get_db_path().parent
+    json_path = export_dir / "ground_truth_export.json"
+    csv_path = export_dir / "ground_truth_export.csv"
+    return json_path, csv_path
+def _sniff_delimiter(header_line: str) -> str:
+    """Guess whether a CSV header line is comma- or semicolon-delimited.
+    Args:
+        header_line: The header line of the CSV file.
+    Returns:
+        ``";"`` only when semicolons strictly outnumber commas; otherwise
+        ``","`` (this covers ties and headers containing neither character).
+    """
+    # max() keeps the first candidate on a tie, so "," stays the default.
+    return max((",", ";"), key=header_line.count)
+def _parse_csv(path: Path) -> dict:
+    """Parse a single-row ground-truth CSV export.
+    Header names are matched case-insensitively, the delimiter (comma or
+    semicolon) is sniffed from the header line, extra columns are ignored
+    and only the first data row is used.  A leading UTF-8 byte-order mark
+    is tolerated.
+    Args:
+        path: Path to the ``.csv`` file.
+    Returns:
+        A ``read_cache``-shaped dict ``{row, window_days, captured_at, scope}``.
+    Raises:
+        ValueError: if the file is empty, missing the header, missing a
+            required column, has a short/non-numeric data row, or has no
+            data rows.
+    """
+    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM, which
+    # would otherwise be glued onto the first header name and make that
+    # column look missing.
+    raw = path.read_text(encoding="utf-8-sig", errors="replace")
+    lines = raw.splitlines()
+    if not lines:
+        raise ValueError(f"Ground-truth CSV export is empty: {path}")
+    # DictReader skips empty leading lines when it looks for the header, so
+    # sniff the delimiter on the first non-empty line rather than on lines[0].
+    header_line = next((line for line in lines if line), "")
+    delimiter = _sniff_delimiter(header_line)
+    reader = csv.DictReader(io.StringIO(raw), delimiter=delimiter)
+    if reader.fieldnames is None:
+        raise ValueError(f"Could not read header from ground-truth CSV export: {path}")
+    # Map lower-cased, stripped header names back to the names as written.
+    field_map: dict[str, str] = {f.strip().lower(): f for f in reader.fieldnames}
+    missing = _REQUIRED_COLS - set(field_map)
+    if missing:
+        raise ValueError(
+            f"Ground-truth CSV export missing required columns {sorted(missing)}: {path}"
+        )
+    row_raw = next(reader, None)
+    if row_raw is None:
+        raise ValueError(f"Ground-truth CSV export has no data rows: {path}")
+    def _cell(column: str) -> str:
+        # DictReader stores None for cells missing from a short row; treat
+        # them as empty so the checks below raise ValueError, not AttributeError.
+        return (row_raw.get(field_map[column]) or "").strip()
+    # Parse the 5 canonical int counts (sorted for deterministic error order).
+    row: dict[str, int] = {}
+    for key in sorted(_CANONICAL_ROW_KEYS):
+        raw_val = _cell(key)
+        try:
+            row[key] = int(raw_val)
+        except ValueError as exc:
+            raise ValueError(
+                f"Ground-truth CSV export: non-numeric value {raw_val!r} for column {key!r}: {path}"
+            ) from exc
+    # Parse window_days as int.
+    raw_window = _cell("window_days")
+    try:
+        window_days = int(raw_window)
+    except ValueError as exc:
+        raise ValueError(
+            f"Ground-truth CSV export: non-numeric window_days {raw_window!r}: {path}"
+        ) from exc
+    captured_at = _cell("captured_at")
+    if not captured_at:
+        raise ValueError(f"Ground-truth CSV export: empty captured_at: {path}")
+    # "MY_DWH" or "MY_DWH/SCHEMA"; a blank scope or blank part becomes None
+    # before being handed to _normalize_scope.
+    database, _, schema = _cell("scope").partition("/")
+    scope = _normalize_scope(database or None, schema or None)
+    return {
+        "row": row,
+        "window_days": window_days,
+        "captured_at": captured_at,
+        "scope": scope,
+    }
+def _parse_json(path: Path) -> dict:
+    """Parse a JSON ground-truth export file.
+    The expected document is an object with the keys ``row``,
+    ``window_days``, ``captured_at`` and ``scope``, where ``row`` is itself
+    an object holding the 5 canonical counts.
+    Args:
+        path: Path to the ``.json`` file.
+    Returns:
+        A ``read_cache``-shaped dict ``{row, window_days, captured_at, scope}``.
+    Raises:
+        ValueError: if the file cannot be read or is not valid JSON, is
+            missing required keys, or contains count / ``window_days``
+            values that cannot be converted to ``int`` (including the
+            non-finite numbers ``Infinity`` and ``NaN``).
+    """
+    try:
+        # json.loads accepts bytes and detects the UTF encoding itself.
+        entry = json.loads(path.read_bytes())
+    except (OSError, ValueError, RecursionError) as exc:
+        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
+        raise ValueError(f"Ground-truth JSON export is not valid JSON: {path}") from exc
+    if not isinstance(entry, dict):
+        raise ValueError(f"Ground-truth JSON export is not a JSON object: {path}")
+    for key in ("row", "window_days", "captured_at", "scope"):
+        if key not in entry:
+            raise ValueError(f"Ground-truth JSON export missing required key {key!r}: {path}")
+    inner = entry["row"]
+    if not isinstance(inner, dict):
+        raise ValueError(f"Ground-truth JSON export: 'row' must be a JSON object: {path}")
+    missing_row_keys = _CANONICAL_ROW_KEYS - set(inner)
+    if missing_row_keys:
+        raise ValueError(
+            f"Ground-truth JSON export: 'row' missing keys {sorted(missing_row_keys)}: {path}"
+        )
+    # Coerce to int (the JSON spec allows float; guard non-numeric values).
+    # OverflowError is what int() raises for Infinity, which json.loads accepts.
+    row: dict[str, int] = {}
+    for key in sorted(_CANONICAL_ROW_KEYS):
+        val = inner[key]
+        try:
+            row[key] = int(val)
+        except (ValueError, TypeError, OverflowError) as exc:
+            raise ValueError(
+                f"Ground-truth JSON export: non-numeric value {val!r} for row key {key!r}: {path}"
+            ) from exc
+    # window_days coerce to int.
+    try:
+        window_days = int(entry["window_days"])
+    except (ValueError, TypeError, OverflowError) as exc:
+        raise ValueError(
+            f"Ground-truth JSON export: non-numeric window_days {entry['window_days']!r}: {path}"
+        ) from exc
+    captured_at = entry["captured_at"]
+    if not isinstance(captured_at, str) or not captured_at.strip():
+        raise ValueError(f"Ground-truth JSON export: invalid captured_at: {path}")
+    # A non-empty string scope is split into "DATABASE[/SCHEMA]" and passed
+    # through _normalize_scope; anything else means account-wide (None).
+    scope_raw = entry["scope"]
+    if isinstance(scope_raw, str) and scope_raw:
+        database, _, schema = scope_raw.partition("/")
+        scope = _normalize_scope(database or None, schema or None)
+    else:
+        scope = None
+    return {
+        "row": row,
+        "window_days": window_days,
+        "captured_at": captured_at,
+        "scope": scope,
+    }
+def read_csv_export(path: Path) -> dict | None:
+    """Load a ground-truth export, picking the parser from the file suffix.
+    A ``.json`` suffix (compared case-insensitively) is handed to
+    ``_parse_json``; ``.csv`` and any other suffix are handed to
+    ``_parse_csv`` as a best-effort fallback.  This function only inspects
+    ``path`` itself and never looks for a sibling file: when both a JSON and
+    a CSV export exist, the caller decides which path to pass (BQ-3: JSON
+    first).
+    Args:
+        path: Full path to the export file.
+    Returns:
+        The ``{row, window_days, captured_at, scope}`` dict produced by the
+        selected parser, or ``None`` if ``path`` does not exist.
+    Raises:
+        ValueError: propagated from the selected parser when the file
+            exists but is malformed.
+    """
+    if not path.exists():
+        return None
+    parser = _parse_json if path.suffix.lower() == ".json" else _parse_csv
+    return parser(path)
+def write_ground_truth_export(
+    path: Path,
+    row: dict,
+    window_days: int,
+    captured_at: str,
+    database: str | None = None,
+    schema: str | None = None,
+) -> None:
+    """Write a ground-truth export JSON file that round-trips through ``read_csv_export``.
+    Called from the live-success block in ``gain.py`` (PR3) when
+    ``--export-ground-truth`` is set.  Mirrors ``write_cache`` in
+    ``ground_truth_cache.py`` — atomic temp-file + rename to prevent partial
+    writes.
+    The written file is the EXACT shape ``_parse_json`` expects so that
+    ``read_csv_export(path)`` round-trips without loss.
+    Args:
+        path: Destination path for the export JSON file.
+        row: The aggregate counts dict (5 canonical keys, all ``int``).
+        window_days: Lookback window in days.
+        captured_at: ISO-8601 string timestamp of the probe.
+        database: Optional database scope (already B5-validated by caller).
+        schema: Optional schema scope (already B5-validated by caller).
+    """
+    entry: dict = {
+        "row": {k: int(v) for k, v in row.items() if k in _CANONICAL_ROW_KEYS},
+        "window_days": int(window_days),
+        "captured_at": captured_at,
+        "scope": _normalize_scope(database, schema),
+    }
+    payload = json.dumps(entry, indent=2)
+    tmp = path.with_suffix(".json.tmp")
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        tmp.write_text(payload, encoding="utf-8")
+        tmp.replace(path)
+    except Exception:
+        try:
+            tmp.unlink(missing_ok=True)
+        except Exception:
+            pass