PyPI - waxsql - Versions diffs - 1.0.0__py3-none-any.whl - Mend

waxsql 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

waxsql/__init__.py +158 -0
waxsql/ast.py +757 -0
waxsql/catalog.py +363 -0
waxsql/cli.py +888 -0
waxsql/config.py +477 -0
waxsql/context.py +255 -0
waxsql/data.py +99 -0
waxsql/gen/__init__.py +51 -0
waxsql/gen/cte.py +367 -0
waxsql/gen/data/__init__.py +14 -0
waxsql/gen/data/columns.py +48 -0
waxsql/gen/data/emit.py +247 -0
waxsql/gen/data/rows.py +236 -0
waxsql/gen/data/strategies.py +299 -0
waxsql/gen/expr.py +723 -0
waxsql/gen/select.py +831 -0
waxsql/gen/setop.py +259 -0
waxsql/gen/subquery.py +397 -0
waxsql/gen/window.py +398 -0
waxsql/pretty.py +81 -0
waxsql/printer.py +688 -0
waxsql/py.typed +0 -0
waxsql/schema.py +557 -0
waxsql/scope.py +391 -0
waxsql/types.py +187 -0
waxsql/validate/__init__.py +52 -0
waxsql/validate/parse.py +194 -0
waxsql/validate/plan.py +149 -0
waxsql/validate/syntax.py +87 -0
waxsql-1.0.0.dist-info/METADATA +746 -0
waxsql-1.0.0.dist-info/RECORD +35 -0
waxsql-1.0.0.dist-info/WHEEL +5 -0
waxsql-1.0.0.dist-info/entry_points.txt +2 -0
waxsql-1.0.0.dist-info/licenses/LICENSE +21 -0
waxsql-1.0.0.dist-info/top_level.txt +1 -0

waxsql/validate/parse.py ADDED Viewed

@@ -0,0 +1,194 @@
+"""Parse-tier validation via PREPARE against a live PostgreSQL.
+Role in the system: the middle tier — strictly stronger than SYNTAX,
+strictly cheaper than PLAN. Used when the test harness has a live
+DB available and wants to catch the entire class of name/type
+errors that pglast silently passes.
+Runs full PG parse-analysis: column resolution, type checking,
+aggregate-context rules, function lookup, view validity. Catches
+the entire class of "PG accepts at SYNTAX tier but rejects at
+parse-analysis" issues that pglast (libpg_query) silently passes:
+  * Column refs to undefined aliases or non-existent columns
+  * Aggregates in disallowed contexts (WHERE, JOIN ON, aggregate args)
+  * Window functions in disallowed contexts (WHERE, HAVING, agg args)
+  * GROUP BY consistency violations
+  * Type mismatches (e.g. `int = uuid` with no implicit cast)
+  * Set-op arms with mismatched column counts/types
+What it does NOT catch:
+  * Operator-class lookup failures for ORDER BY / DISTINCT /
+    GROUP BY (deferred to PLAN tier — would need EXPLAIN)
+  * Runtime errors (division by zero, type cast failures on
+    actual data, etc.)
+Cost: roughly 1ms per query on a local PG with the schema already
+loaded. Schema setup itself is ~1ms per CREATE TABLE plus FK/index
+DDL — typically 50-300ms total for a milestone-sized schema.
+The savepoint-around-PREPARE pattern is what lets a single failing
+query not poison the surrounding transaction's state (the schema
+setup, prior PREPAREs, ...). Without it, the first parse error
+would put the surrounding transaction into an aborted state and
+every subsequent check in the batch would error with "current
+transaction is aborted, commands ignored until end of transaction
+block" — losing all signal beyond the first failure. The test
+harness owns the connection and transaction; this module is a thin
+wrapper around the PREPARE + savepoint dance.
+"""
+from __future__ import annotations
+import contextlib
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Optional
+if TYPE_CHECKING:
+    # psycopg is an OPTIONAL runtime dep (only required when callers
+    # actually use the PARSE tier); but for static type-checkers we
+    # want the annotations to carry the real types. The TYPE_CHECKING
+    # block is False at runtime (so no import happens for users of
+    # SYNTAX-tier only) but True for mypy/pyright. The `from __future__
+    # import annotations` above makes all annotations lazy strings,
+    # so the runtime never tries to resolve `psycopg.Connection`.
+    import psycopg
+    from ..schema import Schema
+@dataclass(frozen=True)
+class ParseResult:
+    """Immutable outcome of a single PARSE-tier check.
+    `ok=True` means PG accepted the SQL at parse-analysis time; the
+    two error fields then keep their `None` defaults.
+    `ok=False` carries the PG error message in `error`; `error_code`
+    carries the SQLSTATE if the error came from psycopg's structured
+    error object (almost always available for parse failures).
+    """
+    # True when the PREPARE succeeded, False when PG rejected the SQL.
+    ok: bool
+    # Error text as filled in by `check_parse` (the stripped `str()` of
+    # the psycopg exception); None on success.
+    error: Optional[str] = None
+    # 5-character SQLSTATE such as "42703"; None on success or when the
+    # exception carried no diagnostic record.
+    error_code: Optional[str] = None
+def check_parse(sql: str, conn: psycopg.Connection) -> ParseResult:
+    """PREPARE the SQL against the live `conn`. Wraps the PREPARE in
+    a savepoint so a parse failure rolls back JUST that statement,
+    not the surrounding transaction.
+    Uses a named-prepared-statement-then-cleanup pattern: the PREPARE
+    itself does parse-analysis. On failure the savepoint is rolled
+    back and released. On success the statement `_waxsql_check` is
+    explicitly DEALLOCATEd before the savepoint is released — a
+    savepoint does not undo a PREPARE — so the name is reusable for
+    the next call.
+    `sql` is interpolated verbatim after `PREPARE _waxsql_check AS`;
+    no quoting or parameter binding happens here, so pass only SQL
+    you are willing to send to the server as-is.
+    `conn` must be a psycopg connection in a transaction (autocommit
+    off). The caller is responsible for connection lifecycle and
+    schema setup.
+    Returns a `ParseResult`: `ok=True` if the PREPARE succeeded,
+    otherwise `ok=False` with the stripped error text and the
+    SQLSTATE when one is available.
+    Raises RuntimeError if psycopg isn't available (the package is an
+    optional `[parse]` extra). Only a `psycopg.Error` raised by the
+    PREPARE itself is converted into a result; errors from the
+    SAVEPOINT, DEALLOCATE or success-path RELEASE statements are not
+    caught here and propagate to the caller.
+    """
+    # Runtime import (the module-level one is TYPE_CHECKING-only) so
+    # that importing this module never requires psycopg.
+    try:
+        import psycopg
+    except ImportError as e:  # pragma: no cover
+        raise RuntimeError(
+            "PARSE validation requires psycopg. "
+            "Install with: pip install 'waxsql[parse]'"
+        ) from e
+    # Savepoint name is a fixed identifier (`_waxsql_parse`) rather
+    # than per-call-unique because PREPARE/savepoint pairs are strictly
+    # sequential within a single check_parse call — on every normal
+    # return path the savepoint has been released (or the release was
+    # at least attempted), so name collision across successive callers
+    # on the same connection is not expected (psycopg serializes
+    # statements on a single connection anyway).
+    with conn.cursor() as cur:
+        cur.execute("SAVEPOINT _waxsql_parse")
+        try:
+            # PREPARE forces full parse-analysis at PREPARE time (this
+            # is the whole point of using PREPARE rather than, say,
+            # EXPLAIN — we only want to pay for parsing, not planning,
+            # because the PLAN tier exists for the planner check).
+            cur.execute(f"PREPARE _waxsql_check AS {sql}")
+        except psycopg.Error as e:
+            # `e.diag.sqlstate` is the 5-char SQLSTATE code (e.g.
+            # "42703" for undefined_column). Far more useful for
+            # programmatic filtering than the human-readable message.
+            # Captured BEFORE cleanup so a cleanup failure can't lose it.
+            sqlstate = (
+                e.diag.sqlstate if hasattr(e, "diag") and e.diag else None
+            )
+            # Parse failed — savepoint rollback discards the failed
+            # statement and any partial state. The ROLLBACK + RELEASE
+            # ordering matters: ROLLBACK TO returns us to the pre-PREPARE
+            # state, then RELEASE removes the (now-empty) savepoint frame
+            # so the surrounding transaction's savepoint stack doesn't
+            # accumulate dead entries across many failed checks.
+            #
+            # Suppressed: if the connection died mid-PREPARE, the ROLLBACK
+            # itself raises — without suppression that secondary error
+            # would mask the original parse failure we're trying to
+            # report. The caller's outer transaction handling deals with a
+            # truly-dead connection; here we just make sure the original
+            # error `e` is what ends up in the returned ParseResult.
+            # Note that if the ROLLBACK raises, the RELEASE is skipped too
+            # (both statements share one suppress block).
+            with contextlib.suppress(Exception):
+                cur.execute("ROLLBACK TO SAVEPOINT _waxsql_parse")
+                cur.execute("RELEASE SAVEPOINT _waxsql_parse")
+            return ParseResult(
+                ok=False,
+                error=str(e).strip(),
+                error_code=sqlstate,
+            )
+        # PREPARE succeeded. PG's transaction handling does NOT remove a
+        # PREPARE via savepoint rollback — verified empirically against
+        # PG 18.3 (and long-standing behavior across PG 12+): after
+        # `SAVEPOINT s; PREPARE p; ROLLBACK TO SAVEPOINT s`, re-PREPAREing
+        # `p` fails with SQLSTATE 42P05 (duplicate_prepared_statement), so
+        # the statement survived the rollback. See
+        # tests/test_parse.py::test_prepared_statement_survives_savepoint_rollback.
+        # Hence the explicit DEALLOCATE: on the success path we RELEASE
+        # (not roll back) the savepoint, and RELEASE never undoes the
+        # PREPARE, so without DEALLOCATE the name `_waxsql_check` would
+        # persist and the next call's PREPARE would collide. Order:
+        # DEALLOCATE before RELEASE — if we released first, the DEALLOCATE
+        # would still work but we'd have a brief window where the savepoint
+        # is gone but the prepared statement isn't, a worse failure mode if
+        # an interleaved error occurs.
+        # Neither statement is guarded: an error here propagates as an
+        # exception instead of being folded into a ParseResult.
+        cur.execute("DEALLOCATE _waxsql_check")
+        cur.execute("RELEASE SAVEPOINT _waxsql_parse")
+    return ParseResult(ok=True)
+def install_schema(schema: Schema, conn: psycopg.Connection) -> None:
+    """Execute the schema DDL against `conn`. The caller should run
+    this once per (seed, complexity) inside a transaction that's
+    later rolled back, so the schema doesn't persist between tests.
+    `schema` is a waxsql.Schema; `conn` is a psycopg connection.
+    This is the canonical high-level pattern WHEN THE CALLER HAS A
+    `Schema` OBJECT — library users, the conftest test fixture, and
+    the README examples. The CLI's `validate --tier {parse,plan}`
+    path deliberately does NOT call this function:
+    `_resolve_schema_source` returns a raw DDL string (because the
+    `--schema-from FILE` source has no Schema object behind it), and
+    the CLI executes that string directly via `cur.execute(ddl)`.
+    Both patterns are first-class and produce the same on-disk effect;
+    pick whichever matches what's in hand. If you're holding a
+    Schema, use `install_schema(schema, conn)`. If you're holding
+    a DDL string already, `cur.execute(ddl)` is fine. There is no
+    correctness difference between them.
+    Returns None. Nothing is caught here, so any error raised while
+    emitting or executing the DDL propagates to the caller.
+    """
+    # Single execute() of the entire DDL string: psycopg sends the
+    # whole multi-statement payload to PG, which parses each statement
+    # in turn. If any statement fails, the surrounding transaction
+    # aborts — but since the caller is expected to rollback at the
+    # end of the test anyway, that's the desired behavior. No
+    # savepoint here because schema setup is all-or-nothing: a
+    # partial schema isn't a useful state for the test harness.
+    # The DDL text comes from `schema.emit_ddl()` and is passed to
+    # execute() without parameters.
+    ddl = schema.emit_ddl()
+    with conn.cursor() as cur:
+        cur.execute(ddl)
+__all__ = ["ParseResult", "check_parse", "install_schema"]

waxsql/validate/plan.py ADDED Viewed

@@ -0,0 +1,149 @@
+"""Plan-tier validation via EXPLAIN against a live PostgreSQL.
+Role in the system: the strongest validation tier — strictly stronger
+than PARSE in catch-rate (any PARSE failure is also a PLAN failure)
+but roughly the same in cost on an empty schema, because EXPLAIN on
+zero rows is mostly tree construction. The CLI surfaces PLAN as
+`validate --tier plan`.
+Runs PG's planner on the SQL — full parse-analysis (everything PARSE
+catches) plus rewriting plus plan-tree construction. Catches the
+sliver of issues that fail at planning time but not at parse-analysis:
+  * Operator-class lookup failures for ORDER BY / DISTINCT /
+    GROUP BY (e.g. types with `=` but no `<` ordering operator)
+  * Some statistics-dependent type-coercion edges
+  * Permission failures (auth happens at planning, not parse)
+  * Cost-estimation issues that prevent plan-tree construction
+Does NOT catch runtime errors (division by zero, type-cast failures
+on actual data values, scalar-subquery cardinality violations).
+EXPLAIN doesn't execute; for those, EXPLAIN ANALYZE would be needed
+— and that requires data in the schema, which waxsql deliberately
+doesn't generate.
+Cost: roughly the same as PARSE on a small schema with no rows —
+EXPLAIN is mostly statistics lookup and tree construction, both
+cheap when there are no real rows to estimate against.
+The savepoint-around-EXPLAIN pattern matches `parse.py`'s — a
+single failing query rolls back JUST that statement, not the
+schema setup or prior successes. Without the savepoint, the first
+planner error would put the surrounding transaction into the
+aborted state, and every subsequent EXPLAIN would error out before
+PG even tried to plan it, losing all signal beyond the first
+failure. Unlike PARSE, EXPLAIN doesn't create a prepared statement,
+so no DEALLOCATE step is needed (the savepoint release is
+sufficient cleanup).
+Statistics note (per ARCHITECTURE.md Pillar 4): when the input header
+carries `with-data=true`, the test harness is expected to load
+COPY blocks and run ANALYZE BEFORE calling `check_plan`, so the
+EXPLAIN here reflects real statistics rather than empty-table
+defaults — without ANALYZE, plans collapse to seq-scans-only and
+lose all signal for join order, index choice, etc. The data
+loading and ANALYZE invocation live in the caller (CLI / harness),
+not in this module; `check_plan` is intentionally just the EXPLAIN +
+savepoint dance.
+"""
+from __future__ import annotations
+import contextlib
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Optional
+if TYPE_CHECKING:
+    # See `validate/parse.py` for the rationale: TYPE_CHECKING-gated
+    # import gives mypy the real type without forcing psycopg as a
+    # runtime dep for SYNTAX-tier-only users.
+    import psycopg
+@dataclass(frozen=True)
+class PlanResult:
+    """Immutable outcome of a single PLAN-tier check.
+    `ok=True` means PG's planner produced a complete plan tree; the
+    two error fields then keep their `None` defaults.
+    `ok=False` carries the PG error message in `error`; `error_code`
+    carries the SQLSTATE if available.
+    Same shape as ParseResult so callers (and aggregation tests)
+    can use the two interchangeably — a PlanResult is just a
+    ParseResult that ran the planner too.
+    """
+    # True when the EXPLAIN succeeded, False when PG raised an error.
+    ok: bool
+    # Error text as filled in by `check_plan` (the stripped `str()` of
+    # the psycopg exception); None on success.
+    error: Optional[str] = None
+    # 5-character SQLSTATE such as "42883"; None on success or when the
+    # exception carried no diagnostic record.
+    error_code: Optional[str] = None
+def check_plan(sql: str, conn: psycopg.Connection) -> PlanResult:
+    """EXPLAIN the SQL against the live `conn`. Wraps the EXPLAIN
+    in a savepoint so a planner failure rolls back JUST that
+    statement, not the surrounding transaction.
+    EXPLAIN goes through parse-analysis AND rewriting AND planning,
+    so it catches everything PARSE catches plus planner-side issues.
+    The query is NOT executed — `EXPLAIN ANALYZE` would do that, but
+    runtime errors are out of scope here (waxsql generates queries
+    against an empty schema, where most runtime errors don't fire
+    because zero rows means no per-row evaluation).
+    `sql` is interpolated verbatim after `EXPLAIN`; no quoting or
+    parameter binding happens here.
+    `conn` must be a psycopg connection in a transaction (autocommit
+    off). The caller is responsible for connection lifecycle and
+    schema setup — typically reusing `parse.install_schema`.
+    Returns a `PlanResult`: `ok=True` if the EXPLAIN succeeded,
+    otherwise `ok=False` with the stripped error text and the
+    SQLSTATE when one is available.
+    Raises RuntimeError if psycopg isn't available. Only a
+    `psycopg.Error` raised by the EXPLAIN itself is converted into a
+    result; errors from the SAVEPOINT or success-path RELEASE
+    statements propagate to the caller.
+    """
+    # Runtime import (the module-level one is TYPE_CHECKING-only) so
+    # that importing this module never requires psycopg.
+    # NOTE(review): the hint below names a `plan` extra, while
+    # parse.py's hint names `parse` for the same psycopg dependency —
+    # confirm that a `plan` extra is actually declared.
+    try:
+        import psycopg
+    except ImportError as e:  # pragma: no cover
+        raise RuntimeError(
+            "PLAN validation requires psycopg. "
+            "Install with: pip install 'waxsql[plan]'"
+        ) from e
+    # Savepoint name matches the parse.py pattern (fixed identifier,
+    # not per-call-unique) for the same reason: psycopg serializes
+    # statements on a connection, and on every normal return path the
+    # savepoint has been released (or the release was at least
+    # attempted), so collision between successive calls is not expected.
+    with conn.cursor() as cur:
+        cur.execute("SAVEPOINT _waxsql_plan")
+        try:
+            # Plain EXPLAIN — no ANALYZE, no BUFFERS, no FORMAT JSON.
+            # We only care that planning succeeds; the plan text itself
+            # is uninteresting and is discarded — nothing here calls a
+            # fetch method on the cursor.
+            #
+            # No EXPLAIN ANALYZE: that would execute the query, which
+            # could throw runtime errors (div-by-zero, cast failures)
+            # that are out of scope for this tier. It would also pay
+            # for real execution on every check — orders of magnitude
+            # more expensive than plan-only EXPLAIN.
+            cur.execute(f"EXPLAIN {sql}")
+        except psycopg.Error as e:
+            # SQLSTATE provenance mirrors parse.py — `e.diag.sqlstate`
+            # is the 5-char code (e.g. "42883" for undefined_function).
+            # Captured BEFORE cleanup so a cleanup failure can't lose it.
+            sqlstate = (
+                e.diag.sqlstate if hasattr(e, "diag") and e.diag else None
+            )
+            # Planning failed — savepoint rollback discards the failed
+            # statement. EXPLAIN doesn't create persistent state on
+            # success either, so no DEALLOCATE needed in the success path.
+            #
+            # Suppressed for the same reason as parse.py: if the connection
+            # died mid-EXPLAIN, the ROLLBACK itself raises and would
+            # otherwise mask the original planner error we're reporting.
+            # If the ROLLBACK raises, the RELEASE is skipped too (both
+            # statements share one suppress block).
+            with contextlib.suppress(Exception):
+                cur.execute("ROLLBACK TO SAVEPOINT _waxsql_plan")
+                cur.execute("RELEASE SAVEPOINT _waxsql_plan")
+            return PlanResult(
+                ok=False,
+                error=str(e).strip(),
+                error_code=sqlstate,
+            )
+        # Success: releasing the savepoint is the only cleanup. This
+        # statement is not guarded, so an error here propagates as an
+        # exception instead of being folded into a PlanResult.
+        cur.execute("RELEASE SAVEPOINT _waxsql_plan")
+    return PlanResult(ok=True)
+__all__ = ["PlanResult", "check_plan"]

waxsql/validate/syntax.py ADDED Viewed

@@ -0,0 +1,87 @@
+"""Syntax validation via pglast (libpg_query bindings).
+Role in the system: the fast inner-loop validator used by the round-trip
+test pattern (Pillar 3). Every parametrized test that generates SQL
+pipes the output through `check_syntax` and asserts ok=True — this is
+how the generator catches malformed token streams the moment a printer
+or generator regression appears, without paying for a live PG.
+This is the cheapest and most universally available validation layer:
+it requires nothing beyond the pglast wheel — no PostgreSQL server, no
+network, no per-query cost worth measuring. It catches the entire class
+of generator bugs that produce malformed token streams.
+pglast is pinned to v7 (libpg_query for PG17). v8 (PG18) is in
+development on the upstream `lelit/pglast` v8 branch but not on PyPI
+as of May 2026 — when it ships, bump the `pyproject.toml` pin and
+re-run the suite. The version pin is load-bearing: libpg_query is
+the actual PostgreSQL parser compiled as a static library, so the
+grammar accepted here is exactly the grammar PG accepts (modulo
+version skew, which is why we pin).
+What it does NOT catch:
+  * Undefined column or table references
+  * Type mismatches
+  * Aggregates in WHERE
+  * Anything requiring catalog lookup
+  * Operator-class lookup failures, constant-foldable runtime errors
+For those, use the PARSE tier (`waxsql.validate.parse.check_parse`),
+which runs the live PG parser via PREPARE, or the PLAN tier
+(`waxsql.validate.plan.check_plan`), which runs the planner via
+EXPLAIN. Both ship at v1.0; both require psycopg and a live PG.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Optional
+# Frozen dataclass per project convention (Conventions section of
+# ARCHITECTURE.md): hashable, can't accidentally mutate, cheap to pass.
+# Same general shape as ParseResult/PlanResult so callers can branch
+# on `.ok` uniformly, but this tier carries an `error_position`
+# because pglast surfaces a character offset on failure — the
+# live-DB tiers carry a SQLSTATE instead (different provenance).
+@dataclass(frozen=True)
+class SyntaxResult:
+    """Immutable outcome of a single SYNTAX-tier check.
+    `ok=True` means pglast parsed the SQL without raising; the two
+    error fields then keep their `None` defaults. `ok=False` carries
+    the exception text in `error` and, when the exception exposed a
+    `location` attribute, that value in `error_position`.
+    """
+    # True when parsing succeeded, False when any exception was raised.
+    ok: bool
+    # `str()` of the exception raised during parsing; None on success.
+    error: Optional[str] = None
+    # NOTE(review): described as 1-based here; confirm the indexing
+    # convention against the pinned pglast release.
+    error_position: Optional[int] = None  # 1-based char offset, if available
+def check_syntax(sql: str) -> SyntaxResult:
+    """Parse `sql` via pglast. Returns SyntaxResult with ok=False on any
+    parse error.
+    Returns `SyntaxResult(ok=True)` when `parse_sql` completes without
+    raising. Any exception raised by `parse_sql` — not only pglast's
+    own parse error — is turned into `ok=False` with the exception
+    text and, if present, its `location` attribute.
+    Raises RuntimeError if pglast is not installed — install with the
+    `syntax` extra: `pip install 'waxsql[syntax]'`.
+    """
+    # Lazy import: pglast is technically optional (`[syntax]` extra),
+    # so importing at module-load would force the dependency on every
+    # consumer of `ValidationMode`, including callers who only ever
+    # use NONE. Deferring keeps the import cost off the hot path for
+    # users who don't need syntax validation.
+    try:
+        from pglast import parse_sql
+    except ImportError as e:  # pragma: no cover
+        raise RuntimeError(
+            "Syntax validation requires pglast. "
+            "Install with: pip install 'waxsql[syntax]'"
+        ) from e
+    try:
+        # The parse tree is not needed; only whether parsing raises.
+        parse_sql(sql)
+    except Exception as e:
+        # Broad `Exception` catch is deliberate: pglast normally raises
+        # ParseError, but version skew or upstream changes could surface
+        # other exception types. Reporting them as "non-ok with the
+        # exception text" beats letting an unrelated exception type
+        # punch through and crash the test harness — the text still
+        # reaches the caller via `error`, so nothing is silently lost.
+        # pglast's ParseError exposes a `.location` attribute (a
+        # character offset) on parse failure; exceptions without that
+        # attribute yield `error_position=None`.
+        # NOTE(review): whatever `.location` holds is passed through
+        # unchecked — confirm it is always an int (and whether it is
+        # 0- or 1-based) for the pinned pglast version.
+        loc = getattr(e, "location", None)
+        return SyntaxResult(ok=False, error=str(e), error_position=loc)
+    return SyntaxResult(ok=True)