waxsql 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,194 @@
1
+ """Parse-tier validation via PREPARE against a live PostgreSQL.
2
+
3
+ Role in the system: the middle tier — strictly stronger than SYNTAX,
4
+ strictly cheaper than PLAN. Used when the test harness has a live
5
+ DB available and wants to catch the entire class of name/type
6
+ errors that pglast silently passes.
7
+
8
+ Runs full PG parse-analysis: column resolution, type checking,
9
+ aggregate-context rules, function lookup, view validity. Catches
10
+ the entire class of "PG accepts at SYNTAX tier but rejects at
11
+ parse-analysis" issues that pglast (libpg_query) silently passes:
12
+
13
+ * Column refs to undefined aliases or non-existent columns
14
+ * Aggregates in disallowed contexts (WHERE, JOIN ON, aggregate args)
15
+ * Window functions in disallowed contexts (WHERE, HAVING, agg args)
16
+ * GROUP BY consistency violations
17
+ * Type mismatches (e.g. `int = uuid` with no implicit cast)
18
+ * Set-op arms with mismatched column counts/types
19
+
20
+ What it does NOT catch:
21
+
22
+ * Operator-class lookup failures for ORDER BY / DISTINCT /
23
+ GROUP BY (deferred to PLAN tier — would need EXPLAIN)
24
+ * Runtime errors (division by zero, type cast failures on
25
+ actual data, etc.)
26
+
27
+ Cost: roughly 1ms per query on a local PG with the schema already
28
+ loaded. Schema setup itself is ~1ms per CREATE TABLE plus FK/index
29
+ DDL — typically 50-300ms total for a milestone-sized schema.
30
+
31
+ The savepoint-around-PREPARE pattern is what lets a single failing
32
+ query not poison the surrounding transaction's state (the schema
33
+ setup, prior PREPAREs, ...). Without it, the first parse error
34
+ would put the surrounding transaction into an aborted state and
35
+ every subsequent check in the batch would error with "current
36
+ transaction is aborted, commands ignored until end of transaction
37
+ block" — losing all signal beyond the first failure. The test
38
+ harness owns the connection and transaction; this module is a thin
39
+ wrapper around the PREPARE + savepoint dance.
40
+ """
41
+ from __future__ import annotations
42
+
43
+ import contextlib
44
+ from dataclasses import dataclass
45
+ from typing import TYPE_CHECKING, Optional
46
+
47
+ if TYPE_CHECKING:
48
+ # psycopg is an OPTIONAL runtime dep (only required when callers
49
+ # actually use the PARSE tier); but for static type-checkers we
50
+ # want the annotations to carry the real types. The TYPE_CHECKING
51
+ # block is False at runtime (so no import happens for users of
52
+ # SYNTAX-tier only) but True for mypy/pyright. The `from __future__
53
+ # import annotations` above makes all annotations lazy strings,
54
+ # so the runtime never tries to resolve `psycopg.Connection`.
55
+ import psycopg
56
+
57
+ from ..schema import Schema
58
+
59
+
60
@dataclass(frozen=True)
class ParseResult:
    """Immutable verdict of one PARSE-tier check.

    A successful check is represented as `ParseResult(ok=True)` with
    both error fields left at `None`. A failed check has `ok=False`,
    the PostgreSQL error text in `error`, and — when the driver
    exposed one on the exception — the SQLSTATE in `error_code`.
    """
    # True when PG accepted the SQL at parse-analysis time.
    ok: bool
    # Human-readable PG error message; None on success.
    error: Optional[str] = None
    # SQLSTATE taken from the driver's structured error object, if
    # one was available; None otherwise.
    error_code: Optional[str] = None
72
+
73
+
74
def check_parse(sql: str, conn: psycopg.Connection) -> ParseResult:
    """PREPARE `sql` on the live `conn` and report whether PG accepted it.

    The PREPARE is wrapped in a savepoint so a parse failure rolls
    back JUST that statement, not the surrounding transaction.

    Two pieces of state are cleaned up, and they behave differently:

    * The savepoint `_waxsql_parse` is rolled back on failure and
      released on both paths, so the surrounding transaction stays
      usable and its savepoint stack does not grow.
    * The prepared statement `_waxsql_check` is a NAMED statement
      that savepoint rollback/release does not remove, so after a
      successful PREPARE it is dropped with an explicit DEALLOCATE.

    Self-healing: if an earlier call leaked `_waxsql_check` (it was
    interrupted between PREPARE and DEALLOCATE, or `sql` was a
    multi-statement payload whose leading PREPARE succeeded before a
    later statement failed), the PREPARE here fails with SQLSTATE
    42P05 (duplicate_prepared_statement). That error says nothing
    about `sql`, so the stale statement is deallocated and the
    PREPARE is retried exactly once.

    `sql` is interpolated verbatim after `PREPARE _waxsql_check AS`;
    pass a single statement from a trusted source (the generator).

    `conn` must be a psycopg connection in a transaction (autocommit
    off). The caller is responsible for connection lifecycle and
    schema setup.

    Returns `ParseResult(ok=True)` when PG accepted the statement,
    otherwise `ParseResult(ok=False, ...)` carrying the stripped
    error message and the SQLSTATE when the driver exposes one.

    Raises RuntimeError if psycopg isn't available (the package is an
    optional `[parse]` extra). Driver errors raised while opening the
    savepoint or during success-path cleanup are not caught here.
    """
    try:
        import psycopg
    except ImportError as e:  # pragma: no cover
        raise RuntimeError(
            "PARSE validation requires psycopg. "
            "Install with: pip install 'waxsql[parse]'"
        ) from e

    # Fixed savepoint / statement names rather than per-call-unique
    # ones: both are created and removed strictly within one call.
    with conn.cursor() as cur:
        cur.execute("SAVEPOINT _waxsql_parse")

        def attempt_prepare() -> Optional[ParseResult]:
            """Run the PREPARE; return None on success, else the failure."""
            try:
                # PREPARE forces full parse-analysis without paying
                # for planning (the PLAN tier covers the planner).
                cur.execute(f"PREPARE _waxsql_check AS {sql}")
            except psycopg.Error as e:
                # Message and SQLSTATE are captured immediately so
                # no later cleanup step can lose them.
                diag = getattr(e, "diag", None)
                return ParseResult(
                    ok=False,
                    error=str(e).strip(),
                    error_code=diag.sqlstate if diag else None,
                )
            return None

        failure = attempt_prepare()

        if failure is not None and failure.error_code == "42P05":
            # A stale `_waxsql_check` from an earlier call is in the
            # way. Leave the aborted sub-transaction (the savepoint
            # itself survives ROLLBACK TO), drop the stale statement,
            # and try once more.
            try:
                cur.execute("ROLLBACK TO SAVEPOINT _waxsql_parse")
                cur.execute("DEALLOCATE _waxsql_check")
            except psycopg.Error:
                # Could not heal; fall through and report the
                # original 42P05 failure unchanged.
                pass
            else:
                failure = attempt_prepare()

        if failure is not None:
            # ROLLBACK TO returns to the pre-PREPARE state, then
            # RELEASE removes the savepoint frame so failed checks
            # do not pile up dead savepoints.
            #
            # Suppressed: if the connection died mid-PREPARE these
            # statements raise too, and that secondary error must not
            # replace the parse failure being returned. A truly dead
            # connection is the caller's transaction handling to deal
            # with.
            with contextlib.suppress(Exception):
                cur.execute("ROLLBACK TO SAVEPOINT _waxsql_parse")
                cur.execute("RELEASE SAVEPOINT _waxsql_parse")
            return failure

        # PREPARE succeeded. RELEASE never undoes a PREPARE, so without
        # the DEALLOCATE the name would persist and collide with the
        # next call. DEALLOCATE goes first so the savepoint is still
        # in place should the DEALLOCATE itself error.
        cur.execute("DEALLOCATE _waxsql_check")
        cur.execute("RELEASE SAVEPOINT _waxsql_parse")
        return ParseResult(ok=True)
160
+
161
+
162
def install_schema(schema: Schema, conn: psycopg.Connection) -> None:
    """Run the DDL emitted by `schema` on `conn` in one `execute()`.

    `schema` is a waxsql.Schema; `conn` is a psycopg connection. The
    intended usage is once per (seed, complexity), inside a
    transaction the caller later rolls back so the schema does not
    persist between tests.

    Use this when a `Schema` object is in hand (library users, the
    conftest fixture, the README examples). Callers that already
    hold a raw DDL string — such as the CLI's
    `validate --tier {parse,plan}` path with `--schema-from FILE` —
    can simply `cur.execute(ddl)` themselves; the two routes are
    equivalent in effect.

    No savepoint is used: schema setup is all-or-nothing. If any
    statement in the payload fails, the driver error propagates and
    the surrounding transaction is left aborted, which is acceptable
    because the caller is expected to roll back anyway and a partial
    schema is of no use to the harness.
    """
    # Render the DDL before opening a cursor so a rendering failure
    # never touches the connection.
    ddl_text = schema.emit_ddl()
    with conn.cursor() as cursor:
        # The whole multi-statement payload goes to PG in one call.
        cursor.execute(ddl_text)
192
+
193
+
194
+ __all__ = ["ParseResult", "check_parse", "install_schema"]
@@ -0,0 +1,149 @@
1
+ """Plan-tier validation via EXPLAIN against a live PostgreSQL.
2
+
3
+ Role in the system: the strongest validation tier — strictly stronger
4
+ than PARSE in catch-rate (any PARSE failure is also a PLAN failure)
5
+ but roughly the same in cost on an empty schema, because EXPLAIN on
6
+ zero rows is mostly tree construction. The CLI surfaces PLAN as
7
+ `validate --tier plan`.
8
+
9
+ Runs PG's planner on the SQL — full parse-analysis (everything PARSE
10
+ catches) plus rewriting plus plan-tree construction. Catches the
11
+ sliver of issues that fail at planning time but not at parse-analysis:
12
+
13
+ * Operator-class lookup failures for ORDER BY / DISTINCT /
14
+ GROUP BY (e.g. types with `=` but no `<` ordering operator)
15
+ * Some statistics-dependent type-coercion edges
16
+ * Permission failures (auth happens at planning, not parse)
17
+ * Cost-estimation issues that prevent plan-tree construction
18
+
19
+ Does NOT catch runtime errors (division by zero, type-cast failures
20
+ on actual data values, scalar-subquery cardinality violations).
21
+ EXPLAIN doesn't execute; for those, EXPLAIN ANALYZE would be needed
22
+ — and that requires data in the schema, which waxsql deliberately
23
+ doesn't generate.
24
+
25
+ Cost: roughly the same as PARSE on a small schema with no rows —
26
+ EXPLAIN is mostly statistics lookup and tree construction, both
27
+ cheap when there are no real rows to estimate against.
28
+
29
+ The savepoint-around-EXPLAIN pattern matches `parse.py`'s — a
30
+ single failing query rolls back JUST that statement, not the
31
+ schema setup or prior successes. Without the savepoint, the first
32
+ planner error would put the surrounding transaction into the
33
+ aborted state, and every subsequent EXPLAIN would error out before
34
+ PG even tried to plan it, losing all signal beyond the first
35
+ failure. Unlike PARSE, EXPLAIN doesn't create a prepared statement,
36
+ so no DEALLOCATE step is needed (the savepoint release is
37
+ sufficient cleanup).
38
+
39
+ Statistics note (per ARCHITECTURE.md Pillar 4): when the input header
40
+ carries `with-data=true`, the test harness is expected to load
41
+ COPY blocks and run ANALYZE BEFORE calling `check_plan`, so the
42
+ EXPLAIN here reflects real statistics rather than empty-table
43
+ defaults — without ANALYZE, plans collapse to seq-scans-only and
44
+ lose all signal for join order, index choice, etc. The data
45
+ loading and ANALYZE invocation live in the caller (CLI / harness),
46
+ not in this module; `check_plan` is intentionally just the EXPLAIN +
47
+ savepoint dance.
48
+ """
49
+ from __future__ import annotations
50
+
51
+ import contextlib
52
+ from dataclasses import dataclass
53
+ from typing import TYPE_CHECKING, Optional
54
+
55
+ if TYPE_CHECKING:
56
+ # See `validate/parse.py` for the rationale: TYPE_CHECKING-gated
57
+ # import gives mypy the real type without forcing psycopg as a
58
+ # runtime dep for SYNTAX-tier-only users.
59
+ import psycopg
60
+
61
+
62
@dataclass(frozen=True)
class PlanResult:
    """Immutable verdict of one PLAN-tier check.

    `PlanResult(ok=True)` means PG's planner built a complete plan
    tree for the statement. On failure `ok` is False, `error` holds
    the PG error message and `error_code` the SQLSTATE when one is
    available.

    The field layout deliberately mirrors ParseResult so that
    callers (and aggregation tests) can treat results from the two
    live-DB tiers uniformly.
    """
    # True when planning completed without error.
    ok: bool
    # PG error message; None on success.
    error: Optional[str] = None
    # SQLSTATE of the failure, if available; None otherwise.
    error_code: Optional[str] = None
77
+
78
+
79
def check_plan(sql: str, conn: psycopg.Connection) -> PlanResult:
    """Run a plain EXPLAIN of `sql` on `conn` and report the outcome.

    The EXPLAIN is bracketed by a savepoint so that a planner error
    rolls back only that one statement and leaves the surrounding
    transaction (schema setup, earlier checks) intact.

    EXPLAIN goes through parse-analysis, rewriting and planning but
    does NOT execute the query; `EXPLAIN ANALYZE` is deliberately not
    used because runtime errors are out of scope for this tier.

    `conn` must be a psycopg connection in a transaction (autocommit
    off). Connection lifecycle and schema setup belong to the caller
    — typically via `parse.install_schema`.

    Returns `PlanResult(ok=True)` if the EXPLAIN succeeded, otherwise
    `PlanResult(ok=False, ...)` with the stripped error message and
    the SQLSTATE when the driver exposes one.

    Raises RuntimeError if psycopg isn't available.
    """
    try:
        import psycopg
    except ImportError as e:  # pragma: no cover
        raise RuntimeError(
            "PLAN validation requires psycopg. "
            "Install with: pip install 'waxsql[plan]'"
        ) from e

    # Plain EXPLAIN — no ANALYZE, BUFFERS or FORMAT options. Only
    # success or failure matters; the plan text is never fetched.
    explain_stmt = f"EXPLAIN {sql}"

    # A fixed savepoint name is used: it is opened and released
    # within this single call.
    with conn.cursor() as cursor:
        cursor.execute("SAVEPOINT _waxsql_plan")
        try:
            cursor.execute(explain_stmt)
        except psycopg.Error as exc:
            # Grab the SQLSTATE first so a failing cleanup step
            # cannot lose it.
            diag = getattr(exc, "diag", None)
            code = diag.sqlstate if diag else None
            # Undo the failed statement and drop the savepoint frame.
            # Best-effort: if the connection died mid-EXPLAIN these
            # raise as well, and that secondary error must not hide
            # the planner failure being reported.
            with contextlib.suppress(Exception):
                cursor.execute("ROLLBACK TO SAVEPOINT _waxsql_plan")
                cursor.execute("RELEASE SAVEPOINT _waxsql_plan")
            message = str(exc).strip()
            return PlanResult(ok=False, error=message, error_code=code)
        else:
            # EXPLAIN leaves no prepared statement behind, so
            # releasing the savepoint is all the cleanup required.
            cursor.execute("RELEASE SAVEPOINT _waxsql_plan")
            return PlanResult(ok=True)
147
+
148
+
149
+ __all__ = ["PlanResult", "check_plan"]
@@ -0,0 +1,87 @@
1
+ """Syntax validation via pglast (libpg_query bindings).
2
+
3
+ Role in the system: the fast inner-loop validator used by the round-trip
4
+ test pattern (Pillar 3). Every parametrized test that generates SQL
5
+ pipes the output through `check_syntax` and asserts ok=True — this is
6
+ how the generator catches malformed token streams the moment a printer
7
+ or generator regression appears, without paying for a live PG.
8
+
9
+ This is the cheapest and most universally available validation layer:
10
+ it requires nothing beyond the pglast wheel — no PostgreSQL server, no
11
+ network, no per-query cost worth measuring. It catches the entire class
12
+ of generator bugs that produce malformed token streams.
13
+
14
+ pglast is pinned to v7 (libpg_query for PG17). v8 (PG18) is in
15
+ development on the upstream `lelit/pglast` v8 branch but not on PyPI
16
+ as of May 2026 — when it ships, bump the `pyproject.toml` pin and
17
+ re-run the suite. The version pin is load-bearing: libpg_query is
18
+ the actual PostgreSQL parser compiled as a static library, so the
19
+ grammar accepted here is exactly the grammar PG accepts (modulo
20
+ version skew, which is why we pin).
21
+
22
+ What it does NOT catch:
23
+
24
+ * Undefined column or table references
25
+ * Type mismatches
26
+ * Aggregates in WHERE
27
+ * Anything requiring catalog lookup
28
+ * Operator-class lookup failures, constant-foldable runtime errors
29
+
30
+ For those, use the PARSE tier (`waxsql.validate.parse.check_parse`),
31
+ which runs the live PG parser via PREPARE, or the PLAN tier
32
+ (`waxsql.validate.plan.check_plan`), which runs the planner via
33
+ EXPLAIN. Both ship at v1.0; both require psycopg and a live PG.
34
+ """
35
+ from __future__ import annotations
36
+
37
+ from dataclasses import dataclass
38
+ from typing import Optional
39
+
40
+
41
@dataclass(frozen=True)
class SyntaxResult:
    """Immutable verdict of one SYNTAX-tier check.

    Frozen per the project convention (Conventions section of
    ARCHITECTURE.md): hashable, not accidentally mutable, cheap to
    pass around.

    Shares the `.ok` / `.error` shape of ParseResult and PlanResult
    so callers can branch on `.ok` uniformly. Where the live-DB
    tiers carry a SQLSTATE, this tier carries `error_position`: the
    character offset pglast attaches to a parse failure.
    """
    # True when pglast parsed the SQL without raising.
    ok: bool
    # Text of the raised exception; None on success.
    error: Optional[str] = None
    # Offset reported by pglast, if the exception carried one.
    # NOTE(review): documented upstream of this class as 1-based —
    # confirm against the pinned pglast version.
    error_position: Optional[int] = None
52
+
53
+
54
def check_syntax(sql: str) -> SyntaxResult:
    """Run `sql` through pglast's parser and report the outcome.

    Returns `SyntaxResult(ok=True)` when parsing raises nothing.
    Any exception raised by the parser yields `ok=False` with the
    exception text in `error` and, if the exception has a
    `location` attribute, that value in `error_position`.

    Raises RuntimeError if pglast is not installed — install with the
    `syntax` extra: `pip install 'waxsql[syntax]'`.
    """
    # Imported lazily: pglast is an optional dependency (`[syntax]`
    # extra), and a module-level import would force it on every
    # consumer of `ValidationMode`, including those who only ever
    # use NONE.
    try:
        from pglast import parse_sql as pg_parse
    except ImportError as e:  # pragma: no cover
        raise RuntimeError(
            "Syntax validation requires pglast. "
            "Install with: pip install 'waxsql[syntax]'"
        ) from e

    try:
        pg_parse(sql)
    except Exception as exc:
        # The broad catch is deliberate. pglast normally raises its
        # ParseError, which carries a `.location` offset; version
        # skew or upstream changes could raise something else. Every
        # such exception is reported as a non-ok result carrying its
        # text (position left as None when there is no `.location`)
        # instead of being allowed to crash the test harness.
        return SyntaxResult(
            ok=False,
            error=str(exc),
            error_position=getattr(exc, "location", None),
        )
    else:
        return SyntaxResult(ok=True)