waxsql 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
waxsql/scope.py ADDED
@@ -0,0 +1,391 @@
1
+ """Binding stack for query generation.
2
+
3
+ A `Scope` answers two questions for the expression generator:
4
+
5
+ 1. "Which columns can I reference here?" — `visible_columns(of_type)`.
6
+ 2. "Which tables are in the current FROM clause, and under what
7
+ aliases?" — `aliased_tables()` (used by the SELECT generator to
8
+ bias JOIN conditions toward FK-related tables).
9
+
10
+ Scopes form a parent chain. A nested subquery's scope points at the
11
+ outer query's scope; whether the lookup walks up the chain depends on
12
+ whether the subquery is correlated (and, equivalently for FROM-clause
13
+ subqueries, whether it's LATERAL). This is the same mechanism that
14
+ PostgreSQL's parser uses internally for name resolution; modelling it
15
+ the same way means the generator's notion of "what's visible" matches
16
+ PostgreSQL's notion when we eventually wire up PARSE-level validation.
17
+
18
+ The aggregate / GROUP BY / window flags do NOT live here. Those are
19
+ expression-context state (they flip per-call inside an aggregate
20
+ argument, etc.) and belong on GenContext. Scope is purely about
21
+ binding visibility.
22
+
23
+ There's no explicit pop. Scopes nest via parent pointers, and a
24
+ "popped" scope is one the caller simply stops referencing — typically
25
+ by returning from the function that built it, or by replacing the
26
+ GenContext.scope field with the parent. This is enforced structurally
27
+ because GenContext is frozen: `descend_subquery` produces a NEW
28
+ GenContext with a child scope, and once that GenContext goes out of
29
+ scope, the child scope does too. The discipline is "every scope
30
+ borrowed for a subquery is local to that subquery's generation call".
31
+ Violating this means stale bindings leak into sibling generation —
32
+ the prototypical "I see columns from a sibling I shouldn't see" bug.
33
+
34
+ Two related visibility mechanisms:
35
+
36
+ * CTE table-level visibility. A `_cte_defs` dict on each scope holds
37
+ CTE definitions; CTE lookup walks the chain unconditionally — CTEs
38
+ are visible regardless of correlation.
39
+
40
+ * Subquery support. `push_subquery(correlated=...)` creates a child
41
+ scope; the `correlated` flag at construction time decides whether
42
+ parent-chain column lookups walk past this level.
43
+
44
+ What's deliberately not modeled:
45
+
46
+ * Nullability propagation through outer joins. Bindings carry their
47
+ declared `nullable` flag, but the generator currently treats
48
+ everything as potentially-NULL anyway. Refining this requires
49
+ join-tree analysis at generation time and is its own piece of work.
50
+ """
51
+ from __future__ import annotations
52
+
53
+ from dataclasses import dataclass
54
+ from typing import Optional
55
+
56
+ from .schema import Table
57
+ from .types import PgType, implicitly_castable
58
+
59
+
60
@dataclass(frozen=True)
class Binding:
    """A single column that can be referenced at one scope level.

    Instances are immutable (frozen dataclass), so the same Binding can
    appear in several of the lists that `Scope` hands out without any
    defensive copying.
    """

    # Alias the owning FROM item was registered under. This is the alias,
    # not the underlying table name: column references always go through
    # the alias.
    table_alias: str
    # Column name as exposed by that FROM item.
    column: str
    # Declared type; `Scope` filters candidates on it via
    # `implicitly_castable`.
    type: PgType
    # Declared nullability of the column.
    nullable: bool
72
+
73
+
74
class Scope:
    """One level of the binding stack used while generating a query.

    A new Scope starts empty; the FROM-clause builder fills it by calling
    `add_table` / `add_derived` once per FROM item. Entering a subquery
    means calling `push_subquery(...)`, which returns a child whose parent
    link points back at this scope.

    `correlated` is fixed at construction and describes the kind of
    nesting that produced the scope. Pass `correlated=True` for LATERAL
    FROM-subqueries and for most expression-position subqueries; pass
    `correlated=False` for non-LATERAL FROM-subqueries, which may not
    reference their siblings. Column lookups stop walking outward after an
    uncorrelated scope; CTE lookups ignore the flag entirely.

    Scope is intentionally mutable, unlike most of the project: bindings
    accumulate during a single FROM-clause pass, and rebuilding the object
    per addition would force an awkward fold-style loop on the caller.
    The parent link is assigned in `__init__` and never reassigned, so
    walking the parent chain is effectively a read of immutable structure.
    """

    def __init__(
        self,
        parent: Optional["Scope"] = None,
        *,
        correlated: bool = True,
    ) -> None:
        """Create an empty scope.

        `parent` is the enclosing scope (None for the outermost query);
        `correlated` controls whether column lookups may continue past
        this scope into `parent`.
        """
        self._parent = parent
        self._correlated = correlated
        # Column bindings, kept in insertion order. The expression
        # generator samples from this list, so its order must be
        # reproducible; it is, as long as add_table / add_derived are
        # called in a deterministic sequence.
        self._bindings: list[Binding] = []
        # (alias, Table-or-None) pairs, in insertion order. Base tables
        # store their Table; derived tables store None but still occupy a
        # slot so that alias-collision detection and ordered enumeration
        # see them.
        self._aliases: list[tuple[str, Optional[Table]]] = []
        # CTE name -> [(column_name, column_type), ...] for CTEs defined
        # at this level. Bindings are not created here: a Binding needs
        # the local alias of a CTE *reference*, which does not exist yet
        # when the CTE is defined. That step goes through `add_derived`.
        self._cte_defs: dict[str, list[tuple[str, PgType]]] = {}

    # -- internal helpers ---------------------------------------------------

    def _chain(self) -> list["Scope"]:
        """Return this scope followed by each ancestor, innermost first.

        The walk is unconditional; callers that must respect `_correlated`
        break out of the iteration themselves.
        """
        levels = []
        level = self
        while level is not None:
            levels.append(level)
            level = level._parent
        return levels

    def _claim_alias(self, alias: str, table: Optional[Table]) -> None:
        """Reserve `alias` at this level, raising ValueError on a clash."""
        if self.has_alias(alias):
            raise ValueError(f"alias {alias!r} already in scope")
        self._aliases.append((alias, table))

    @staticmethod
    def _select(
        bindings: list[Binding],
        of_type: Optional[PgType],
    ) -> list[Binding]:
        """Return a new list of `bindings`, optionally type-filtered.

        With `of_type` set, only bindings whose type implicitly casts to
        `of_type` are kept.
        """
        if of_type is None:
            return list(bindings)
        return [b for b in bindings if implicitly_castable(b.type, of_type)]

    # -- mutation -----------------------------------------------------------

    def add_table(self, alias: str, table: Table) -> None:
        """Register base `table` under `alias` and expose all its columns.

        One Binding is appended per entry of `table.columns`, in order.

        Raises:
            ValueError: if `alias` is already registered in this scope
                (base or derived). Aliases are unique per scope, as
                PostgreSQL requires for FROM-clause aliases; the check
                exists to surface generator bugs.
        """
        self._claim_alias(alias, table)
        self._bindings.extend(
            Binding(
                table_alias=alias,
                column=col.name,
                type=col.type,
                nullable=col.nullable,
            )
            for col in table.columns
        )

    def add_derived(
        self,
        alias: str,
        columns: list[tuple[str, PgType]],
    ) -> None:
        """Register a derived-table alias backed by a subquery's targets.

        `columns` holds (column_name, column_type) pairs, typically one
        per inner SELECT target. The alias is stored with no underlying
        Table. Every resulting Binding is marked nullable, because NOT
        NULL is not propagated through SELECT-list expressions.

        Raises:
            ValueError: if `alias` is already registered in this scope.
        """
        self._claim_alias(alias, None)
        self._bindings.extend(
            Binding(
                table_alias=alias,
                column=col_name,
                type=col_type,
                nullable=True,
            )
            for col_name, col_type in columns
        )

    # -- queries ------------------------------------------------------------

    def local_bindings(
        self,
        of_type: Optional[PgType] = None,
    ) -> list[Binding]:
        """Return the bindings added at this level only (no parent walk).

        Intended for aggregate arguments inside a correlated subquery,
        where referencing outer columns would be wrong; everywhere else
        `visible_columns` is the method to use. With `of_type` set, only
        bindings implicitly castable to it are returned. The result is a
        new list on every call.
        """
        return self._select(self._bindings, of_type)

    def visible_columns(
        self,
        of_type: Optional[PgType] = None,
    ) -> list[Binding]:
        """Return every binding reachable from this scope.

        Starts with this scope's own bindings and continues outward
        through the parent chain. The first uncorrelated scope encountered
        still contributes its own bindings, but the walk stops there.
        With `of_type` set, only bindings implicitly castable to it are
        kept.

        The result is a new list on every call, ordered innermost scope
        first, so consumers that favour earlier entries naturally favour
        the closest scope.
        """
        collected: list[Binding] = []
        for level in self._chain():
            collected += self._select(level._bindings, of_type)
            if not level._correlated:
                break
        return collected

    def aliased_tables(self) -> list[tuple[str, Table]]:
        """Return this scope's (alias, table) pairs for base tables only.

        Derived-table aliases (stored without a Table) are left out, and
        parent scopes are not consulted: the consumer is FK-biased JOIN
        generation within a single FROM clause.
        """
        pairs = []
        for alias, table in self._aliases:
            if table is not None:
                pairs.append((alias, table))
        return pairs

    def lookup_alias(self, alias: str) -> Optional[Table]:
        """Return the Table registered under `alias` at this level.

        None is returned both when the alias is unknown and when it names
        a derived table; use `has_alias()` to tell the two apart.
        """
        return next(
            (table for name, table in self._aliases if name == alias),
            None,
        )

    def has_alias(self, alias: str) -> bool:
        """Return True iff `alias` is registered at this level.

        Base-table and derived-table aliases both count.
        """
        for registered, _ in self._aliases:
            if registered == alias:
                return True
        return False

    # -- CTE management -----------------------------------------------------

    def add_cte(
        self,
        name: str,
        columns: list[tuple[str, PgType]],
    ) -> None:
        """Record a CTE definition at this level.

        `columns` has the same shape as in `add_derived`; a copy is
        stored, so later changes to the caller's list have no effect.

        Raises:
            ValueError: if `name` is already defined in this scope.
                Shadowing a name defined in an *ancestor* scope is not
                rejected.
        """
        if name in self._cte_defs:
            raise ValueError(f"CTE name {name!r} already defined in scope")
        self._cte_defs[name] = list(columns)

    def lookup_cte(
        self,
        name: str,
    ) -> Optional[list[tuple[str, PgType]]]:
        """Return the column info of the nearest CTE called `name`.

        The whole parent chain is searched regardless of `_correlated`:
        CTE visibility does not depend on correlation. The closest
        definition wins. The result is a copy, or None if no scope in the
        chain defines `name`.
        """
        for level in self._chain():
            if name in level._cte_defs:
                return list(level._cte_defs[name])
        return None

    def has_visible_ctes(self) -> bool:
        """Return True iff this scope or any ancestor defines a CTE."""
        return any(level._cte_defs for level in self._chain())

    def visible_cte_names(self) -> list[str]:
        """Return every CTE name visible here, each exactly once.

        Names are listed innermost scope first and, within one scope, in
        definition order. A name shadowed by a closer scope appears only
        at the closer scope's position, matching what `lookup_cte`
        resolves. Ordering relies only on dict insertion order and the
        parent-chain order, so the result is deterministic.
        """
        return list(dict.fromkeys(
            name
            for level in self._chain()
            for name in level._cte_defs
        ))

    # -- nesting ------------------------------------------------------------

    def push_subquery(self, *, correlated: bool) -> "Scope":
        """Return a new, empty child scope whose parent is this scope.

        `correlated` is a required keyword argument. Use True for
        expression-position subqueries (e.g. `WHERE x = (SELECT ...)`) and
        LATERAL FROM subqueries; use False for plain FROM subqueries,
        which cannot reference their siblings.

        The caller must not keep the child alive beyond the subquery's
        own generation call; otherwise its bindings can leak into sibling
        generation.
        """
        child = Scope(parent=self, correlated=correlated)
        return child
389
+
390
+
391
+ __all__ = ["Binding", "Scope"]
waxsql/types.py ADDED
@@ -0,0 +1,187 @@
1
+ """PostgreSQL type system model.
2
+
3
+ Mirrors the abstractions PostgreSQL itself uses (pg_type.typcategory) so
4
+ that as we add more types and casts later, the structure already lines up
5
+ with how the planner reasons about coercion.
6
+
7
+ This is a deliberately small slice of PostgreSQL's actual type system —
8
+ ~12 scalar types plus arrays. Expand `_IMPLICIT_CASTS` and `SCALAR_TYPES`
9
+ as the generator needs more variety.
10
+
11
+ This module is the load-bearing foundation under the type-driven
12
+ expression generator: every "what produces type T?" lookup in the
13
+ catalog, every column visibility filter in scope.py, and every
14
+ function/operator argument check runs through `implicitly_castable`.
15
+ Mistakes here propagate as "valid-looking SQL that fails parse-analysis"
16
+ across the whole generator. Cross-reference with pg_cast when changing
17
+ anything below.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ from dataclasses import dataclass
22
+ from enum import Enum
23
+ from typing import Optional
24
+
25
+
26
class TypeCategory(str, Enum):
    """Type category codes, mirroring `pg_type.typcategory`.

    Each member's value is the single-letter code PostgreSQL stores in the
    catalog. Because the enum also derives from `str`, a member compares
    equal to its letter. Every `PgType` carries one of these so that
    coercion questions can be answered per category.
    """

    # Containers and structured types.
    ARRAY = "A"
    COMPOSITE = "C"
    RANGE = "R"

    # Plain value categories.
    BOOLEAN = "B"
    NUMERIC = "N"
    STRING = "S"
    BITSTRING = "V"
    ENUM = "E"

    # Temporal.
    DATETIME = "D"
    TIMESPAN = "T"

    # Domain-specific built-ins.
    GEOMETRIC = "G"
    NETWORK = "I"

    # Everything else.
    PSEUDO = "P"
    USER = "U"
    UNKNOWN = "X"
49
+
50
+
51
@dataclass(frozen=True)
class PgType:
    """Immutable description of one PostgreSQL type.

    The dataclass is frozen, which makes instances hashable and therefore
    usable as dict keys (type weight tables, catalog indexes).
    """

    # Catalog name as in pg_type.typname (`int8`, not `bigint`). For array
    # types this is conventionally the underscore-prefixed form (`_int4`).
    name: str
    # Category code of the type.
    category: TypeCategory
    # Element type; set only for arrays, None for everything else.
    element: Optional["PgType"] = None
    # Type modifier, e.g. (10, 2) for numeric(10,2) or (50,) for
    # varchar(50). The empty tuple means "no modifier".
    typmod: tuple[int, ...] = ()

    # The is_* helpers let callers ask the obvious questions without
    # importing TypeCategory, and keep any future re-categorisation local
    # to this file.
    def is_array(self) -> bool:
        """Return True iff this type has an element type."""
        return self.element is not None

    def is_numeric(self) -> bool:
        """Return True iff the category is NUMERIC."""
        return self.category == TypeCategory.NUMERIC

    def is_string(self) -> bool:
        """Return True iff the category is STRING."""
        return self.category == TypeCategory.STRING

    def sql(self) -> str:
        """Render the type as SQL text usable in DDL or in a CAST.

        Arrays render as the element's SQL followed by `[]`; the array's
        own `name` and `typmod` are not used. Scalars render as `name`,
        with `(m1,m2,...)` appended when a modifier is present.
        """
        inner = self.element
        if inner is not None:
            return inner.sql() + "[]"
        if not self.typmod:
            return self.name
        modifiers = ",".join(map(str, self.typmod))
        return f"{self.name}({modifiers})"
91
+
92
+
93
# Initial scalar set. Chosen to give the generator some variety (numeric,
# string, temporal, structured) without pulling every PostgreSQL cast rule
# into the catalog.

# Numeric family.
INT4 = PgType(name="int4", category=TypeCategory.NUMERIC)
INT8 = PgType(name="int8", category=TypeCategory.NUMERIC)
NUMERIC = PgType(name="numeric", category=TypeCategory.NUMERIC)
FLOAT8 = PgType(name="float8", category=TypeCategory.NUMERIC)

# Character strings.
TEXT = PgType(name="text", category=TypeCategory.STRING)
VARCHAR = PgType(name="varchar", category=TypeCategory.STRING)

# Boolean.
BOOL = PgType(name="bool", category=TypeCategory.BOOLEAN)

# Temporal: points in time, then spans.
DATE = PgType(name="date", category=TypeCategory.DATETIME)
TIMESTAMPTZ = PgType(name="timestamptz", category=TypeCategory.DATETIME)
INTERVAL = PgType(name="interval", category=TypeCategory.TIMESPAN)

# Types filed under the USER category.
UUID = PgType(name="uuid", category=TypeCategory.USER)
JSONB = PgType(name="jsonb", category=TypeCategory.USER)
108
+
109
+
110
def array_of(t: PgType) -> PgType:
    """Return the array type whose elements are of type `t`.

    The result is named `_<t.name>` (the pg_type naming convention for
    array types), has category ARRAY, and carries `t` as its element.
    """
    array_name = "_" + t.name
    return PgType(array_name, TypeCategory.ARRAY, t)
113
+
114
+
115
# Every scalar type constant defined above, in declaration order.
SCALAR_TYPES: tuple[PgType, ...] = (
    # numeric
    INT4,
    INT8,
    NUMERIC,
    FLOAT8,
    # string
    TEXT,
    VARCHAR,
    # boolean
    BOOL,
    # temporal
    DATE,
    TIMESTAMPTZ,
    INTERVAL,
    # user-category
    UUID,
    JSONB,
)
121
+
122
+
123
# Implicit cast table: source type name -> names of every type the source
# can be used as *without* an explicit CAST. This is a small subset of
# pg_cast, enough to keep the generator honest about what it can pass
# where; it is not a complete oracle for PostgreSQL semantics.
#
# Convention: each source lists its own name among its targets.
#
# The relation is directional, not symmetric: `int4 -> int8` is present,
# `int8 -> int4` is not. The numeric rows follow the usual promotion
# ladder int4 -> int8 -> numeric -> float8.
#
# A type with no row here is still accepted as itself by the equality
# short-circuit in `implicitly_castable`, so adding a scalar without a row
# degrades to "no implicit casts" rather than to broken behaviour.
#
# Every reachable target is spelled out on the source's own row (e.g.
# `int4` lists `float8` directly). Lookups never walk the graph: the check
# runs once per candidate type per generator decision and must stay O(1).
# Whoever edits the table is responsible for keeping each row complete.
# NOTE(review): verify every (source, target) pair against pg_cast rather
# than inferring it from a chain of other rows -- PostgreSQL resolves
# implicit casts from direct pg_cast entries; confirm before adding a pair
# purely by transitivity.
_IMPLICIT_CASTS: dict[str, frozenset[str]] = {
    source: frozenset(targets)
    for source, targets in (
        ("int4", ("int4", "int8", "numeric", "float8")),
        ("int8", ("int8", "numeric", "float8")),
        ("numeric", ("numeric", "float8")),
        ("float8", ("float8",)),
        ("text", ("text",)),
        ("varchar", ("varchar", "text")),
        ("bool", ("bool",)),
        ("date", ("date", "timestamptz")),
        ("timestamptz", ("timestamptz",)),
        ("interval", ("interval",)),
        ("uuid", ("uuid",)),
        ("jsonb", ("jsonb",)),
    )
}
159
+
160
+
161
def implicitly_castable(src: PgType, tgt: PgType) -> bool:
    """Return True iff a `src` value is usable where `tgt` is expected
    without an explicit CAST.

    Rules, in order:

    1. Equal types always cast.
    2. If either side is an array, both must be arrays and their element
       types must be exactly equal (typmod included). PostgreSQL is more
       permissive in places, but the strict rule keeps the generator from
       emitting things that might parse but rarely type-check. Mixed
       scalar/array pairs are always rejected.
    3. Two scalars with the same type name cast to each other even when
       their typmods differ (`varchar(50)` -> `varchar`). This holds for
       types with no row in `_IMPLICIT_CASTS` as well, so the "every type
       casts to itself" convention does not depend on the table being
       complete.
    4. Otherwise the pair must be listed in `_IMPLICIT_CASTS`.
    """
    # Full-equality short-circuit: the common case, and it covers arrays
    # compared with themselves.
    if src == tgt:
        return True
    # Array handling comes before any name-based rule so that the strict
    # element-equality requirement cannot be bypassed by matching names.
    if src.is_array() or tgt.is_array():
        return (
            src.is_array()
            and tgt.is_array()
            and src.element == tgt.element
        )
    # Same scalar type, different modifier. Without this, a type absent
    # from the table was rejected against itself as soon as a typmod was
    # involved, while listed types were accepted via their self-entry.
    if src.name == tgt.name:
        return True
    return tgt.name in _IMPLICIT_CASTS.get(src.name, frozenset())
@@ -0,0 +1,52 @@
1
+ """Validation modes for generated SQL.
2
+
3
+ Role in the system: the public surface for "how thoroughly do we
4
+ check this query?" Every CLI entry point and test takes a
5
+ ValidationMode and dispatches to the right submodule. Keeping the
6
+ enum here (separate from the implementations in `syntax.py`,
7
+ `parse.py`, `plan.py`) means callers can refer to a mode without
8
+ importing psycopg or pglast — those imports are deferred to the
9
+ submodule that actually needs them.
10
+
11
+ Three layers, each strictly stronger than the previous in BOTH
12
+ cost and catch-rate — pick the cheapest one that catches the
13
+ failure class you care about. The ordering (SYNTAX < PARSE < PLAN)
14
+ is load-bearing: anything PARSE catches, PLAN also catches, and
15
+ anything SYNTAX catches, the other two also catch. That's why a
16
+ test that fails at PARSE is automatically a failure at PLAN — the
17
+ tiers compose.
18
+
19
+ SYNTAX — parse via libpg_query (pglast). No DB needed. Catches every
20
+ grammar error PostgreSQL itself catches but no name/type
21
+ resolution. Microseconds per check.
22
+
23
+ PARSE — PREPARE against a live DB. Runs full parse analysis: name
24
+ resolution, type checking, aggregate/GROUP BY rules, function
25
+ lookup. Milliseconds per check. Implemented in `parse.py`.
26
+
27
+ PLAN — EXPLAIN against a live DB. Runs the full planner pipeline:
28
+ parse-analysis + rewriting + plan-tree construction. Catches
29
+ operator-class lookup failures (ORDER BY / DISTINCT / GROUP
30
+ BY on types without comparison operators) and the subset of
31
+ runtime errors PG can constant-fold at planning time
32
+ (division by zero on literal divisors, etc.). Implemented
33
+ in `plan.py`.
34
+ """
35
+ from enum import Enum, auto
36
+
37
+
38
# An Enum rather than string constants: a misspelled mode fails loudly at
# the attribute lookup instead of slipping through a string comparison.
# Values come from `auto()` because only the symbolic names are API;
# nothing outside this module should rely on the underlying integers.
class ValidationMode(Enum):
    """How thoroughly a generated query is checked.

    Members are declared from weakest to strongest.
    """

    # Skip validation entirely. Reserved for future "generate-only"
    # callers (benchmarks, reproducer dumps); the test suite always runs
    # at SYNTAX or higher so generator bugs stay visible.
    NONE = auto()
    # Grammar-only check.
    SYNTAX = auto()
    # Parse-analysis check.
    PARSE = auto()
    # Planner-level check.
    PLAN = auto()
50
+
51
+
52
+ __all__ = ["ValidationMode"]