waxsql 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- waxsql/__init__.py +158 -0
- waxsql/ast.py +757 -0
- waxsql/catalog.py +363 -0
- waxsql/cli.py +888 -0
- waxsql/config.py +477 -0
- waxsql/context.py +255 -0
- waxsql/data.py +99 -0
- waxsql/gen/__init__.py +51 -0
- waxsql/gen/cte.py +367 -0
- waxsql/gen/data/__init__.py +14 -0
- waxsql/gen/data/columns.py +48 -0
- waxsql/gen/data/emit.py +247 -0
- waxsql/gen/data/rows.py +236 -0
- waxsql/gen/data/strategies.py +299 -0
- waxsql/gen/expr.py +723 -0
- waxsql/gen/select.py +831 -0
- waxsql/gen/setop.py +259 -0
- waxsql/gen/subquery.py +397 -0
- waxsql/gen/window.py +398 -0
- waxsql/pretty.py +81 -0
- waxsql/printer.py +688 -0
- waxsql/py.typed +0 -0
- waxsql/schema.py +557 -0
- waxsql/scope.py +391 -0
- waxsql/types.py +187 -0
- waxsql/validate/__init__.py +52 -0
- waxsql/validate/parse.py +194 -0
- waxsql/validate/plan.py +149 -0
- waxsql/validate/syntax.py +87 -0
- waxsql-1.0.0.dist-info/METADATA +746 -0
- waxsql-1.0.0.dist-info/RECORD +35 -0
- waxsql-1.0.0.dist-info/WHEEL +5 -0
- waxsql-1.0.0.dist-info/entry_points.txt +2 -0
- waxsql-1.0.0.dist-info/licenses/LICENSE +21 -0
- waxsql-1.0.0.dist-info/top_level.txt +1 -0
waxsql/ast.py
ADDED
|
@@ -0,0 +1,757 @@
|
|
|
1
|
+
"""AST for generated SELECT queries.
|
|
2
|
+
|
|
3
|
+
Pure data — no rendering, no generation logic. The printer in `printer.py`
|
|
4
|
+
turns these into SQL; the generators in `gen/` turn random choices into
|
|
5
|
+
these. Splitting the pipeline this way keeps each stage testable in
|
|
6
|
+
isolation: the printer can be exercised against handcrafted ASTs, and
|
|
7
|
+
the generators can be exercised by inspecting their output AST without
|
|
8
|
+
parsing SQL back out.
|
|
9
|
+
|
|
10
|
+
Design notes:
|
|
11
|
+
|
|
12
|
+
* Every `Expr` subclass carries its own `pg_type`. The generator
|
|
13
|
+
knows the target type when it emits each node, so storing it on
|
|
14
|
+
the node lets the printer (and future planners) reason about types
|
|
15
|
+
without going back to the catalog.
|
|
16
|
+
|
|
17
|
+
* All AST nodes are frozen dataclasses. Hashable, structurally
|
|
18
|
+
comparable (handy for tests), and immune to "inner generator
|
|
19
|
+
mutated my outer node" bugs.
|
|
20
|
+
|
|
21
|
+
* `FromItem` is a nominal marker base (a plain class); `Expr` is a
|
|
22
|
+
structural `Protocol` declaring `pg_type`. Concrete subclasses are the
|
|
23
|
+
@dataclass(frozen=True). This avoids the dataclass-inheritance
|
|
24
|
+
ordering rule (parents-before-children, defaults-after-non-
|
|
25
|
+
defaults) that bites when the base wants required fields and a
|
|
26
|
+
subclass wants to add more required fields.
|
|
27
|
+
|
|
28
|
+
* The AST models the full SELECT-statement surface (GROUP BY,
|
|
29
|
+
HAVING, OFFSET, Cast, UnaryOp, etc.) uniformly, whether or not
|
|
30
|
+
every generator path emits each shape. A complete printer is
|
|
31
|
+
easier to test than one with conditional branches.
|
|
32
|
+
"""
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
from dataclasses import dataclass
|
|
36
|
+
from typing import Optional, Protocol, runtime_checkable
|
|
37
|
+
|
|
38
|
+
from .types import PgType
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Python value types a `Literal` node may carry. The printer's
# `_render_literal` switches on `pg_type` to render the value; the
# value's Python type must be one of these. Date/timestamp/interval/
# UUID/JSONB literals carry their PG-textual form as `str` (see the
# `_DATE_LIT` / `_TIMESTAMPTZ_LIT` / etc. constants in `gen/expr.py`),
# so the union doesn't need datetime/uuid/Decimal members. Defined
# at module scope so callers (and printer.py's `_render_literal`)
# can refer to a single named type instead of inlining the union.
# NOTE(review): this is a runtime expression rather than an annotation,
# so `from __future__ import annotations` does not defer it; the
# `X | Y` form on builtin types needs Python 3.10+ — confirm that
# matches the package's declared minimum version.
LiteralValue = int | float | str | bool | None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
# Expression hierarchy
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
@runtime_checkable
class Expr(Protocol):
    """Structural (PEP 544) protocol satisfied by every expression node.

    The protocol has a single member, `pg_type`. The concrete node types
    (`ColumnRef`, `Literal`, `FuncCall`, `BinaryOp`, `UnaryOp`, `Cast`,
    `Subquery`, `Exists`, `InSubquery`) are `@dataclass(frozen=True)`
    classes that each declare that field themselves. Because `Expr` is
    a `Protocol` rather than a plain base class, a static checker accepts
    any object exposing a `PgType`-typed `pg_type` wherever an `Expr` is
    expected, whether or not the object's class inherits from `Expr`.

    What the static check does and does not cover (read before adding
    a new node type): protocol attributes are not abstract. A *use*
    that fails the structural check — passing something without
    `pg_type` where an `Expr` is required — is rejected, but writing
    `class X(Expr):` without a `pg_type` field is not flagged at the
    declaration. The omission only surfaces at a call site.

    If declaration-time enforcement is ever needed, the alternative is
    `abc.ABC` with `@property @abstractmethod def pg_type`.
    NOTE(review): an un-defaulted dataclass field picks up a same-named
    class attribute found on a base class as its default, so verify
    that a property on the base does not leak into the subclasses'
    generated `__init__` before making that switch.

    The protocol form is kept today because the structural check is
    more permissive: dataclasses defined outside this file can satisfy
    `Expr` without importing it.

    `@runtime_checkable` makes `isinstance(x, Expr)` usable. The
    runtime check only tests that a `pg_type` attribute is present —
    it checks neither its type nor nominal inheritance — so anything
    with a `pg_type` attribute counts. For the test suite's question
    ("did the generator return something with a typed AST shape?")
    that structural answer is the intended one.
    NOTE(review): `isinstance` against a runtime-checkable protocol is
    documented as slower than a nominal check; keep it out of hot paths.
    """

    pg_type: PgType
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass(frozen=True)
class ColumnRef(Expr):
    """Column reference in qualified form: `alias.column`.

    The qualifier is always the FROM-item alias, never the raw table
    name. That keeps the generated SQL unambiguous when one table is
    present more than once (self-joins), and means nothing outside the
    scope has to remember which table a column originated from.
    """

    # Type of the referenced column.
    pg_type: PgType
    # Alias of the FROM item the column is read through.
    table_alias: str
    # Column name within that FROM item.
    column: str
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass(frozen=True)
class Literal(Expr):
    """Literal value tagged with its PostgreSQL type.

    SQL NULL is spelled `value=None`. The printer renders it as
    `NULL::<type>` so PostgreSQL does not fall back to inferring `text`
    for it in surprising ways; a bare, untyped NULL is a known footgun
    in PostgreSQL's function-resolution code.
    """

    pg_type: PgType
    # One of the `LiteralValue` carriers; None means SQL NULL.
    value: LiteralValue
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@dataclass(frozen=True)
class FuncCall(Expr):
    """Function invocation: `name(arg1, arg2, ...) [OVER (...)]`.

    SQL-keyword "nullary functions" (current_date, current_timestamp,
    ...) are rendered by the printer without parentheses: PostgreSQL's
    grammar treats them as keyword expressions rather than function
    invocations, and `current_date()` is a parse error.

    Fields beyond the plain call:

    * `over` — None for an ordinary scalar/aggregate call. For a
      window-style call (`func(args) OVER ...`) it holds either an
      inline `WindowSpec` or a `WindowRef` naming an entry of the
      enclosing `Select.windows`. It is an optional field here, rather
      than a separate WindowFuncCall node, so that construction sites
      written before milestone 6 keep working: they pass nothing and
      get None.

    * `star` — True for the `name(*)` special form. Standard PG only
      accepts it for `count(*)`; every other aggregate rejects `*` as
      a placeholder. The generator enforces that; the AST does not,
      because doing so would tie ast.py to the catalog's name list.
      `__post_init__` checks only what holds for any function: a
      starred call carries no args.

    * `filter_` — optional BOOL-typed expression rendered as a trailing
      `FILTER (WHERE <expr>)`. PG accepts it on aggregate functions
      only (`upper('x') FILTER (WHERE ...)` is rejected). As with
      `star`, the aggregate-only rule is the generator's job, not the
      AST's. The trailing underscore avoids shadowing the `filter`
      builtin.

    * `within_group` — tuple of OrderByItem (empty by default) rendered
      as `WITHIN GROUP (ORDER BY ...)`. Syntactically required for
      ordered-set aggregates (percentile_cont, percentile_disc, mode,
      hypothetical-set rank/dense_rank/etc.) and rejected by PG on any
      other function. Again the AST trusts the generator.

    PG's clause order is `name(args) [WITHIN GROUP (...)]
    [FILTER (WHERE ...)] [OVER (...)]`. The printer emits the clauses
    in that order; the AST fields themselves are independent.

    Raises:
        ValueError: if `star` is true and `args` is non-empty.
    """

    pg_type: PgType
    name: str
    args: tuple[Expr, ...]
    # Inline WindowSpec (the `OVER (...)` form) or a WindowRef (`OVER
    # name`, resolved against the enclosing Select.windows). The
    # named-window hoist pass in gen/window.py turns inline specs into
    # refs once generation is done.
    over: "WindowSpec | WindowRef | None" = None
    star: bool = False
    filter_: Optional[Expr] = None
    within_group: tuple["OrderByItem", ...] = ()

    def __post_init__(self) -> None:
        # `name(*)` and `name(arg, ...)` are mutually exclusive syntactic
        # forms (PG rejects `count(*, x)` and friends). Rejecting the
        # combination here stops the printer from ever emitting a
        # malformed `count(*)` followed by mystery args.
        if not self.star or not self.args:
            return
        arg_count = len(self.args)
        raise ValueError(
            f"FuncCall(star=True) cannot have args; "
            f"got {arg_count} arg(s) for {self.name!r}"
        )
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
@dataclass(frozen=True)
class BinaryOp(Expr):
    """Application of a binary operator: `left SYMBOL right`.

    `symbol` holds the operator text exactly as the catalog spells it
    ("+", "AND", "LIKE", "->>", ...). The printer always surrounds it
    with spaces, so word-form operators cannot run into their operands.

    There is deliberately no precedence field. Instead of consulting a
    precedence table the printer parenthesizes operands conservatively
    (see `_wrap_if_compound` in printer.py): redundant parentheses are
    harmless while a missing pair is a parse error, so verbosity wins.
    """

    # Result type of the whole operation.
    pg_type: PgType
    symbol: str
    left: Expr
    right: Expr
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@dataclass(frozen=True)
class UnaryOp(Expr):
    """Prefix unary operator application: `SYMBOL operand`.

    Covers NOT and unary minus/plus. Postfix unary operators are not
    modeled because no operator in the catalog needs them.
    """

    # Result type of the operation.
    pg_type: PgType
    symbol: str
    operand: Expr
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@dataclass(frozen=True)
class Cast(Expr):
    """Explicit type cast of `expr` to `target_type`.

    SQL offers two spellings, `expr::type` and `CAST(expr AS type)`;
    the printer uses the compact `::` one. `pg_type` intentionally
    repeats `target_type` so that every Expr uniformly satisfies the
    "has pg_type" invariant.
    """

    # Same value as `target_type`; kept for Expr uniformity.
    pg_type: PgType
    # Expression being cast.
    expr: Expr
    target_type: PgType
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# ---------------------------------------------------------------------------
|
|
233
|
+
# Subquery expressions
|
|
234
|
+
# ---------------------------------------------------------------------------
|
|
235
|
+
#
|
|
236
|
+
# Three forms cover the bulk of subquery usage in real SQL:
|
|
237
|
+
#
|
|
238
|
+
# * Subquery: `(SELECT col FROM ...)` in expression position.
|
|
239
|
+
# Returns a single value; pg_type is the type of that value.
|
|
240
|
+
#
|
|
241
|
+
# * Exists: `[NOT ]EXISTS (SELECT ...)`. Always BOOL. The inner
|
|
242
|
+
# SELECT's targets are ignored at runtime — canonical idiom is
|
|
243
|
+
# `SELECT 1`.
|
|
244
|
+
#
|
|
245
|
+
# * InSubquery: `<expr> [NOT ]IN (SELECT col FROM ...)`. Always BOOL.
|
|
246
|
+
# The inner SELECT must produce a single column whose type matches
|
|
247
|
+
# `expr`'s type.
|
|
248
|
+
#
|
|
249
|
+
# Forward reference to Select via the string annotation; this works
|
|
250
|
+
# because the module uses `from __future__ import annotations`, so all
|
|
251
|
+
# annotations are lazily-resolved strings rather than runtime types.
|
|
252
|
+
|
|
253
|
+
@dataclass(frozen=True)
class Subquery(Expr):
    """Scalar subquery in expression position: `(SELECT col FROM ...)`.

    PG parses a scalar subquery that could yield several rows, but
    raises at runtime when it actually returns more than one. To stay
    runtime-safe the generator puts `LIMIT 1` on the inner Select of
    every Subquery; that is the generator's responsibility and is not
    enforced by the AST.
    """

    # Type of the single value the subquery returns.
    pg_type: PgType
    select: "Select"
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
@dataclass(frozen=True)
class Exists(Expr):
    """Row-existence test: `[NOT ]EXISTS (SELECT ...)`. Always BOOL.

    PG only checks whether the subquery produces any row, so the inner
    SELECT's targets carry no meaning. The canonical idiom is
    `SELECT 1 FROM ...`, and that is exactly the shape the generator
    emits.
    """

    # Always BOOL; the field exists only for Expr uniformity.
    pg_type: PgType
    select: "Select"
    # True renders the `NOT EXISTS` form.
    negated: bool = False
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
@dataclass(frozen=True)
class InSubquery(Expr):
    """Membership test: `<expr> [NOT ]IN (SELECT col FROM ...)`. Always BOOL.

    The inner SELECT has to yield exactly one column, and that column's
    type must match `expr.pg_type` (or be implicitly castable to it).
    The generator guarantees this; the AST performs no validation.
    """

    # Always BOOL.
    pg_type: PgType
    # Left-hand side of the IN test.
    expr: Expr
    select: "Select"
    # True renders the `NOT IN` form.
    negated: bool = False
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
# ---------------------------------------------------------------------------
|
|
294
|
+
# FROM clause
|
|
295
|
+
# ---------------------------------------------------------------------------
|
|
296
|
+
|
|
297
|
+
class FromItem:
    """Marker base class for anything usable as a FROM-clause item.

    The concrete subclasses are `TableRef`, `JoinExpr`, `DerivedTable`
    and `CteRef`. The hierarchy is left open so further FROM kinds
    (table-valued functions, VALUES lists, etc.) can be added without
    touching the existing subclasses.
    """
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
@dataclass(frozen=True)
class TableRef(FromItem):
    """Base-table reference that always carries an alias.

    The alias is mandatory even when it is identical to the table
    name. That avoids PostgreSQL's "is this column qualified by table
    name or by alias" name-resolution distinction and keeps the
    generated SQL self-consistent.
    """

    table: str
    alias: str
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
@dataclass(frozen=True)
class DerivedTable(FromItem):
    """Subquery used as a FROM item: `[LATERAL ](SELECT ...) AS alias`.

    The inner SELECT behaves like a virtual table whose columns the
    outer query reaches as `alias.<column>`.

    `lateral=True` lets the inner SELECT refer to aliases of FROM
    items that precede it among its siblings (PG's LATERAL semantics).
    Without it the inner SELECT is independent of every sibling. This
    is the FROM-clause counterpart of correlated versus uncorrelated
    subqueries in expression position.

    `column_aliases` models the optional `AS sq(a, b, c)` column-list
    syntax. It is reserved for later milestones: the milestone-4
    generator leaves it empty and relies on the target aliases of the
    inner SELECT.
    """

    select: "Select"
    alias: str
    column_aliases: tuple[str, ...] = ()
    lateral: bool = False
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
# Every value `JoinExpr.kind` may take. This set is the source of truth:
# the printer's switch and the generator's choices must stay in sync
# with it.
JOIN_KINDS: frozenset[str] = frozenset(
    ("INNER", "LEFT", "RIGHT", "FULL", "CROSS")
)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
@dataclass(frozen=True)
class JoinExpr(FromItem):
    """One binary node of a JOIN tree.

    Joins over several tables are built as a left-deep tree
    (`((t1 JOIN t2) JOIN t3)`).

    Qualifier rules, which the generator must uphold (this class does
    not check them; the printer raises on violation):

    * CROSS join: `on` is None and `using` is empty.
    * Any other kind: exactly one of `on` / non-empty `using` is set.
      A bare `LEFT JOIN t` with no qualifier is a parse error in PG.

    Any nesting of join trees is accepted here. Parenthesizing nested
    trees so that they print parseably is the printer's job.
    """

    left: FromItem
    right: FromItem
    # One of JOIN_KINDS.
    kind: str
    on: Optional[Expr] = None
    using: tuple[str, ...] = ()
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
@dataclass(frozen=True)
class CteRef(FromItem):
    """FROM-clause reference to a CTE: `cte_name AS alias`.

    Shaped like TableRef, except that the source is a CTE defined in
    the enclosing WITH instead of a base table. Making sure `cte_name`
    resolves to a defined CTE is up to the generator; the AST does not
    check that the name exists.

    The local `alias` introduces column bindings derived from the
    targets of the CTE's inner SELECT, the same way DerivedTable does.
    The only difference is that the inner SELECT lives in a sibling
    CteDef rather than on this node.
    """

    cte_name: str
    alias: str
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
# ---------------------------------------------------------------------------
|
|
394
|
+
# WITH clause / CTE definitions
|
|
395
|
+
# ---------------------------------------------------------------------------
|
|
396
|
+
|
|
397
|
+
@dataclass(frozen=True)
class CteSearch:
    """The `SEARCH BREADTH|DEPTH FIRST BY col, ... SET seqcol` clause
    of a recursive CTE.

    The clause contributes one synthetic column, `set_column`, to the
    columns the CTE exposes; the generator extends the scope's column
    list accordingly. PG exposes the search order through that column,
    so the outer query can ORDER BY it to get reliable BFS/DFS
    traversal output.
    """

    # True = BREADTH FIRST, False = DEPTH FIRST.
    breadth_first: bool
    by_columns: tuple[str, ...]
    set_column: str
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
@dataclass(frozen=True)
class CteCycle:
    """The `CYCLE col, ... SET cyclecol USING pathcol` clause of a
    recursive CTE.

    The clause contributes two synthetic columns: `cycle_mark_column`
    (BOOL, true on rows where a cycle was detected) and `path_column`
    (an array of row tuples tracing the recursion path so far). The
    generator's scope-registration step adds both to the CTE's exposed
    columns. The clause guards graph walks against infinite recursion.

    PG's extended form `SET cycle_mark TO val DEFAULT val2`, for
    non-default cycle markers, is not modeled (defaults TO TRUE /
    DEFAULT FALSE are what 99% of real queries want).
    """

    columns: tuple[str, ...]
    cycle_mark_column: str
    path_column: str
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
@dataclass(frozen=True)
class CteDef:
    """A single WITH-clause entry:
    `name [(col1, col2, ...)] AS [MATERIALIZED|NOT MATERIALIZED] (SELECT ...)
    [SEARCH ...] [CYCLE ...]`.

    * `column_aliases` — empty by default; reserved for the explicit
      column-list syntax (same pattern as DerivedTable.column_aliases).

    * `materialized` — None keeps PG's default behavior (which since
      PG 12 may inline single-use CTEs), True forces MATERIALIZED and
      False forces NOT MATERIALIZED. The milestone-5 generator leaves
      it at None, in which case the printer emits no modifier.

    * `recursive` — True for recursive CTEs (added in milestone 8).
      The `RECURSIVE` keyword belongs to the whole WITH list, not to
      one CteDef: the printer scans the list and emits
      `WITH RECURSIVE` if ANY CteDef has recursive=True. The flag is
      stored per CteDef because that is where the generator decides
      "this one will self-reference."

    * `select` — widened to `Select | SetOp` in milestone 8, since the
      body of a recursive CTE is `base UNION ALL recursive` (a SetOp).
      Non-recursive CTEs still hold a plain Select.

    * `search` / `cycle` — PG-specific recursive-CTE clauses, valid
      only when `recursive=True`. Each one adds synthetic exposed
      columns (see the CteSearch / CteCycle docstrings). They may
      appear together: `... SEARCH ... CYCLE ...` is valid PG.
    """

    name: str
    select: "Select | SetOp"
    column_aliases: tuple[str, ...] = ()
    materialized: Optional[bool] = None
    recursive: bool = False
    search: Optional[CteSearch] = None
    cycle: Optional[CteCycle] = None
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
# ---------------------------------------------------------------------------
|
|
470
|
+
# SELECT clause pieces
|
|
471
|
+
# ---------------------------------------------------------------------------
|
|
472
|
+
|
|
473
|
+
@dataclass(frozen=True)
class SelectTarget:
    """A SELECT-list entry: an expression plus an optional alias.

    There is no `SELECT *` form, because the generator always projects
    an explicit target list. Star projection fits the type-driven
    pipeline badly (the projected types would depend on the FROM items
    instead of being a property of the SELECT itself), and nothing
    downstream needs it.
    """

    expr: Expr
    alias: Optional[str] = None
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
@dataclass(frozen=True)
class OrderByItem:
    """A single ORDER BY entry.

    `direction` is "ASC" or "DESC". `nulls` is "FIRST", "LAST", or
    None, where None leaves PostgreSQL's default in effect (NULLS LAST
    for ASC, NULLS FIRST for DESC).

    Both are plain strings, not enums, for the same reason as
    FrameClause.unit and GroupingSet.kind: the alphabet is tiny and
    fixed, it mirrors the grammar tokens 1:1, and the printer can
    splat the value in directly. PG validates at parse time, so a
    stray value shows up immediately in the round-trip test instead
    of silently mis-rendering.
    """

    expr: Expr
    direction: str = "ASC"
    nulls: Optional[str] = None
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
@dataclass(frozen=True)
class FrameBound:
    """One bound of a window frame: the `start` or `end` of a
    `BETWEEN ... AND ...` extent, or the sole bound of a single-bound
    extent.

    PG's grammar gives five choices:
        UNBOUNDED PRECEDING | <offset> PRECEDING | CURRENT ROW
        | <offset> FOLLOWING | UNBOUNDED FOLLOWING

    They are modeled with a string `kind` field, following the same
    convention as OrderByItem.direction; strings keep printer dispatch
    trivial and avoid enum import boilerplate. Valid kinds:
        "unbounded_preceding"
        "preceding"
        "current_row"
        "following"
        "unbounded_following"

    `offset` is the `<offset>` expression for the preceding/following
    kinds (typically a non-negative integer literal) and must be None
    for the unbounded kinds and CURRENT ROW.

    Raises:
        ValueError: if `kind` is not one of the five valid kinds, if a
            preceding/following bound has no offset, or if an
            unbounded/current_row bound has one. Any of these would
            otherwise produce malformed SQL.
    """

    kind: str
    offset: Optional[Expr] = None

    def __post_init__(self) -> None:
        offset_kinds = ("preceding", "following")
        bare_kinds = (
            "unbounded_preceding",
            "current_row",
            "unbounded_following",
        )
        # Reject unknown kinds first. Otherwise a misspelled kind with
        # an offset is reported as "must not have an offset", and one
        # without an offset is accepted silently.
        if self.kind not in offset_kinds + bare_kinds:
            raise ValueError(
                f"FrameBound: unknown kind {self.kind!r}; expected one of "
                f"{', '.join(offset_kinds + bare_kinds)}"
            )
        needs_offset = self.kind in offset_kinds
        has_offset = self.offset is not None
        if needs_offset and not has_offset:
            raise ValueError(
                f"FrameBound(kind={self.kind!r}) requires an offset"
            )
        if has_offset and not needs_offset:
            raise ValueError(
                f"FrameBound(kind={self.kind!r}) must not have an offset"
            )
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
@dataclass(frozen=True)
class FrameClause:
    """Window frame clause: the `ROWS BETWEEN ... AND ...` part that
    comes after PARTITION BY and ORDER BY inside an OVER clause.

    PG grammar:
        { RANGE | ROWS | GROUPS } frame_extent [ frame_exclusion ]

    * `unit` — "ROWS", "RANGE", or "GROUPS"; literal strings that
      mirror the grammar tokens for the printer's convenience.

    * `start` / `end` — lower and upper bound. With `end=None` the
      printer emits the single-bound form `unit start`, which PG
      reads as `unit BETWEEN start AND CURRENT ROW`. Both forms are
      valid; the explicit BETWEEN is more common and clearer.

    * `exclude` — body of the EXCLUDE clause: one of "CURRENT ROW",
      "GROUP", "TIES", "NO OTHERS", or None for the default (same as
      leaving the clause out). Callers store only the body; the
      printer prepends "EXCLUDE". It is a string rather than an enum
      for symmetry with `unit` and `direction` elsewhere; the set is
      small and PG validates it at parse time anyway.
    """

    unit: str
    start: FrameBound
    end: Optional[FrameBound] = None
    exclude: Optional[str] = None
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
@dataclass(frozen=True)
class WindowSpec:
    """The `OVER (PARTITION BY ... ORDER BY ... [frame])` clause of a
    window-style function call.

    When both `partition_by` and `order_by` are empty the result is
    the `OVER ()` form, i.e. the entire result set as one partition.
    Some window functions (lag, lead, first_value, last_value) are
    typically used with ORDER BY, but PG accepts them without it
    syntactically.

    `frame`, when set, is a structured FrameClause which the printer
    renders as `ROWS BETWEEN ... AND ...` (or the RANGE/GROUPS
    variants). Earlier milestones stored it as a raw string. It became
    structured when frame generation landed, so that the printer can
    guarantee well-formed output and the generator can compose bounds
    deterministically without string concatenation.
    """

    partition_by: tuple[Expr, ...] = ()
    order_by: tuple[OrderByItem, ...] = ()
    frame: Optional[FrameClause] = None
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
@dataclass(frozen=True)
class WindowRef:
    """Reference to a named window declared in the WINDOW clause of the
    enclosing SELECT. Rendered as `OVER w`, with no parentheses around
    the name.

    It is a separate node from WindowSpec so the printer can dispatch
    structurally, without a name-vs-spec field hack on WindowSpec
    itself. FuncCall.over accepts either type:
    `Optional[WindowSpec | WindowRef]`.

    PG's grammar also permits `OVER (w PARTITION BY extra-col)`, a
    named window with an inline extension. That form is not modeled
    yet; if it is ever needed it would become a third FuncCall.over
    option.
    """

    name: str
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
@dataclass(frozen=True)
class NamedWindow:
    """A WINDOW-clause entry of a SELECT: a name bound to a spec.

    Several FuncCall.over WindowRefs may point at the same name. That
    is the purpose of the WINDOW clause: deduplicating window specs
    across multiple aggregates. The Select.windows tuple declares
    every name visible from that SELECT's body.
    """

    name: str
    spec: WindowSpec
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
@dataclass(frozen=True)
class GroupingSet:
    """Grouping-set construct of a GROUP BY clause: ROLLUP, CUBE, or
    GROUPING SETS.

    PG grammar:
        ROLLUP ( expr_list_or_paren_list, ... )
        CUBE ( expr_list_or_paren_list, ... )
        GROUPING SETS ( ( expr_list ), ( expr_list ), ... )

    `kind` is the keyword itself: "ROLLUP", "CUBE", or "GROUPING SETS"
    (literal strings for printer convenience, mirroring `unit` on
    FrameClause and `direction` on OrderByItem).

    `elements` is a tuple of tuples; every outer entry is one
    "element" of the construct:

    * ROLLUP/CUBE: an element is typically a one-expression tuple
      (`ROLLUP (a, b, c)`). Multi-expression elements get parentheses
      (`ROLLUP ((a, b), c)`, where the first element is the compound
      (a, b)).

    * GROUPING SETS: an element is one grouping set, rendered with
      explicit parentheses. That includes the empty tuple, which
      becomes `()`, the grand-total grouping.

    Expressions appearing in `elements` may also be referenced by
    SELECT-list items. PG's "must appear in GROUP BY" rule is satisfied
    by structural equality (the same Expr instance or an equivalent
    frozen-dataclass value).
    """

    kind: str
    elements: tuple[tuple[Expr, ...], ...]
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
# ---------------------------------------------------------------------------
|
|
660
|
+
# Top-level statement
|
|
661
|
+
# ---------------------------------------------------------------------------
|
|
662
|
+
|
|
663
|
+
@dataclass(frozen=True)
|
|
664
|
+
class Select:
|
|
665
|
+
"""A SELECT statement.
|
|
666
|
+
|
|
667
|
+
Field order roughly matches the SQL clause order — `targets`
|
|
668
|
+
first because that's the SELECT body, then `from_` and any
|
|
669
|
+
optional clauses. `with_ctes` is positionally after the required
|
|
670
|
+
fields (the dataclass rule that defaulted fields follow non-
|
|
671
|
+
defaulted ones), but the printer emits it as a `WITH ...` prefix
|
|
672
|
+
AHEAD of SELECT in the output.
|
|
673
|
+
|
|
674
|
+
`from_` is a tuple of FromItems that the printer joins with commas
|
|
675
|
+
(a cross product). To express explicit JOINs, nest a JoinExpr
|
|
676
|
+
inside a single from_ slot.
|
|
677
|
+
|
|
678
|
+
`with_ctes` is empty by default; when non-empty, the printer
|
|
679
|
+
prefixes the SELECT with `WITH cte1 AS (...), cte2 AS (...) ...`.
|
|
680
|
+
Defaulting empty means callers that don't care about CTEs can
|
|
681
|
+
construct a Select without thinking about the WITH list.
|
|
682
|
+
"""
|
|
683
|
+
targets: tuple[SelectTarget, ...]
|
|
684
|
+
from_: tuple[FromItem, ...]
|
|
685
|
+
with_ctes: tuple[CteDef, ...] = ()
|
|
686
|
+
where: Optional[Expr] = None
|
|
687
|
+
# Each item is either a plain Expr (regular GROUP BY column) or a
|
|
688
|
+
# GroupingSet (ROLLUP/CUBE/GROUPING SETS extension). The printer
|
|
689
|
+
# dispatches on type. PG accepts mixing the two within one
|
|
690
|
+
# GROUP BY: `GROUP BY a, ROLLUP (b, c)` is valid.
|
|
691
|
+
group_by: tuple["Expr | GroupingSet", ...] = ()
|
|
692
|
+
having: Optional[Expr] = None
|
|
693
|
+
windows: tuple[NamedWindow, ...] = ()
|
|
694
|
+
order_by: tuple[OrderByItem, ...] = ()
|
|
695
|
+
limit: Optional[Expr] = None
|
|
696
|
+
offset: Optional[Expr] = None
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
SET_OPS: frozenset[str] = frozenset({"UNION", "INTERSECT", "EXCEPT"})
|
|
700
|
+
|
|
701
|
+
|
|
702
|
+
@dataclass(frozen=True)
|
|
703
|
+
class SetOp:
|
|
704
|
+
"""A set operation combining N SELECT-style arms — UNION,
|
|
705
|
+
INTERSECT, or EXCEPT — with optional ALL modifier.
|
|
706
|
+
|
|
707
|
+
`arms` is a tuple of length 2+; each arm is a Select or a nested
|
|
708
|
+
SetOp (e.g. `A UNION (B INTERSECT C)`). PG requires every arm to
|
|
709
|
+
produce the same number of columns with implicitly-castable
|
|
710
|
+
types; the generator enforces this by extracting the first
|
|
711
|
+
arm's target types and forcing subsequent arms to match.
|
|
712
|
+
|
|
713
|
+
`order_by` / `limit` / `offset` belong to the COMBINED result,
|
|
714
|
+
not individual arms — PG's grammar requires per-arm ORDER BY/
|
|
715
|
+
LIMIT to be parenthesized inside an arm, and milestone 7 keeps
|
|
716
|
+
them at the SetOp level only.
|
|
717
|
+
|
|
718
|
+
Lives at the same nesting level as Select — `Query.select` is
|
|
719
|
+
typed `Select | SetOp` — so callers can hold a top-level
|
|
720
|
+
query body uniformly without dispatching on shape at the
|
|
721
|
+
wrapper layer.
|
|
722
|
+
"""
|
|
723
|
+
op: str # one of SET_OPS
|
|
724
|
+
all: bool
|
|
725
|
+
# Each arm is either a Select or a nested SetOp. Track B #5
|
|
726
|
+
# added the nested case (`A UNION (B INTERSECT C)`); the printer
|
|
727
|
+
# and the test-walking helpers all dispatch on isinstance.
|
|
728
|
+
arms: tuple["Select | SetOp", ...]
|
|
729
|
+
order_by: tuple[OrderByItem, ...] = ()
|
|
730
|
+
limit: Optional[Expr] = None
|
|
731
|
+
offset: Optional[Expr] = None
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
@dataclass(frozen=True)
|
|
735
|
+
class Query:
|
|
736
|
+
"""Top-level wrapper for a generated query.
|
|
737
|
+
|
|
738
|
+
`select` (despite the name) holds either a Select or a SetOp.
|
|
739
|
+
The name is kept for API continuity since milestone 1; treat
|
|
740
|
+
it as "query body" semantically. The Union allows top-level
|
|
741
|
+
UNION/INTERSECT/EXCEPT from milestone 7 onward without
|
|
742
|
+
breaking the public Query type.
|
|
743
|
+
"""
|
|
744
|
+
select: "Select | SetOp"
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
__all__ = [
|
|
748
|
+
"Expr", "ColumnRef", "Literal", "FuncCall", "BinaryOp", "UnaryOp", "Cast",
|
|
749
|
+
"Subquery", "Exists", "InSubquery",
|
|
750
|
+
"FromItem", "TableRef", "JoinExpr", "DerivedTable", "CteRef", "JOIN_KINDS",
|
|
751
|
+
"CteDef", "CteSearch", "CteCycle",
|
|
752
|
+
"SelectTarget", "OrderByItem",
|
|
753
|
+
"WindowSpec", "WindowRef", "NamedWindow", "FrameBound", "FrameClause",
|
|
754
|
+
"GroupingSet",
|
|
755
|
+
"Select", "Query",
|
|
756
|
+
"SetOp", "SET_OPS",
|
|
757
|
+
]
|