waxsql 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
waxsql/ast.py ADDED
@@ -0,0 +1,757 @@
1
+ """AST for generated SELECT queries.
2
+
3
+ Pure data — no rendering, no generation logic. The printer in `printer.py`
4
+ turns these into SQL; the generators in `gen/` turn random choices into
5
+ these. Splitting the pipeline this way keeps each stage testable in
6
+ isolation: the printer can be exercised against handcrafted ASTs, and
7
+ the generators can be exercised by inspecting their output AST without
8
+ parsing SQL back out.
9
+
10
+ Design notes:
11
+
12
+ * Every `Expr` subclass carries its own `pg_type`. The generator
13
+ knows the target type when it emits each node, so storing it on
14
+ the node lets the printer (and future planners) reason about types
15
+ without going back to the catalog.
16
+
17
+ * All AST nodes are frozen dataclasses. Hashable, structurally
18
+ comparable (handy for tests), and immune to "inner generator
19
+ mutated my outer node" bugs.
20
+
21
+ * `Expr` (a runtime-checkable `Protocol` declaring `pg_type`) and
22
+ `FromItem` (an empty marker class) are not dataclasses themselves;
23
+ the concrete subclasses are the `@dataclass(frozen=True)` ones. This
24
+ avoids the dataclass-inheritance ordering rule (parents-before-
25
+ children, defaults-after-non-defaults) that bites when the base
26
+ wants required fields and a subclass adds more required fields.
27
+
28
+ * The AST models the full SELECT-statement surface (GROUP BY,
29
+ HAVING, OFFSET, Cast, UnaryOp, etc.) uniformly, whether or not
30
+ every generator path emits each shape. A complete printer is
31
+ easier to test than one with conditional branches.
32
+ """
33
+ from __future__ import annotations
34
+
35
+ from dataclasses import dataclass
36
+ from typing import Optional, Protocol, runtime_checkable
37
+
38
+ from .types import PgType
39
+
40
+
41
+ # Allowed value carriers for a SQL literal in our AST. The printer's
42
+ # `_render_literal` switches on `pg_type` to render the value; the
43
+ # value's Python type must be one of these. Date/timestamp/interval/
44
+ # UUID/JSONB literals carry their PG-textual form as `str` (see the
45
+ # `_DATE_LIT` / `_TIMESTAMPTZ_LIT` / etc. constants in `gen/expr.py`),
46
+ # so the union doesn't need datetime/uuid/Decimal members. Defined
47
+ # at module scope so callers (and printer.py's `_render_literal`)
48
+ # can refer to a single named type instead of inlining the union.
49
+ LiteralValue = int | float | str | bool | None
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Expression hierarchy
54
+ # ---------------------------------------------------------------------------
55
+
56
@runtime_checkable
class Expr(Protocol):
    """Structural protocol satisfied by every expression node.

    The concrete expression types (`ColumnRef`, `Literal`, `FuncCall`,
    `BinaryOp`, `UnaryOp`, `Cast`, `Subquery`, `Exists`, `InSubquery`)
    are `@dataclass(frozen=True)` classes that each declare their own
    `pg_type` field. `Expr` is a PEP 544 `Protocol` rather than a plain
    base class, so a type checker treats it structurally: any object
    exposing a `PgType`-typed `pg_type` attribute is acceptable where
    an `Expr` is expected, whether or not it inherits from `Expr`.

    What is and is not enforced (read before adding a new node type):

    * Nothing at class-creation time forces `class X(Expr):` to define
      `pg_type`. A subclass that forgets the field is only caught
      where an instance is *used* as an `Expr` and fails the
      structural check, not at its declaration.

    * If declaration-time enforcement becomes important, an `abc.ABC`
      base with an abstract `pg_type` property is the usual
      alternative. That is not a drop-in swap, though: an
      annotation-only dataclass field (one with no default) creates no
      class attribute, so it does not override an inherited abstract
      property, and a frozen dataclass assigning to a setter-less
      property would fail in `__init__`. Plan for subclass changes if
      that migration is ever attempted.

    * The structural form is kept because it is more permissive:
      dataclasses defined outside this module can satisfy `Expr`
      without importing it.

    `@runtime_checkable` keeps `isinstance(x, Expr)` working. The
    check tests for the presence of a `pg_type` attribute, not nominal
    inheritance, so anything with that attribute counts. That is the
    intended semantics for tests asking "did the generator return a
    typed AST node?". Per the `typing` documentation, `isinstance`
    against a runtime-checkable protocol is slower than against an
    ordinary class, so avoid it in hot paths.
    """
    pg_type: PgType
92
+
93
+
94
@dataclass(frozen=True)
class ColumnRef(Expr):
    """Column reference in qualified form, i.e. `alias.column`.

    The qualifier is always the alias of the FROM item, never the raw
    table name. That keeps the SQL unambiguous when one table shows up
    several times (self-joins) and means nobody has to remember, outside
    the scope, which table a column originally belonged to.
    """
    pg_type: PgType
    table_alias: str
    column: str
106
+
107
+
108
@dataclass(frozen=True)
class Literal(Expr):
    """Literal value tagged with its PostgreSQL type.

    A `value` of None stands for SQL NULL. The printer renders it as
    `NULL::<type>` so PostgreSQL does not fall back to inferring `text`
    for it; an untyped NULL is a well-known trap in PostgreSQL's
    function-resolution rules.
    """
    pg_type: PgType
    value: LiteralValue
119
+
120
+
121
@dataclass(frozen=True)
class FuncCall(Expr):
    """Function invocation: `name(arg1, arg2, ...) [OVER (...)]`.

    SQL-keyword "nullary functions" (current_date, current_timestamp,
    ...) are rendered by the printer without parentheses: PostgreSQL's
    grammar treats them as keyword expressions, and `current_date()`
    does not parse.

    Fields beyond the basic name/args:

    * `over`: None for plain scalar/aggregate calls; set for the
      window form `func(args) OVER (...)`. It is an optional field on
      FuncCall instead of a separate WindowFuncCall node so that
      construction sites written before milestone 6 keep working and
      simply get None.

    * `star`: True encodes the `name(*)` form. Standard PG only
      allows this for `count(*)`. That restriction is enforced by the
      generator, not here, since checking it would tie ast.py to the
      catalog's function names. The post-init check covers only what
      holds for every function: a starred call carries no args.

    * `filter_`: optional BOOL expression rendered as a trailing
      `FILTER (WHERE <expr>)`. PG accepts it on aggregates only
      (`upper('x') FILTER (WHERE ...)` is a parse error). As with
      `star`, the generator owns that constraint. The underscore
      suffix avoids shadowing the `filter` builtin.

    * `within_group`: tuple of OrderByItem, empty by default, rendered
      as `WITHIN GROUP (ORDER BY ...)`. Syntactically required for
      ordered-set aggregates (percentile_cont, percentile_disc, mode,
      hypothetical-set rank/dense_rank/etc.) and rejected by PG on
      anything else. The generator is trusted to set it appropriately.

    PG's clause order is `name(args) [WITHIN GROUP (...)]
    [FILTER (WHERE ...)] [OVER (...)]`. The printer emits them in that
    order, and the fields here are independent of one another.

    Raises:
        ValueError: when `star` is True and `args` is non-empty.
    """
    pg_type: PgType
    name: str
    args: tuple[Expr, ...]
    # Either an inline WindowSpec (rendered `OVER (...)`) or a WindowRef
    # (rendered `OVER name`, resolved against the enclosing
    # Select.windows). After generation, the named-window hoist pass in
    # gen/window.py replaces inline specs with refs.
    over: "WindowSpec | WindowRef | None" = None
    star: bool = False
    filter_: Optional[Expr] = None
    within_group: tuple["OrderByItem", ...] = ()

    def __post_init__(self) -> None:
        # `name(*)` and `name(arg, ...)` are mutually exclusive forms
        # (PG rejects `count(*, x)`), so anything other than
        # "starred AND has args" is acceptable.
        if not (self.star and self.args):
            return
        # Fail at construction so the printer can never emit `count(*)`
        # while silently dropping or appending stray arguments.
        arg_count = len(self.args)
        raise ValueError(
            f"FuncCall(star=True) cannot have args; "
            f"got {arg_count} arg(s) for {self.name!r}"
        )
185
+
186
+
187
@dataclass(frozen=True)
class BinaryOp(Expr):
    """Infix operator applied to two operands: `left SYMBOL right`.

    `symbol` holds the operator text exactly as the catalog spells it
    ("+", "AND", "LIKE", "->>", ...). The printer surrounds it with
    spaces, which stops word-style operators from running into their
    operands.

    There is deliberately no precedence field. Instead of consulting a
    precedence table, the printer parenthesizes operands conservatively
    (see `_wrap_if_compound` in printer.py): a redundant pair of parens
    costs nothing, whereas a missing pair is a parse error.
    """
    pg_type: PgType
    symbol: str
    left: Expr
    right: Expr
204
+
205
+
206
@dataclass(frozen=True)
class UnaryOp(Expr):
    """Prefix operator applied to one operand: `SYMBOL operand`.

    Covers NOT as well as unary minus and plus. Postfix unary operators
    have no representation because no catalog operator requires one.
    """
    pg_type: PgType
    symbol: str
    operand: Expr
216
+
217
+
218
@dataclass(frozen=True)
class Cast(Expr):
    """Explicit conversion of an expression to another type.

    SQL offers two spellings, `expr::type` and `CAST(expr AS type)`;
    the printer uses the shorter `::` one. `pg_type` deliberately
    repeats `target_type`: it exists so that Cast, like every other
    Expr, exposes `pg_type`. This class does not check that the two
    fields agree.
    """
    pg_type: PgType
    expr: Expr
    target_type: PgType
230
+
231
+
232
+ # ---------------------------------------------------------------------------
233
+ # Subquery expressions
234
+ # ---------------------------------------------------------------------------
235
+ #
236
+ # Three forms cover the bulk of subquery usage in real SQL:
237
+ #
238
+ # * Subquery: `(SELECT col FROM ...)` in expression position.
239
+ # Returns a single value; pg_type is the type of that value.
240
+ #
241
+ # * Exists: `[NOT ]EXISTS (SELECT ...)`. Always BOOL. The inner
242
+ # SELECT's targets are ignored at runtime — canonical idiom is
243
+ # `SELECT 1`.
244
+ #
245
+ # * InSubquery: `<expr> [NOT ]IN (SELECT col FROM ...)`. Always BOOL.
246
+ # The inner SELECT must produce a single column whose type matches
247
+ # `expr`'s type.
248
+ #
249
+ # Forward reference to Select via the string annotation; this works
250
+ # because the module uses `from __future__ import annotations`, so all
251
+ # annotations are lazily-resolved strings rather than runtime types.
252
+
253
@dataclass(frozen=True)
class Subquery(Expr):
    """Subquery used as a scalar value: `(SELECT col FROM ...)`.

    PostgreSQL parses a scalar subquery that could yield several rows
    without complaint, but raises at execution time if it actually
    does. To stay safe at runtime the generator attaches `LIMIT 1` to
    the inner Select of every Subquery. That is a generator duty; no
    check for it is performed here.
    """
    pg_type: PgType
    select: "Select"
264
+
265
+
266
@dataclass(frozen=True)
class Exists(Expr):
    """Row-existence test `[NOT ]EXISTS (SELECT ...)`; the result is BOOL.

    PostgreSQL only checks whether the subquery produces at least one
    row, so what the inner SELECT projects is irrelevant. The generator
    emits the conventional `SELECT 1 FROM ...` shape.
    """
    pg_type: PgType  # expected to be BOOL; kept so every Expr has pg_type
    select: "Select"
    negated: bool = False
277
+
278
+
279
@dataclass(frozen=True)
class InSubquery(Expr):
    """Membership test `<expr> [NOT ]IN (SELECT col FROM ...)`; BOOL result.

    The inner SELECT has to yield exactly one column, of a type equal
    to (or implicitly castable to) `expr.pg_type`. Upholding that is
    the generator's responsibility; nothing here validates it.
    """
    pg_type: PgType  # expected to be BOOL
    expr: Expr
    select: "Select"
    negated: bool = False
291
+
292
+
293
+ # ---------------------------------------------------------------------------
294
+ # FROM clause
295
+ # ---------------------------------------------------------------------------
296
+
297
class FromItem:
    """Marker base class for anything allowed in a FROM clause.

    The concrete kinds are `TableRef`, `JoinExpr`, `DerivedTable` and
    `CteRef`. The base carries no fields or behavior, so further FROM
    kinds (table-valued functions, VALUES lists, ...) can be added
    later without touching the existing subclasses.
    """
306
+
307
+
308
@dataclass(frozen=True)
class TableRef(FromItem):
    """Base-table reference; the alias is always present.

    An alias is supplied even when it is identical to the table name.
    That avoids PostgreSQL's "qualified by table name or by alias?"
    name-resolution distinction and keeps the generated SQL consistent
    with itself.
    """
    table: str
    alias: str
319
+
320
+
321
@dataclass(frozen=True)
class DerivedTable(FromItem):
    """Subquery in FROM position: `[LATERAL ](SELECT ...) AS alias`.

    The inner SELECT behaves like a virtual table whose columns the
    outer query reaches as `alias.<column>`.

    `lateral=True` lets the inner SELECT refer to aliases of sibling
    FROM items that precede it (PG's LATERAL semantics). Without it
    the inner query is independent of its siblings. This is the
    FROM-clause counterpart of correlated versus uncorrelated
    subqueries in expression position.

    `column_aliases` maps to the optional `AS sq(a, b, c)` column list.
    It is reserved for later milestones: the milestone-4 generator
    leaves it empty and relies on the inner SELECT's own target
    aliases.
    """
    select: "Select"
    alias: str
    column_aliases: tuple[str, ...] = ()
    lateral: bool = False
343
+
344
+
345
# The values JoinExpr.kind may take. This set is authoritative: the
# printer's dispatch and the generator's choices must stay in sync with it.
JOIN_KINDS: frozenset[str] = frozenset(
    ("INNER", "LEFT", "RIGHT", "FULL", "CROSS")
)
350
+
351
+
352
@dataclass(frozen=True)
class JoinExpr(FromItem):
    """One node of a binary JOIN tree.

    Joins over several tables are built left-deep
    (`((t1 JOIN t2) JOIN t3)`).

    Qualifier rules, which the generator must uphold (this class does
    not check them; the printer raises on violation):

    * CROSS join: `on` is None and `using` is empty.
    * Any other kind: exactly one of `on` / `using` is provided. A
      bare `LEFT JOIN t` with no qualifier is a parse error in PG.

    Arbitrary nesting is accepted here. Emitting the parentheses that
    make a nested tree parse correctly is the printer's job.
    """
    left: FromItem
    right: FromItem
    kind: str  # expected to be a member of JOIN_KINDS
    on: Optional[Expr] = None
    using: tuple[str, ...] = ()
373
+
374
+
375
@dataclass(frozen=True)
class CteRef(FromItem):
    """Use of a CTE inside a FROM clause: `cte_name AS alias`.

    Structurally the same as TableRef, except that the source is a CTE
    defined in the enclosing WITH rather than a base table. Making
    sure `cte_name` refers to a CTE that exists is the generator's
    job; no lookup is performed here.

    `alias` introduces column bindings that come from the CTE's inner
    SELECT targets, just as DerivedTable does. The difference is that
    the inner SELECT lives in a sibling CteDef, not on this node.
    """
    cte_name: str
    alias: str
391
+
392
+
393
+ # ---------------------------------------------------------------------------
394
+ # WITH clause / CTE definitions
395
+ # ---------------------------------------------------------------------------
396
+
397
@dataclass(frozen=True)
class CteSearch:
    """The `SEARCH BREADTH|DEPTH FIRST BY col, ... SET seqcol` clause of
    a recursive CTE.

    The clause contributes one synthetic column, named by `set_column`,
    to the columns the CTE exposes; the generator adds it to the
    scope's column list. PG uses that column to expose the search
    order, so the outer query can ORDER BY it to get dependable
    BFS/DFS output.
    """
    breadth_first: bool  # True -> BREADTH FIRST, False -> DEPTH FIRST
    by_columns: tuple[str, ...]
    set_column: str
410
+
411
+
412
@dataclass(frozen=True)
class CteCycle:
    """The `CYCLE col, ... SET cyclecol USING pathcol` clause of a
    recursive CTE.

    Two synthetic columns result from it: `cycle_mark_column` (BOOL,
    true on rows where a cycle was detected) and `path_column` (an
    array of row tuples recording the recursion path so far). The
    generator's scope-registration step adds both to the CTE's exposed
    columns. The clause guards graph walks against infinite recursion.

    PG additionally supports `SET cycle_mark TO val DEFAULT val2` for
    custom marker values. That extended form is not modeled: the
    implicit TO TRUE / DEFAULT FALSE covers what 99% of real queries
    want.
    """
    columns: tuple[str, ...]
    cycle_mark_column: str
    path_column: str
430
+
431
+
432
@dataclass(frozen=True)
class CteDef:
    """A single WITH-clause entry:
    `name [(col1, col2, ...)] AS [MATERIALIZED|NOT MATERIALIZED] (SELECT ...)
    [SEARCH ...] [CYCLE ...]`.

    * `column_aliases`: empty by default; reserved for the explicit
      column-list syntax, the same way DerivedTable.column_aliases is.

    * `materialized`: None keeps PG's default behavior (since PG 12
      that may inline single-use CTEs); True forces MATERIALIZED and
      False forces NOT MATERIALIZED. The milestone-5 generator leaves
      it at None, in which case the printer emits no modifier.

    * `recursive`: True for a recursive CTE (added in milestone 8).
      The `RECURSIVE` keyword belongs to the whole WITH list, so the
      printer scans the list and writes `WITH RECURSIVE` when ANY
      entry has the flag. The flag sits on the CteDef because that is
      where the generator decides "this one will self-reference."

    * `select`: widened to `Select | SetOp` in milestone 8, since a
      recursive CTE body is `base UNION ALL recursive` (a SetOp).
      Non-recursive CTEs still hold a plain Select.

    * `search` / `cycle`: PG-specific recursive-CTE clauses, valid
      only with `recursive=True`. Each adds synthetic exposed columns
      (see the CteSearch and CteCycle docstrings). They may be
      combined: `... SEARCH ... CYCLE ...` is valid PG.
    """
    name: str
    select: "Select | SetOp"
    column_aliases: tuple[str, ...] = ()
    materialized: Optional[bool] = None
    recursive: bool = False
    search: Optional[CteSearch] = None
    cycle: Optional[CteCycle] = None
467
+
468
+
469
+ # ---------------------------------------------------------------------------
470
+ # SELECT clause pieces
471
+ # ---------------------------------------------------------------------------
472
+
473
@dataclass(frozen=True)
class SelectTarget:
    """A SELECT-list entry: an expression plus an optional alias.

    `SELECT *` has no representation because the generator always
    projects an explicit list of targets. Star projection fits poorly
    with the type-driven pipeline (the projected types would depend on
    the FROM items, not on the SELECT itself), and nothing downstream
    needs it.
    """
    expr: Expr
    alias: Optional[str] = None
484
+
485
+
486
@dataclass(frozen=True)
class OrderByItem:
    """A single ORDER BY entry.

    `direction` is either "ASC" or "DESC". `nulls` is "FIRST", "LAST",
    or None; None leaves the placement to PostgreSQL's default (NULLS
    LAST for ASC, NULLS FIRST for DESC).

    These are strings, not enums, for the same reason as
    FrameClause.unit and GroupingSet.kind: the alphabet is tiny and
    fixed, it maps 1:1 onto grammar tokens, and the printer can insert
    the value directly. PG validates at parse time, so a stray value
    shows up immediately in the round-trip test.
    """
    expr: Expr
    direction: str = "ASC"
    nulls: Optional[str] = None
503
+
504
+
505
@dataclass(frozen=True)
class FrameBound:
    """One bound of a window frame: the `start` or `end` of a
    `BETWEEN ... AND ...` extent, or the only bound of a single-bound
    extent.

    PG's grammar offers five choices:
        UNBOUNDED PRECEDING | <offset> PRECEDING | CURRENT ROW
        | <offset> FOLLOWING | UNBOUNDED FOLLOWING

    They are modeled through the string field `kind`, following the
    same convention as OrderByItem.direction: strings keep the
    printer's dispatch trivial and need no enum boilerplate. Valid
    kinds:
        "unbounded_preceding"
        "preceding"
        "current_row"
        "following"
        "unbounded_following"

    `offset` is the `<offset>` expression of the preceding/following
    kinds (typically a non-negative integer literal) and must be None
    for the unbounded kinds and for CURRENT ROW.

    Raises:
        ValueError: if `kind` is not one of the five valid kinds, if a
            preceding/following bound has no offset, or if an
            unbounded/current_row bound is given one. Each case would
            otherwise produce malformed SQL.
    """
    kind: str
    offset: Optional[Expr] = None

    def __post_init__(self) -> None:
        offset_kinds = ("preceding", "following")
        bare_kinds = (
            "unbounded_preceding", "current_row", "unbounded_following",
        )
        # Reject unknown kinds first. Otherwise a misspelled kind either
        # slips through unnoticed (no offset) or is reported with the
        # misleading "must not have an offset" message (with offset).
        if self.kind not in offset_kinds + bare_kinds:
            raise ValueError(
                f"FrameBound(kind={self.kind!r}) is not a valid kind; "
                f"expected one of {list(offset_kinds + bare_kinds)}"
            )
        needs_offset = self.kind in offset_kinds
        has_offset = self.offset is not None
        if needs_offset and not has_offset:
            raise ValueError(
                f"FrameBound(kind={self.kind!r}) requires an offset"
            )
        if not needs_offset and has_offset:
            raise ValueError(
                f"FrameBound(kind={self.kind!r}) must not have an offset"
            )
544
+
545
+
546
@dataclass(frozen=True)
class FrameClause:
    """Window frame clause: the `ROWS BETWEEN ... AND ...` portion that
    comes after PARTITION BY and ORDER BY inside OVER.

    PG grammar:
        { RANGE | ROWS | GROUPS } frame_extent [ frame_exclusion ]

    * `unit`: "ROWS", "RANGE" or "GROUPS", spelled exactly like the
      grammar tokens for the printer's convenience.

    * `start` / `end`: lower and upper bound. When `end` is None the
      printer emits the single-bound form `unit start`, which PG reads
      as `unit BETWEEN start AND CURRENT ROW`. Both forms are valid;
      the explicit BETWEEN is more common and clearer.

    * `exclude`: body of the EXCLUDE clause, one of "CURRENT ROW",
      "GROUP", "TIES", "NO OTHERS", or None for the default (same as
      omitting the clause). Only the body is stored because the
      printer prepends "EXCLUDE". It is a string rather than an enum
      for symmetry with `unit` and `direction`; the set is small and
      PG validates it at parse time.
    """
    unit: str
    start: FrameBound
    end: Optional[FrameBound] = None
    exclude: Optional[str] = None
573
+
574
+
575
@dataclass(frozen=True)
class WindowSpec:
    """The `OVER (PARTITION BY ... ORDER BY ... [frame])` clause that
    accompanies a window-style function call.

    With both `partition_by` and `order_by` empty the result is
    `OVER ()`, i.e. the whole result set forms a single partition.
    Some window functions (lag, lead, first_value, last_value) are
    normally paired with ORDER BY, though PG accepts them without it
    syntactically.

    `frame`, if given, is a structured FrameClause that the printer
    renders as `ROWS BETWEEN ... AND ...` (or the RANGE/GROUPS
    variants). Earlier milestones used a raw string. It became
    structured when frame generation landed, so the printer can
    guarantee well-formed output and the generator can compose bounds
    deterministically without string concatenation.
    """
    partition_by: tuple[Expr, ...] = ()
    order_by: tuple[OrderByItem, ...] = ()
    frame: Optional[FrameClause] = None
595
+
596
+
597
@dataclass(frozen=True)
class WindowRef:
    """Reference to a window named in the enclosing SELECT's WINDOW
    clause, rendered as `OVER w` with no parentheses around the name.

    It is a separate node type from WindowSpec so the printer can
    dispatch on structure, without a name-or-spec field hack on
    WindowSpec. FuncCall.over accepts either type:
    `Optional[WindowSpec | WindowRef]`.

    PG also permits `OVER (w PARTITION BY extra-col)`, a named window
    extended inline. That form is not modeled yet; if needed it would
    become a third FuncCall.over option.
    """
    name: str
611
+
612
+
613
@dataclass(frozen=True)
class NamedWindow:
    """A WINDOW-clause entry of a SELECT: a name bound to a spec.

    Several FuncCall.over WindowRefs may point at the same name.
    Sharing one window spec across multiple aggregates is the whole
    purpose of the WINDOW clause. The Select.windows tuple declares
    every name visible from that SELECT's body.
    """
    name: str
    spec: WindowSpec
623
+
624
+
625
@dataclass(frozen=True)
class GroupingSet:
    """Grouping-set construct of a GROUP BY clause: ROLLUP, CUBE, or
    GROUPING SETS.

    PG grammar:
        ROLLUP ( expr_list_or_paren_list, ... )
        CUBE ( expr_list_or_paren_list, ... )
        GROUPING SETS ( ( expr_list ), ( expr_list ), ... )

    `kind` is the keyword itself, "ROLLUP", "CUBE" or "GROUPING SETS",
    kept as a literal string for the printer's convenience (as with
    FrameClause.unit and OrderByItem.direction).

    `elements` is a tuple of tuples, where every outer entry is one
    "element" of the construct:

    * ROLLUP/CUBE: an element is usually a one-expression tuple
      (`ROLLUP (a, b, c)`); an element with several expressions is
      parenthesized (`ROLLUP ((a, b), c)`, whose first element is the
      compound (a, b)).

    * GROUPING SETS: an element is one grouping set and is always
      rendered with explicit parens, including the empty tuple, which
      becomes `()` (the grand-total grouping).

    Expressions in `elements` may also be referenced from the SELECT
    list. PG's "must appear in GROUP BY" rule is met through
    structural equality (the same Expr instance or an equal
    frozen-dataclass value).
    """
    kind: str
    elements: tuple[tuple[Expr, ...], ...]
657
+
658
+
659
+ # ---------------------------------------------------------------------------
660
+ # Top-level statement
661
+ # ---------------------------------------------------------------------------
662
+
663
@dataclass(frozen=True)
class Select:
    """A single SELECT statement.

    Fields follow SQL clause order approximately: `targets` (the
    SELECT body) comes first, then `from_`, then the optional clauses.
    `with_ctes` is placed after the required fields only because
    dataclasses require defaulted fields to follow non-defaulted ones;
    in the output the printer puts it AHEAD of SELECT as a `WITH ...`
    prefix.

    `from_` is a tuple of FromItems that the printer joins with commas,
    i.e. a cross product. An explicit JOIN is expressed by nesting a
    JoinExpr in one `from_` slot.

    `with_ctes` defaults to empty; when populated, the printer prefixes
    the statement with `WITH cte1 AS (...), cte2 AS (...) ...`. The
    empty default lets callers that do not use CTEs build a Select
    without considering the WITH list.
    """
    targets: tuple[SelectTarget, ...]
    from_: tuple[FromItem, ...]
    with_ctes: tuple[CteDef, ...] = ()
    where: Optional[Expr] = None
    # An entry is either a plain Expr (an ordinary GROUP BY column) or a
    # GroupingSet (the ROLLUP/CUBE/GROUPING SETS extension); the printer
    # dispatches on the type. PG allows both kinds in one GROUP BY,
    # e.g. `GROUP BY a, ROLLUP (b, c)`.
    group_by: tuple["Expr | GroupingSet", ...] = ()
    having: Optional[Expr] = None
    windows: tuple[NamedWindow, ...] = ()
    order_by: tuple[OrderByItem, ...] = ()
    limit: Optional[Expr] = None
    offset: Optional[Expr] = None
697
+
698
+
699
# The values SetOp.op may take.
SET_OPS: frozenset[str] = frozenset(("UNION", "INTERSECT", "EXCEPT"))
700
+
701
+
702
@dataclass(frozen=True)
class SetOp:
    """A set operation (UNION, INTERSECT, or EXCEPT) over N
    SELECT-style arms, with an optional ALL modifier.

    `arms` is a tuple of length 2+. An arm is either a Select or a
    nested SetOp (e.g. `A UNION (B INTERSECT C)`); consumers dispatch
    on the arm's type. PG requires all arms to yield the same number
    of columns with implicitly-castable types. The generator enforces
    this by taking the first arm's target types and forcing the
    remaining arms to match. Neither the arm count nor `op` is
    validated by this class.

    `order_by` / `limit` / `offset` apply to the COMBINED result, not
    to individual arms. PG's grammar requires a per-arm ORDER BY/LIMIT
    to be parenthesized inside that arm, and milestone 7 keeps them at
    the SetOp level only.

    SetOp sits at the same nesting level as Select (`Query.select` is
    typed `Union[Select, SetOp]`), so callers can hold a top-level
    query body uniformly without dispatching on shape at the wrapper
    layer.
    """
    op: str  # expected to be a member of SET_OPS
    all: bool
    # Each arm is a Select or a nested SetOp. Track B #5 added the
    # nested case (`A UNION (B INTERSECT C)`); the printer and the
    # test-walking helpers dispatch on isinstance.
    arms: tuple["Select | SetOp", ...]
    order_by: tuple[OrderByItem, ...] = ()
    limit: Optional[Expr] = None
    offset: Optional[Expr] = None
732
+
733
+
734
@dataclass(frozen=True)
class Query:
    """Outermost wrapper around a generated query.

    Despite its name, `select` may hold either a Select or a SetOp.
    Read it as "query body". The name has been kept since milestone 1
    for API continuity. Typing it as a union is what allows top-level
    UNION/INTERSECT/EXCEPT from milestone 7 on without breaking the
    public Query type.
    """
    select: "Select | SetOp"
745
+
746
+
747
# Public API of the module. `LiteralValue` is listed because it is the
# shared alias other modules are meant to reference for literal
# carriers, alongside the JOIN_KINDS / SET_OPS constants.
__all__ = [
    "LiteralValue",
    "Expr", "ColumnRef", "Literal", "FuncCall", "BinaryOp", "UnaryOp", "Cast",
    "Subquery", "Exists", "InSubquery",
    "FromItem", "TableRef", "JoinExpr", "DerivedTable", "CteRef", "JOIN_KINDS",
    "CteDef", "CteSearch", "CteCycle",
    "SelectTarget", "OrderByItem",
    "WindowSpec", "WindowRef", "NamedWindow", "FrameBound", "FrameClause",
    "GroupingSet",
    "Select", "Query",
    "SetOp", "SET_OPS",
]