waxsql 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
waxsql/printer.py ADDED
@@ -0,0 +1,688 @@
1
+ """AST → SQL printer.
2
+
3
+ Pure function on the AST. The printer is the *only* place that knows
4
+ about SQL surface syntax — quoting, parenthesization, literal
5
+ serialization, and the handful of nodes whose SQL form differs from
6
+ what their function/operator signature suggests (the SQL-keyword
7
+ nullary "functions" like `current_date`).
8
+
9
+ Design rules:
10
+
11
+ * Conservative parenthesization. Binary operators always wrap
12
+ binary/unary operand subexpressions in parens, even when PG's
13
+ precedence rules would not require it. The cost is verbosity; the
14
+ benefit is that "the AST said `(a OR b) AND c`" prints as something
15
+ that re-parses to `(a OR b) AND c` — never `a OR (b AND c)`. The
16
+ printer-round-trip test in tests/test_printer.py is what enforces
17
+ this property.
18
+
19
+ * No string concatenation as control flow. Every clause is rendered
20
+ as a separately-built fragment, then joined with newlines. Easy to
21
+ read, easy to debug, no risk of "missed a space" bugs.
22
+
23
+ * Identifiers always go through `quote_ident`. Don't pass raw names
24
+ to f-strings; reserved-word collisions and uppercase/case-folding
25
+ issues hide there.
26
+
27
+ * SQL keyword choice: uppercase for clause keywords (`SELECT`, `FROM`,
28
+ `WHERE`, `JOIN`, ...), which is what the printer emits; lowercase
29
+ only for the boolean constants `true`/`false`. Operator spellings
30
+ are printed exactly as the AST node's `symbol` supplies them.
31
+
32
+ * Literal rendering is type-driven. The Literal node carries its
33
+ own pg_type, and `_render_literal` switches on it. NULL renders as
34
+ `NULL::<type>` — bare untyped NULL has surprising overload-
35
+ resolution behavior in PostgreSQL.
36
+ """
37
+ from __future__ import annotations
38
+
39
+ from .ast import (
40
+ BinaryOp, Cast, ColumnRef, CteDef, CteRef, DerivedTable, Exists, Expr,
41
+ FrameBound, FrameClause, FromItem, FuncCall, GroupingSet, InSubquery,
42
+ JoinExpr, Literal, OrderByItem, Query, Select, SelectTarget,
43
+ SetOp, Subquery, TableRef, UnaryOp, WindowRef, WindowSpec,
44
+ )
45
+ from .schema import quote_ident
46
+ from .types import (
47
+ BOOL, FLOAT8, INT4, INT8, NUMERIC, PgType,
48
+ )
49
+
50
+
51
+ # Names that PostgreSQL parses as keyword expressions, NOT function
52
+ # calls. `current_date()` (with parens, zero args) is a syntax error;
53
+ # `current_date` (bare keyword) is the correct form.
54
+ #
55
+ # One-arg variants like `current_timestamp(2)` exist as well but use
56
+ # special grammar productions — the catalog doesn't model those, so we
57
+ # only special-case the zero-arg form.
58
# Nullary "functions" that must be printed as a bare keyword (no
# parentheses); consulted by `_print_func_call`.
_BARE_KEYWORD_FUNCS: frozenset[str] = frozenset((
    # Date/time keywords.
    "current_date",
    "current_time",
    "current_timestamp",
    "localtime",
    "localtimestamp",
    # Role keywords.
    "current_user",
    "session_user",
    "user",
))
63
+
64
+
65
+ # ===========================================================================
66
+ # Public entry points
67
+ # ===========================================================================
68
+
69
def print_query(q: Query) -> str:
    """Render a Query as one SQL statement, without a trailing semicolon.

    Whether to append a semicolon is left to the caller.

    `q.select` holds either a plain Select or a SetOp
    (UNION/INTERSECT/EXCEPT over several SELECTs); the matching
    renderer is picked by type and called with its default
    multi-line separator.
    """
    root = q.select
    render = _print_set_op if isinstance(root, SetOp) else _print_select
    return render(root)
82
+
83
+
84
def _print_set_op(s: SetOp, *, sep: str = "\n") -> str:
    """Render `arm1 OP[ ALL] arm2 [OP[ ALL] arm3 ...] [ORDER BY ...] [LIMIT ...] [OFFSET ...]`.

    Every arm is rendered by `_print_set_op_arm`, which is where
    nested SetOps receive their parentheses. The arms are glued
    together with the operator keyword surrounded by `sep`.

    `sep` separates both the arms and the trailing clause fragments.
    The default "\\n" produces multi-line output; inline callers pass
    " " to get a single line. Joining with the final separator
    directly, instead of rendering with newlines and replacing them
    afterwards, leaves literals that contain the separator untouched.
    """
    keyword = f"{s.op} ALL" if s.all else s.op
    glue = f"{sep}{keyword}{sep}"

    fragments: list[str] = [
        glue.join([_print_set_op_arm(arm, sep=sep) for arm in s.arms])
    ]

    if s.order_by:
        # A set operation sorts by output position (`ORDER BY 1`), so
        # the integer-literal auto-cast is switched off here.
        sort_keys = [
            _print_order_by(item, allow_positional=True)
            for item in s.order_by
        ]
        fragments.append("ORDER BY " + ", ".join(sort_keys))

    if s.limit is not None:
        fragments.append("LIMIT " + _print_expr(s.limit))

    if s.offset is not None:
        fragments.append("OFFSET " + _print_expr(s.offset))

    return sep.join(fragments)
117
+
118
+
119
def _print_set_op_arm(arm: Select | SetOp, *, sep: str = "\n") -> str:
    """Render a single SetOp arm.

    Nested SetOp arms are always wrapped in parens so that the
    printed text keeps the AST's grouping instead of being
    re-associated by the set-operator precedence rules.

    A plain Select arm renders as-is unless it carries its own WITH,
    ORDER BY, LIMIT or OFFSET. PostgreSQL only accepts those clauses
    on an operand of a set operation when the operand is
    parenthesized; left bare, a trailing ORDER BY/LIMIT is a syntax
    error on a non-final arm, and a leading WITH would attach to the
    whole set operation rather than to this arm. Such arms are
    therefore parenthesized too.

    `sep` propagates so nested arms stay in the same inline or
    multi-line mode as their parent.
    """
    if isinstance(arm, SetOp):
        return f"({_print_set_op(arm, sep=sep)})"

    rendered = _print_select(arm, sep=sep)
    has_own_clauses = bool(
        arm.with_ctes
        or arm.order_by
        or arm.limit is not None
        or arm.offset is not None
    )
    if has_own_clauses:
        return f"({rendered})"
    return rendered
128
+
129
+
130
def print_expr(e: Expr) -> str:
    """Public wrapper around `_print_expr`: render one expression node.

    Intended for tests and diagnostics.
    """
    rendered = _print_expr(e)
    return rendered
133
+
134
+
135
+ # ===========================================================================
136
+ # SELECT and clause rendering
137
+ # ===========================================================================
138
+
139
def _print_select(s: Select, *, sep: str = "\n") -> str:
    """Render a Select as a sequence of clause fragments joined by `sep`.

    Clauses are emitted in this order, each only when present:
    WITH, SELECT (always), FROM, WHERE, GROUP BY, HAVING, WINDOW,
    ORDER BY, LIMIT, OFFSET.

    `sep` defaults to "\\n" for multi-line output; inline callers
    pass " ". Joining with the final separator directly means no
    render-then-replace pass is needed, so literals containing the
    separator are left intact.
    """
    def comma(rendered) -> str:
        return ", ".join(rendered)

    clauses: list[str] = []
    emit = clauses.append

    if s.with_ctes:
        # RECURSIVE is written once for the whole WITH list as soon as
        # any single CTE in it is flagged recursive.
        recursive = any(cte.recursive for cte in s.with_ctes)
        with_kw = "WITH RECURSIVE" if recursive else "WITH"
        emit(with_kw + " " + comma(_print_cte_def(cte) for cte in s.with_ctes))

    emit("SELECT " + comma(_print_target(target) for target in s.targets))

    if s.from_:
        emit("FROM " + comma(_print_from_item(item) for item in s.from_))

    if s.where is not None:
        emit("WHERE " + _print_expr(s.where))

    if s.group_by:
        # GROUP BY entries are either plain expressions or grouping-set
        # constructs (ROLLUP / CUBE / GROUPING SETS).
        keys = [
            _print_grouping_set(key) if isinstance(key, GroupingSet)
            else _print_expr(key)
            for key in s.group_by
        ]
        emit("GROUP BY " + comma(keys))

    if s.having is not None:
        emit("HAVING " + _print_expr(s.having))

    if s.windows:
        # Named windows: `name AS (spec)`, using the same spec renderer
        # as inline OVER clauses. Placed after HAVING, before ORDER BY.
        named = [
            f"{quote_ident(w.name)} AS ({_print_window_spec(w.spec)})"
            for w in s.windows
        ]
        emit("WINDOW " + comma(named))

    if s.order_by:
        emit("ORDER BY " + comma(_print_order_by(item) for item in s.order_by))

    if s.limit is not None:
        emit("LIMIT " + _print_expr(s.limit))

    if s.offset is not None:
        emit("OFFSET " + _print_expr(s.offset))

    return sep.join(clauses)
197
+
198
+
199
def _print_target(t: SelectTarget) -> str:
    """Render one SELECT-list entry: `expr` or `expr AS alias`.

    The expression is printed without compound-wrapping parens;
    commas already delimit SELECT-list items. The alias, when
    present, goes through `quote_ident`.
    """
    rendered = _print_expr(t.expr)
    if t.alias is None:
        return rendered
    return f"{rendered} AS {quote_ident(t.alias)}"
209
+
210
+
211
def _print_order_by(
    o: OrderByItem,
    *,
    allow_positional: bool = False,
) -> str:
    """Render one ORDER BY item: `expr direction [NULLS nulls]`.

    PostgreSQL reads a bare integer constant in ORDER BY as a 1-based
    output-column position and rejects other bare constants, so by
    default a non-NULL Literal sort key gets an explicit
    `::<pg_type>` cast appended, which forces it to be treated as an
    ordinary expression. NULL literals are left alone.

    Callers that really want positional references (the SetOp
    ORDER BY path) pass `allow_positional=True` to skip the cast.
    """
    key = _print_expr(o.expr)

    is_constant = isinstance(o.expr, Literal) and o.expr.value is not None
    if is_constant and not allow_positional:
        # The cast turns the constant into a regular sort expression.
        key = f"{key}::{o.expr.pg_type.sql()}"

    pieces = [key, o.direction]
    if o.nulls is not None:
        pieces.append("NULLS " + o.nulls)
    return " ".join(pieces)
251
+
252
+
253
+ # ===========================================================================
254
+ # FROM items
255
+ # ===========================================================================
256
+
257
def _print_from_item(f: FromItem) -> str:
    """Render one FROM-clause item by dispatching on its concrete type.

    TableRef and CteRef both print as `name AS alias`; JoinExpr and
    DerivedTable delegate to their own renderers.

    Raises:
        TypeError: for a FromItem subclass this printer does not know,
            so a newly added subclass fails loudly rather than being
            rendered as nothing.
    """
    if isinstance(f, TableRef):
        relation = f.table
    elif isinstance(f, JoinExpr):
        return _print_join(f)
    elif isinstance(f, DerivedTable):
        return _print_derived_table(f)
    elif isinstance(f, CteRef):
        relation = f.cte_name
    else:
        raise TypeError(f"Unknown FromItem: {type(f).__name__}")
    # Shared tail for the two "named relation with alias" forms.
    return f"{quote_ident(relation)} AS {quote_ident(f.alias)}"
272
+
273
+
274
def _print_cte_def(c: CteDef) -> str:
    """Render `name [(col, ...)] AS [MATERIALIZED|NOT MATERIALIZED] (body)`.

    The body (a Select or a SetOp) is rendered on a single line via
    `_print_query_body_inline`. `c.materialized` selects the modifier:
    True gives MATERIALIZED, False gives NOT MATERIALIZED, anything
    else gives no modifier.

    Optional `SEARCH ... SET ...` and `CYCLE ... SET ... USING ...`
    suffixes are appended, in that order, when `c.search` / `c.cycle`
    are set.
    """
    head = quote_ident(c.name)
    if c.column_aliases:
        head += "(" + ", ".join(quote_ident(a) for a in c.column_aliases) + ")"

    # Identity checks on purpose: only the real True/False singletons
    # select a modifier.
    modifier = ""
    if c.materialized is True:
        modifier = "MATERIALIZED "
    elif c.materialized is False:
        modifier = "NOT MATERIALIZED "

    segments = [f"{head} AS {modifier}({_print_query_body_inline(c.select)})"]

    if c.search is not None:
        search = c.search
        traversal = "BREADTH FIRST" if search.breadth_first else "DEPTH FIRST"
        by_list = ", ".join(quote_ident(col) for col in search.by_columns)
        segments.append(
            f" SEARCH {traversal} BY {by_list} "
            f"SET {quote_ident(search.set_column)}"
        )

    if c.cycle is not None:
        cycle = c.cycle
        tracked = ", ".join(quote_ident(col) for col in cycle.columns)
        segments.append(
            f" CYCLE {tracked} "
            f"SET {quote_ident(cycle.cycle_mark_column)} "
            f"USING {quote_ident(cycle.path_column)}"
        )

    return "".join(segments)
315
+
316
+
317
def _print_query_body_inline(body: Select | SetOp) -> str:
    """Render a query body (Select or SetOp) on a single line.

    Used where a whole query is embedded in a larger fragment, such
    as a CteDef body. Both branches obtain the one-line form by
    rendering with " " as the separator; nothing is rendered with
    newlines and patched afterwards, so literals that contain a
    newline are not altered.
    """
    if not isinstance(body, SetOp):
        return _print_select_inline(body)
    return _print_set_op(body, sep=" ")
328
+
329
+
330
def _print_derived_table(d: DerivedTable) -> str:
    """Render `[LATERAL ](query) AS alias[(col1, col2, ...)]`.

    LATERAL is a prefix modifier placed before the parenthesized
    body. The body is rendered on one line through
    `_print_query_body_inline`, which accepts both a plain Select and
    a SetOp (UNION/INTERSECT/EXCEPT); a Select body renders exactly as
    it would through `_print_select_inline`. Alias and column aliases
    go through `quote_ident`.
    """
    prefix = "LATERAL " if d.lateral else ""
    # Dispatching helper rather than `_print_select_inline`: a SetOp
    # body has no Select clauses and would fail inside `_print_select`.
    body = _print_query_body_inline(d.select)
    cols = ""
    if d.column_aliases:
        cols = "(" + ", ".join(quote_ident(c) for c in d.column_aliases) + ")"
    return f"{prefix}({body}) AS {quote_ident(d.alias)}{cols}"
344
+
345
+
346
def _print_join(j: JoinExpr) -> str:
    """Render a join: `left KIND JOIN right [ON cond | USING (cols)]`.

    A side that is itself a JoinExpr is wrapped in parens so the
    association written in the AST is explicit in the output. CROSS
    joins print without a condition; every other kind takes ON when
    `j.on` is set, otherwise USING.

    Raises:
        ValueError: for a non-CROSS join that has neither ON nor USING.
    """
    def side(item: FromItem) -> str:
        text = _print_from_item(item)
        return f"({text})" if isinstance(item, JoinExpr) else text

    left = side(j.left)
    right = side(j.right)

    if j.kind == "CROSS":
        return f"{left} CROSS JOIN {right}"

    joined = f"{left} {j.kind} JOIN {right}"
    if j.on is not None:
        return f"{joined} ON {_print_expr(j.on)}"
    if not j.using:
        # Malformed node: surface it instead of emitting SQL that
        # cannot be parsed.
        raise ValueError(f"{j.kind} JOIN requires ON or USING")
    columns = ", ".join(quote_ident(col) for col in j.using)
    return f"{joined} USING ({columns})"
371
+
372
+
373
+ # ===========================================================================
374
+ # Expression rendering
375
+ # ===========================================================================
376
+
377
def _print_expr(e: Expr) -> str:
    """Render any expression node by dispatching on its type.

    Column references and literals are handled inline first; the
    remaining node types are tried in a fixed order against a small
    table of (type, renderer) pairs.

    Raises:
        TypeError: when `e` is not one of the known Expr types, so a
            node type that was never registered here is noticed.
    """
    if isinstance(e, ColumnRef):
        return f"{quote_ident(e.table_alias)}.{quote_ident(e.column)}"
    if isinstance(e, Literal):
        return _render_literal(e.value, e.pg_type)

    # The order below mirrors the sequence of isinstance checks this
    # dispatcher has always used.
    renderers = (
        (FuncCall, _print_func_call),
        (BinaryOp, _print_binary),
        (UnaryOp, _print_unary),
        (Cast, _print_cast),
        (Subquery, _print_subquery),
        (Exists, _print_exists),
        (InSubquery, _print_in_subquery),
    )
    for node_type, render in renderers:
        if isinstance(e, node_type):
            return render(e)
    raise TypeError(f"Unknown Expr: {type(e).__name__}")
402
+
403
+
404
def _print_func_call(f: FuncCall) -> str:
    """Render a function call together with its optional trailing clauses.

    Three base forms:

    * `name(*)` when `f.star` is set;
    * the bare keyword `name` when there are no args, the name is in
      `_BARE_KEYWORD_FUNCS`, and there is no FILTER — returned
      immediately, with no trailing clauses;
    * `name(arg, ...)` otherwise.

    Trailing clauses are appended in the order
    `WITHIN GROUP (ORDER BY ...)`, `FILTER (WHERE ...)`, then
    `OVER name` (for a WindowRef) or `OVER (spec)` (for an inline
    WindowSpec).
    """
    if f.star:
        call = f"{f.name}(*)"
    elif f.args or f.name not in _BARE_KEYWORD_FUNCS or f.filter_ is not None:
        rendered_args = ", ".join(_print_expr(arg) for arg in f.args)
        call = f"{f.name}({rendered_args})"
    else:
        # Keyword-style nullary function: parentheses would be a
        # syntax error, so only the name is printed.
        return f.name

    pieces = [call]

    if f.within_group:
        ordering = ", ".join(_print_order_by(item) for item in f.within_group)
        pieces.append(f"WITHIN GROUP (ORDER BY {ordering})")

    if f.filter_ is not None:
        pieces.append(f"FILTER (WHERE {_print_expr(f.filter_)})")

    if f.over is not None:
        if isinstance(f.over, WindowRef):
            # Reference to a named window: no parentheses.
            pieces.append(f"OVER {quote_ident(f.over.name)}")
        else:
            pieces.append(f"OVER ({_print_window_spec(f.over)})")

    return " ".join(pieces)
443
+
444
+
445
def _print_window_spec(w: WindowSpec) -> str:
    """Render the inside of a window definition.

    Sections appear in the order PARTITION BY, ORDER BY, frame
    clause; each is optional and they are separated by single spaces.
    A spec with none of them renders as the empty string (the body of
    `OVER ()`).
    """
    sections: list[str] = []

    if w.partition_by:
        keys = ", ".join(_print_expr(key) for key in w.partition_by)
        sections.append("PARTITION BY " + keys)

    if w.order_by:
        ordering = ", ".join(_print_order_by(item) for item in w.order_by)
        sections.append("ORDER BY " + ordering)

    if w.frame is not None:
        sections.append(_print_frame_clause(w.frame))

    return " ".join(sections)
462
+
463
+
464
def _print_frame_bound(b: FrameBound) -> str:
    """Render one window-frame bound.

    `b.kind` selects the form:

    * "unbounded_preceding" -> `UNBOUNDED PRECEDING`
    * "current_row"         -> `CURRENT ROW`
    * "unbounded_following" -> `UNBOUNDED FOLLOWING`
    * "preceding"           -> `<offset> PRECEDING`
    * "following"           -> `<offset> FOLLOWING`

    Raises:
        ValueError: if `b.kind` is not one of the kinds above, or if
            an offset kind has `b.offset` set to None. Both indicate
            a malformed node; raising keeps the printer from emitting
            a wrong or unparseable bound (a plain `assert` would be
            stripped under `python -O`).
    """
    kind = b.kind
    if kind == "unbounded_preceding":
        return "UNBOUNDED PRECEDING"
    if kind == "current_row":
        return "CURRENT ROW"
    if kind == "unbounded_following":
        return "UNBOUNDED FOLLOWING"

    # The remaining kinds carry an offset expression.
    if kind == "preceding":
        direction = "PRECEDING"
    elif kind == "following":
        direction = "FOLLOWING"
    else:
        raise ValueError(f"Unknown FrameBound kind: {kind!r}")

    if b.offset is None:
        raise ValueError(f"FrameBound kind {kind!r} requires an offset")
    return f"{_print_expr(b.offset)} {direction}"
480
+
481
+
482
def _print_frame_clause(fc: FrameClause) -> str:
    """Render a window frame clause.

    With `fc.end` unset the single-bound form `unit start` is
    produced; otherwise the form is `unit BETWEEN start AND end`.
    When `fc.exclude` is set, ` EXCLUDE <exclude>` is appended with
    the value printed as given.
    """
    start = _print_frame_bound(fc.start)
    if fc.end is None:
        extent = start
    else:
        extent = f"BETWEEN {start} AND {_print_frame_bound(fc.end)}"

    clause = f"{fc.unit} {extent}"
    if fc.exclude is not None:
        clause = f"{clause} EXCLUDE {fc.exclude}"
    return clause
500
+
501
+
502
def _print_grouping_set(gs: GroupingSet) -> str:
    """Render `KIND (elem, elem, ...)` for a grouping-set construct.

    How an element is printed:

    * an empty element prints as `()`, whatever the kind;
    * under kind "GROUPING SETS" every non-empty element is
      parenthesized, including single-expression ones;
    * under any other kind, multi-expression elements are
      parenthesized and single-expression elements are printed bare.
    """
    always_paren = gs.kind == "GROUPING SETS"

    def render(elem) -> str:
        if not elem:
            # The empty grouping (grand total).
            return "()"
        if always_paren or len(elem) > 1:
            return "(" + ", ".join(_print_expr(expr) for expr in elem) + ")"
        return _print_expr(elem[0])

    body = ", ".join([render(elem) for elem in gs.elements])
    return f"{gs.kind} ({body})"
531
+
532
+
533
def _print_binary(b: BinaryOp) -> str:
    """Render `left symbol right`.

    Each operand goes through `_wrap_if_compound`, so a nested
    binary, unary or cast operand is always parenthesized; no
    precedence table is consulted. The symbol is always surrounded by
    single spaces, which word-form operators need in order to stay
    separate tokens.
    """
    operands = (_wrap_if_compound(b.left), _wrap_if_compound(b.right))
    return f" {b.symbol} ".join(operands)
545
+
546
+
547
def _print_unary(u: UnaryOp) -> str:
    """Render a prefix unary operator applied to its operand.

    The operand goes through `_wrap_if_compound`, so compound operands
    arrive already parenthesized. The symbols `-` and `+` are glued
    directly onto the operand (`-x`); every other symbol is followed
    by a space so that a word-form operator and its operand stay
    separate tokens.

    One exception to the tight form: if the rendered operand itself
    begins with `-` or `+` (a negative numeric literal prints that
    way), a space is inserted. Without it, `-` applied to `-5` would
    print as `--5`, and `--` starts a SQL line comment.
    """
    operand = _wrap_if_compound(u.operand)
    tight = u.symbol in ("-", "+") and not operand.startswith(("-", "+"))
    sep = "" if tight else " "
    return f"{u.symbol}{sep}{operand}"
558
+
559
+
560
def _print_cast(c: Cast) -> str:
    """Render `expr::type`, parenthesizing a compound inner expression."""
    operand = _wrap_if_compound(c.expr)
    type_name = c.target_type.sql()
    return f"{operand}::{type_name}"
563
+
564
+
565
def _print_subquery(s: Subquery) -> str:
    """Render a scalar subquery: `(SELECT ...)`.

    The inner query is rendered on one line through
    `_print_query_body_inline`, which handles both a plain Select and
    a SetOp body; a Select body renders exactly as it would through
    `_print_select_inline`. The surrounding parens belong to the
    subquery form itself.
    """
    # Dispatching helper rather than `_print_select_inline`: a SetOp
    # body has no Select clauses and would fail inside `_print_select`.
    return f"({_print_query_body_inline(s.select)})"
571
+
572
+
573
def _print_exists(e: Exists) -> str:
    """Render `[NOT ]EXISTS (SELECT ...)`.

    `e.negated` selects the `NOT EXISTS` spelling. The inner query is
    rendered on one line through `_print_query_body_inline`, so a
    SetOp body is accepted as well as a plain Select; a Select body
    renders exactly as it would through `_print_select_inline`.
    """
    keyword = "NOT EXISTS" if e.negated else "EXISTS"
    return f"{keyword} ({_print_query_body_inline(e.select)})"
578
+
579
+
580
def _print_in_subquery(i: InSubquery) -> str:
    """Render `<expr> [NOT ]IN (SELECT ...)`.

    The left expression gets the same compound-wrapping as a binary
    operand, so `(a + b) IN (...)` keeps its grouping. The inner
    query is rendered on one line through `_print_query_body_inline`,
    so a SetOp body is accepted as well as a plain Select; a Select
    body renders exactly as it would through `_print_select_inline`.
    """
    left = _wrap_if_compound(i.expr)
    op = "NOT IN" if i.negated else "IN"
    return f"{left} {op} ({_print_query_body_inline(i.select)})"
587
+
588
+
589
def _print_select_inline(s: Select) -> str:
    """Render a Select on a single line for embedding in a larger fragment.

    The one-line form comes from asking `_print_select` to join its
    clauses with a space. The output is never post-processed with a
    newline-to-space replacement, so a text literal that contains a
    newline passes through unchanged.
    """
    inline_sep = " "
    return _print_select(s, sep=inline_sep)
601
+
602
+
603
def _wrap_if_compound(e: Expr) -> str:
    """Render `e`, adding parens when it is a BinaryOp, UnaryOp or Cast.

    The wrapping is unconditional for those three node types: extra
    parentheses are accepted as the price of never depending on
    operator precedence. All other node types are returned as
    rendered.
    """
    rendered = _print_expr(e)
    compound_types = (BinaryOp, UnaryOp, Cast)
    return f"({rendered})" if isinstance(e, compound_types) else rendered
615
+
616
+
617
+ # ===========================================================================
618
+ # Literal rendering
619
+ # ===========================================================================
620
+
621
def _render_literal(value: int | float | str | bool | None, t: PgType) -> str:
    """Render a Python value as a typed SQL literal.

    Rendering is driven by `t`:

    * `None` (any type) -> `NULL::<type>`; the explicit cast keeps
      the NULL typed wherever it appears.
    * BOOL -> `true` / `false`.
    * INT4 / INT8 -> the bare integer text.
    * NUMERIC / FLOAT8 -> `repr(float(value))`, which carries a
      decimal point or exponent. Non-finite values have no bare
      numeric spelling, so NaN and the infinities are emitted as
      quoted, cast strings: `'NaN'::<type>`, `'Infinity'::<type>`,
      `'-Infinity'::<type>`.
    * anything else -> a quoted string with an explicit
      `::<type>` cast. The caller must supply a string body that the
      target type can parse.
    """
    if value is None:
        return f"NULL::{t.sql()}"

    if t == BOOL:
        return "true" if value else "false"

    if t in (INT4, INT8):
        return str(int(value))

    if t in (NUMERIC, FLOAT8):
        s = repr(float(value))
        # repr() spells non-finite floats as nan / inf / -inf. Appending
        # ".0" to those would produce invalid SQL, so map them to the
        # quoted special-value form instead.
        # NOTE(review): PostgreSQL accepts 'Infinity' for numeric only
        # from version 14 on; 'NaN' is accepted by both types.
        non_finite = {"nan": "NaN", "inf": "Infinity", "-inf": "-Infinity"}
        special = non_finite.get(s)
        if special is not None:
            return f"'{special}'::{t.sql()}"
        # Keep the literal recognisably non-integer for the SQL lexer.
        return s if "." in s or "e" in s or "E" in s else s + ".0"

    # String-quoted typed literal. The cast is always emitted, also for
    # text/varchar, so the literal never reaches the server as an
    # untyped string constant.
    return f"{_quote_string_literal(str(value))}::{t.sql()}"
667
+
668
+
669
def _quote_string_literal(s: str) -> str:
    """Wrap `s` as a PostgreSQL string literal.

    Embedded single quotes are always doubled.

    * Without a backslash in `s`, the result is a plain `'...'`
      literal, which reads the same whatever the server's
      `standard_conforming_strings` setting is.
    * With a backslash in `s`, a plain literal would be read
      differently depending on that setting, so the escape-string
      form `E'...'` is used with every backslash doubled. That form
      is interpreted the same way under either setting.

    Other characters are passed through unchanged.
    """
    doubled = s.replace("'", "''")
    if "\\" not in s:
        return "'" + doubled + "'"
    # Quote doubling never adds a backslash, so escaping backslashes
    # after doubling the quotes is safe.
    return "E'" + doubled.replace("\\", "\\\\") + "'"
686
+
687
+
688
+ __all__ = ["print_query", "print_expr"]  # public API: the two rendering entry points; every other name in this module is a private helper
waxsql/py.typed ADDED
File without changes