waxsql 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
waxsql/gen/setop.py ADDED
@@ -0,0 +1,259 @@
1
+ """Set-operation generator (UNION / INTERSECT / EXCEPT).
2
+
3
+ Role: combines N SELECTs into one statement. The hard constraint
4
+ that gives the file its shape: both (all) sides of a set-op must
5
+ produce the SAME column types in the SAME order. PG enforces this
6
+ positionally — column #i of every arm must implicitly-cast to a
7
+ common type with column #i of every other arm. The generator
8
+ sidesteps the cross-arm cast negotiation entirely by FIXING arm 1's
9
+ exact target types and reusing them as the spec for arms 2..N (see
10
+ `target_types` below). Arms 2..N don't get gen_select's full freedom;
11
+ they call _gen_arm_select with a pinned type list.
12
+
13
+ One public entry point: `gen_setop(ctx)` builds a SetOp wrapping
14
+ N Selects with matching column counts and types.
15
+
16
+ Two phases:
17
+
18
+ 1. **Arm 1**: a full `gen_select(arm1_ctx)` in its own child scope.
19
+ Could be aggregate, could have any target shape. We extract its
20
+ target types after generation, then strip its with_ctes /
21
+ order_by / limit / offset (those belong to the SetOp wrapper,
22
+ not the arm).
23
+
24
+ 2. **Arms 2+**: simpler Selects that match arm 1's target types
25
+ position-by-position. Built via `_gen_arm_select` which produces
26
+ a non-aggregate SELECT with FROM, optional WHERE, and one
27
+ SelectTarget per requested type (generated via gen_expr against
28
+ the arm's local scope).
29
+
30
+ Each arm gets its own scope via `descend_subquery(correlated=False)`
31
+ so arm-local FROM aliases don't collide with the surrounding query
32
+ or with other arms. The shared `alias_counter` (and `cte_counter`)
33
+ ensures unique naming across the whole query.
34
+
35
+ After all arms are generated, the SetOp wrapper optionally gets
36
+ its own ORDER BY (positional reference: `ORDER BY 1`) and LIMIT —
37
+ both PG-valid only at the combined-statement level for milestone 7
38
+ (per-arm ORDER BY/LIMIT requires explicit parens).
39
+ """
40
+ from __future__ import annotations
41
+
42
+ from dataclasses import replace
43
+ from typing import Optional
44
+
45
+ from ..ast import (
46
+ Expr, Literal, OrderByItem, Select, SelectTarget, SetOp,
47
+ )
48
+ from ..config import (
49
+ FEATURE_LIMIT, FEATURE_ORDER_BY, FEATURE_WHERE,
50
+ )
51
+ from ..context import GenContext
52
+ from ..types import BOOL, INT4, PgType
53
+ from .expr import gen_expr
54
+
55
+
56
# Candidate row counts for the SetOp-level LIMIT. gen_setop draws one of
# these with rng.choice whenever it decides to attach a LIMIT clause.
_LIMIT_VALUES: tuple[int, ...] = (1, 5, 10, 25, 50, 100)
57
+
58
+
59
def gen_setop(ctx: GenContext) -> SetOp:
    """Build a SetOp (UNION / INTERSECT / EXCEPT) over several arms.

    The first arm is produced by a full ``gen_select`` call and then has
    its wrapper-level clauses stripped. Its target types become the
    positional signature that every later arm is generated against,
    either through ``_gen_arm_select`` or, with probability
    ``cfg.p_nested_set_op_arm`` when subquery budget remains, through
    ``_gen_nested_setop_arm``.

    Every arm is generated in its own non-correlated child context
    obtained from ``ctx.descend_subquery``.

    If the corresponding feature flags and probability gates pass, the
    wrapper gets a positional ``ORDER BY <n>`` (1-based output column
    index) and a LIMIT drawn from ``_LIMIT_VALUES``.

    Args:
        ctx: Generation context supplying ``config`` and ``rng``.

    Returns:
        A SetOp holding between 2 and ``cfg.max_set_op_arms`` arms.
    """
    # Imported inside the function so that this module and gen/select.py
    # can never form a top-level import cycle. Note that only gen_select
    # comes from gen/select.py; _gen_arm_select is defined in this file.
    from .select import gen_select

    config = ctx.config
    rand = ctx.rng

    # Draw order is part of the generator's determinism: arm count,
    # then operator, then the ALL flag.
    arm_count = rand.randint(2, config.max_set_op_arms)
    operator = rand.choice(("UNION", "INTERSECT", "EXCEPT"))
    use_all = rand.random() < config.p_set_op_all

    # The first arm has to exist before any other arm is generated:
    # its target types are the spec the remaining arms are built to.
    first_arm = _strip_arm_clauses(
        gen_select(ctx.descend_subquery(correlated=False))
    )
    signature: list[PgType] = [
        target.expr.pg_type for target in first_arm.targets
    ]

    # Mixed list on purpose: later arms may be plain Selects or nested
    # SetOps, so the annotation is spelled out for the type checker.
    arms: list[Select | SetOp] = [first_arm]
    while len(arms) < arm_count:
        scoped_ctx = ctx.descend_subquery(correlated=False)
        # The probability draw only happens when there is depth budget
        # left; at a subquery leaf no random number is consumed.
        nest = False
        if not scoped_ctx.at_subquery_leaf():
            nest = rand.random() < config.p_nested_set_op_arm
        if nest:
            arms.append(_gen_nested_setop_arm(scoped_ctx, signature))
        else:
            arms.append(_gen_arm_select(scoped_ctx, signature))

    # Wrapper-level ORDER BY: a bare integer literal, which PG reads as
    # a 1-based reference to an output column, so no column names are
    # needed.
    ordering: tuple[OrderByItem, ...] = ()
    if FEATURE_ORDER_BY in config.feature_flags:
        if rand.random() < config.p_order_by:
            column_index = rand.randint(1, len(signature))
            sort_direction = rand.choice(("ASC", "DESC"))
            ordering = (
                OrderByItem(
                    expr=Literal(INT4, column_index),
                    direction=sort_direction,
                ),
            )

    # Wrapper-level LIMIT.
    row_cap: Optional[Expr] = None
    if FEATURE_LIMIT in config.feature_flags:
        if rand.random() < config.p_limit:
            row_cap = Literal(INT4, rand.choice(_LIMIT_VALUES))

    return SetOp(
        op=operator,
        all=use_all,
        arms=tuple(arms),
        order_by=ordering,
        limit=row_cap,
    )
148
+
149
+
150
+ # ===========================================================================
151
+ # Internals
152
+ # ===========================================================================
153
+
154
def _gen_nested_setop_arm(
    ctx: GenContext,
    target_types: list[PgType],
) -> SetOp:
    """Build a two-armed SetOp to be used as one arm of an outer SetOp.

    Both inner arms come from ``_gen_arm_select`` with the caller's
    ``target_types``, so the nested result lines up positionally with
    the outer set-op's first arm. The nested SetOp is created without
    ORDER BY or LIMIT; those are left to the outermost wrapper.

    Args:
        ctx: Context for the nested set-op; each inner arm descends
            from it into its own non-correlated child context.
        target_types: Positional type signature every inner arm must
            produce.

    Returns:
        A SetOp with exactly two Select arms and a randomly chosen
        operator and ALL flag.
    """
    # Operator first, ALL flag second - same draw order as gen_setop.
    operator = ctx.rng.choice(("UNION", "INTERSECT", "EXCEPT"))
    use_all = ctx.rng.random() < ctx.config.p_set_op_all

    # Fixed at two arms: no further nesting happens below this level.
    members = tuple(
        _gen_arm_select(ctx.descend_subquery(correlated=False), target_types)
        for _ in range(2)
    )

    return SetOp(op=operator, all=use_all, arms=members)
189
+
190
+
191
def _strip_arm_clauses(s: Select) -> Select:
    """Return a copy of ``s`` without the clauses a set-op arm may not carry.

    The copy has ``with_ctes`` and ``order_by`` emptied and ``limit`` /
    ``offset`` cleared; every other field is carried over unchanged.
    PG only accepts per-arm ORDER BY/LIMIT/OFFSET inside parentheses,
    and dropping them is simpler than parenthesizing.
    """
    # NOTE(review): the removed CTEs are discarded, not handed to the
    # SetOp built in gen_setop - confirm the arm body can never
    # reference a CTE that was dropped here.
    wrapper_owned = {
        "with_ctes": (),
        "order_by": (),
        "limit": None,
        "offset": None,
    }
    return replace(s, **wrapper_owned)
205
+
206
+
207
+ # CONTRACT: every Select returned here has exactly len(target_types)
208
+ # targets, in order, with each target's pg_type implicitly-castable
209
+ # from target_types[i]. This is what the outer SetOp wrapper relies
210
+ # on for cross-arm compatibility. Violating it produces a PG error
211
+ # like "each UNION query must have the same number of columns" or
212
+ # "UNION types int and text cannot be matched".
213
def _gen_arm_select(
    ctx: GenContext,
    target_types: list[PgType],
) -> Select:
    """Build a non-aggregate Select whose targets follow ``target_types``.

    Used for every set-op arm after the first. The result has a FROM
    clause, exactly one SelectTarget per entry of ``target_types`` (in
    order), and an optional WHERE. All expressions are generated with
    aggregates disallowed.

    Args:
        ctx: The arm's own context; the FROM builder populates its scope.
        target_types: Types to generate targets for, position by position.

    Returns:
        A Select with ``len(target_types)`` targets.
    """
    # Function-level import keeps this module and gen/select.py from
    # depending on each other at import time.
    from .select import _gen_from_clause

    config = ctx.config
    rand = ctx.rng
    enabled = config.feature_flags

    # FROM goes first: it fills the arm-local scope the expressions
    # below pick their columns from.
    source = _gen_from_clause(ctx)

    # A single aggregate-free context serves both targets and WHERE.
    plain_ctx = replace(ctx, allow_aggregates=False)
    columns = [
        SelectTarget(expr=gen_expr(plain_ctx, wanted))
        for wanted in target_types
    ]

    # WHERE is generated after the targets; the probability draw only
    # happens when the feature is switched on.
    predicate: Optional[Expr] = None
    if FEATURE_WHERE in enabled:
        if rand.random() < config.p_where:
            predicate = gen_expr(plain_ctx, BOOL)

    return Select(
        targets=tuple(columns),
        from_=source,
        where=predicate,
    )
257
+
258
+
259
# Public API of this module: only the set-op entry point is exported;
# the underscore-prefixed helpers above are internal.
__all__ = ["gen_setop"]
waxsql/gen/subquery.py ADDED
@@ -0,0 +1,397 @@
1
+ """Subquery generators.
2
+
3
+ Role: covers the four SQL surfaces where a SELECT appears as a value
4
+ or table source inside another query. Each public entry point hands
5
+ back an AST node that gen_expr or gen_select can splice directly into
6
+ the surrounding tree.
7
+
8
+ SCOPE HANDLING is the subtle bit. Correlated subqueries need the
9
+ outer scope visible while the inner is being built (so inner
10
+ expressions can pick outer columns); derived tables produce a fresh
11
+ scope whose *outer* names are projected back as `alias.cN` and whose
12
+ *internals* are NOT visible to siblings. `descend_subquery` on the
13
+ context is the chokepoint that decides which: passing
14
+ correlated=True keeps the outer chain reachable from the child scope;
15
+ correlated=False severs it.
16
+
17
+ Four public entry points. The first three return a complete `Expr`
18
+ for use in `gen/expr.py`'s candidate dispatch; the fourth returns a
19
+ FromItem for `gen/select.py`'s FROM-clause builder:
20
+
21
+ * `gen_scalar_subquery(ctx, target_type, *, correlated)` — returns
22
+ a `Subquery(target_type, inner)`. Inner has a single target of
23
+ `target_type` and `LIMIT 1` for runtime safety.
24
+
25
+ * `gen_exists_subquery(ctx, *, correlated)` — returns
26
+ `Exists(BOOL, inner, negated)`. Inner has the canonical
27
+ `SELECT 1 FROM ...` shape.
28
+
29
+ * `gen_in_subquery(ctx, *, correlated)` — returns
30
+ `InSubquery(BOOL, lhs, inner, negated)`. The LHS is generated
31
+ BEFORE descending (so it references the outer scope, not the
32
+ inner FROM tables); the inner produces a single column of the
33
+ LHS's type.
34
+
35
+ * `gen_derived_table(ctx, alias, *, lateral)` — returns a
36
+ `DerivedTable` FromItem (`[LATERAL ](SELECT ...) AS alias`).
37
+ Inner has 1..N targets aliased c1..cN; with `lateral=True`,
38
+ the same forced-correlation predicate gets injected so LATERAL
39
+ actually exercises its capability.
40
+
41
+ All four share `_build_subquery_select`, which handles the recurring
42
+ pattern: descend, build FROM, build target(s), optionally WHERE,
43
+ force correlation if requested.
44
+
45
+ The "force correlation" path is what gives the `correlated=True`
46
+ flag teeth — without it, the inner WHERE might happen to pick an
47
+ outer column or might not, and the test suite's
48
+ "correlated-references-outer" invariant would be flaky. With it,
49
+ every correlated subquery has at least one outer-column reference
50
+ in its WHERE.
51
+ """
52
+ from __future__ import annotations
53
+
54
+ from dataclasses import replace
55
+ from typing import Optional
56
+
57
+ from ..ast import (
58
+ BinaryOp, ColumnRef, DerivedTable, Exists, Expr, InSubquery, Literal,
59
+ Select, SelectTarget, Subquery,
60
+ )
61
+ from ..config import FEATURE_WHERE
62
+ from ..context import GenContext
63
+ from ..scope import Scope
64
+ from ..types import BOOL, INT4, INT8, NUMERIC, PgType, TEXT, TIMESTAMPTZ
65
+ from .expr import gen_expr, gen_literal
66
+
67
+
68
# Probability of negating an EXISTS / IN subquery. Kept low: most
# real SQL uses the affirmative form; NOT EXISTS / NOT IN appear
# as anti-joins or set differences but less commonly.
_P_NEGATE_EXISTS = 0.3
_P_NEGATE_IN = 0.3

# Type pool for the IN subquery's comparison. Chosen from types where
# `=` is in the catalog so the inner SELECT's column type aligns with
# what the outer LHS expression can produce. Excludes JSONB / arrays
# since the catalog has no `=` op for those.
# gen_derived_table also draws its column types from this same pool.
_IN_COMPARISON_TYPES: tuple[PgType, ...] = (
    INT4, INT8, NUMERIC, TEXT, TIMESTAMPTZ,
)
81
+
82
+
83
+ # ===========================================================================
84
+ # Public entry points
85
+ # ===========================================================================
86
+
87
def gen_scalar_subquery(
    ctx: GenContext,
    target_type: PgType,
    *,
    correlated: bool,
) -> Subquery:
    """Produce a scalar subquery node of ``target_type``.

    The inner SELECT has a single target of ``target_type`` and carries
    ``LIMIT 1``: PG accepts a multi-row scalar subquery at parse time
    but rejects it at runtime when more than one row comes back.

    Args:
        ctx: Outer generation context.
        target_type: Type of the subquery's single output column.
        correlated: Whether the inner SELECT may see the outer scope.

    Returns:
        ``Subquery(target_type, inner)``.
    """
    return Subquery(
        target_type,
        _build_subquery_select(
            ctx,
            target_type=target_type,
            correlated=correlated,
            with_limit_1=True,
        ),
    )
105
+
106
+
107
def gen_exists_subquery(
    ctx: GenContext,
    *,
    correlated: bool,
) -> Exists:
    """Produce ``[NOT ]EXISTS (SELECT 1 FROM ...)``.

    The select list is the constant ``1``: PG ignores the EXISTS select
    list at runtime, so nothing more elaborate is generated for it.
    The node is negated with probability ``_P_NEGATE_EXISTS``.

    Args:
        ctx: Outer generation context.
        correlated: Whether the inner SELECT may see the outer scope.

    Returns:
        An ``Exists`` node of type BOOL.
    """
    constant_one = Literal(INT4, 1)
    body = _build_subquery_select(
        ctx,
        target_expr=constant_one,
        correlated=correlated,
        with_limit_1=False,
    )
    # The negation draw comes after the body is built, so the RNG
    # sequence is body first, flag second.
    flip = ctx.rng.random() < _P_NEGATE_EXISTS
    return Exists(BOOL, body, negated=flip)
124
+
125
+
126
def gen_derived_table(
    ctx: GenContext,
    alias: str,
    *,
    lateral: bool,
) -> DerivedTable:
    """Produce a derived-table FROM item ``[LATERAL ](SELECT ...) AS alias``.

    The inner SELECT gets between 1 and
    ``cfg.max_derived_table_columns`` targets, each typed by a draw
    from ``_IN_COMPARISON_TYPES`` and aliased ``c1``, ``c2``, ... so the
    outer query can address ``<alias>.cN`` whatever expression sits
    behind it.

    ``lateral`` is forwarded as the ``correlated`` flag of
    ``_build_subquery_select``, so a LATERAL derived table goes through
    the same forced outer-reference predicate as correlated subqueries.
    No ``LIMIT 1`` is added, since a derived table is multi-row by
    nature.

    Args:
        ctx: Outer generation context.
        alias: Name the derived table is exposed under.
        lateral: Whether to emit LATERAL and correlate with the outer
            scope.

    Returns:
        A ``DerivedTable`` wrapping the aliased inner SELECT.
    """
    rand = ctx.rng
    # Draw order: column count first, then one type per column.
    width = rand.randint(1, ctx.config.max_derived_table_columns)
    column_types = tuple(
        [rand.choice(_IN_COMPARISON_TYPES) for _ in range(width)]
    )

    body = _build_subquery_select(
        ctx,
        target_types=column_types,
        correlated=lateral,
        with_limit_1=False,
    )

    # Rebuild each target with a stable c<N> alias (1-based).
    named_targets = tuple(
        SelectTarget(expr=target.expr, alias=f"c{position}")
        for position, target in enumerate(body.targets, start=1)
    )
    return DerivedTable(
        replace(body, targets=named_targets),
        alias,
        lateral=lateral,
    )
176
+
177
+
178
def gen_in_subquery(
    ctx: GenContext,
    *,
    correlated: bool,
) -> InSubquery:
    """Produce ``<lhs> [NOT ]IN (SELECT col FROM ...)``.

    A comparison type is drawn from ``_IN_COMPARISON_TYPES``; the
    left-hand side and the inner SELECT's single column both use it.
    The node itself is BOOL and is negated with probability
    ``_P_NEGATE_IN``.

    Args:
        ctx: Outer generation context.
        correlated: Whether the inner SELECT may see the outer scope.

    Returns:
        An ``InSubquery`` node of type BOOL.
    """
    rand = ctx.rng

    # This is the element type being compared, not the type of the
    # resulting expression (which is always BOOL).
    element_type = rand.choice(_IN_COMPARISON_TYPES)

    # ORDERING DEPENDENCY: the left-hand side must be generated against
    # the outer context, before the inner SELECT is built. It belongs
    # to the outer query's expression, so it has to be resolved there
    # rather than against the subquery's own FROM tables.
    outer_operand = gen_expr(ctx, element_type)

    body = _build_subquery_select(
        ctx,
        target_type=element_type,
        correlated=correlated,
        with_limit_1=False,
    )

    flip = rand.random() < _P_NEGATE_IN
    return InSubquery(BOOL, outer_operand, body, negated=flip)
209
+
210
+
211
+ # ===========================================================================
212
+ # Shared subquery body builder
213
+ # ===========================================================================
214
+
215
def _build_subquery_select(
    ctx: GenContext,
    *,
    correlated: bool,
    target_type: Optional[PgType] = None,
    target_types: Optional[tuple[PgType, ...]] = None,
    target_expr: Optional[Expr] = None,
    with_limit_1: bool = False,
) -> Select:
    """Construct a SELECT for use as a subquery body.

    Caller provides EXACTLY ONE target spec:
      * `target_type` (singular) - generate a single target of that
        type; convenience for the common single-column case.
      * `target_types` (tuple) - generate len(target_types) targets,
        one of each type. Used by multi-column derived tables.
      * `target_expr` - a pre-built single target expression
        (used by EXISTS's constant `SELECT 1`).

    The flow:
      0. Validate the target spec. This happens before anything else so
         that an invalid call raises without descending, without
         building a FROM clause and without consuming random numbers.
      1. Capture outer scope (needed by the correlation forcer).
      2. Descend via ctx.descend_subquery (correlated or not).
      3. Build FROM clause on the child context.
      4. Build the target list (generated or supplied).
      5. Optionally generate WHERE (with allow_aggregates=False).
      6. If correlated, AND-inject an outer-referencing predicate
         into WHERE.
      7. Add LIMIT 1 if requested.

    Args:
        ctx: Outer generation context.
        correlated: Passed to ``descend_subquery``; also enables the
            forced correlation predicate.
        target_type: Single target type (mutually exclusive with the
            other two specs).
        target_types: Tuple of target types (mutually exclusive).
        target_expr: Ready-made single target expression (mutually
            exclusive).
        with_limit_1: Attach ``LIMIT 1`` to the result.

    Returns:
        The subquery body as a ``Select``.

    Raises:
        ValueError: If zero or more than one target spec is given.
    """
    # Fail fast: checking the arguments first keeps an invalid call
    # from advancing the RNG or the shared alias counters before it
    # raises.
    set_count = sum(
        1 for x in (target_expr, target_type, target_types) if x is not None
    )
    if set_count != 1:
        raise ValueError(
            "_build_subquery_select needs exactly one of target_expr, "
            f"target_type, target_types (got {set_count})"
        )

    # Lazy import to break the cycle: gen/expr.py imports the public
    # entry points of this module, so importing _gen_from_clause at
    # module top would close the import loop.
    from .select import _gen_from_clause

    # Capture the outer scope BEFORE descending; the correlation forcer
    # below needs it to pick an outer column for its predicate.
    outer_scope = ctx.scope
    child_ctx = ctx.descend_subquery(correlated=correlated)
    cfg = child_ctx.config
    rng = child_ctx.rng

    from_clause = _gen_from_clause(child_ctx)

    if target_expr is not None:
        target_exprs: tuple[Expr, ...] = (target_expr,)
    else:
        # Targets are generated with aggregates disallowed: without a
        # GROUP BY, mixing an aggregate with a bare column reference in
        # one target expression is rejected by PG (42803).
        target_ctx = replace(child_ctx, allow_aggregates=False)
        if target_types is not None:
            types_tuple: tuple[PgType, ...] = target_types
        else:
            # Exactly one spec is set and it is neither target_expr nor
            # target_types, so target_type must be the one. The assert
            # only narrows the Optional for the type checker.
            assert target_type is not None
            types_tuple = (target_type,)
        target_exprs = tuple(gen_expr(target_ctx, t) for t in types_tuple)

    where: Optional[Expr] = None
    if FEATURE_WHERE in cfg.feature_flags and rng.random() < cfg.p_where:
        # WHERE forbids aggregates, just like in the outer query.
        where_ctx = replace(child_ctx, allow_aggregates=False)
        where = gen_expr(where_ctx, BOOL)

    if correlated:
        where = _force_correlation_predicate(child_ctx, outer_scope, where)

    return Select(
        targets=tuple(SelectTarget(expr=e) for e in target_exprs),
        from_=from_clause,
        where=where,
        limit=Literal(INT4, 1) if with_limit_1 else None,
    )
312
+
313
+
314
+ # ===========================================================================
315
+ # Correlation enforcement
316
+ # ===========================================================================
317
+
318
def _force_correlation_predicate(
    child_ctx: GenContext,
    outer_scope: Scope,
    existing_where: Optional[Expr],
) -> Optional[Expr]:
    """Make the inner WHERE reference the outer scope at least once.

    Builds ``outer_col = X`` and AND-combines it in front of
    ``existing_where`` (or returns it alone when there is no existing
    WHERE). The outer column is drawn from outer-scope columns whose
    type has a same-typed ``=`` operator returning BOOL in the catalog.
    ``X`` is an inner-scope column of the same type when one exists,
    otherwise a literal of that type.

    Args:
        child_ctx: The subquery's own context (rng, catalog, scope).
        outer_scope: Scope of the enclosing query, captured before
            descending.
        existing_where: WHERE generated so far, or None.

    Returns:
        The combined predicate, or ``existing_where`` unchanged when
        the outer scope offers no equality-comparable column.
    """
    rand = child_ctx.rng

    # Types with a usable `=`: both operand types identical. Only used
    # for membership checks, so a set is enough.
    comparable = set()
    for op in child_ctx.catalog.binary_ops_returning(BOOL):
        if op.symbol == "=" and op.left == op.right:
            comparable.add(op.left)

    outer_pool = [
        col for col in outer_scope.visible_columns()
        if col.type in comparable
    ]
    if not outer_pool:
        # Nothing comparable outside: skip correlation instead of
        # emitting invalid SQL.
        return existing_where

    anchor = rand.choice(outer_pool)

    # The right-hand side must come from the subquery's OWN tables. In a
    # correlated child scope visible_columns() also yields outer
    # bindings; comparing two outer columns would be a constant from
    # the subquery's point of view. Hence the filter on aliases that
    # the child scope itself declares.
    own_aliases = {
        table_alias for table_alias, _ in child_ctx.scope.aliased_tables()
    }
    inner_pool = [
        col for col in child_ctx.scope.visible_columns()
        if col.table_alias in own_aliases and col.type == anchor.type
    ]
    if inner_pool:
        partner = rand.choice(inner_pool)
        rhs: Expr = ColumnRef(partner.type, partner.table_alias, partner.column)
    else:
        # No same-typed inner column: a literal still leaves the outer
        # column on the left, which is what makes this correlated.
        rhs = gen_literal(rand, anchor.type)

    lhs = ColumnRef(anchor.type, anchor.table_alias, anchor.column)
    predicate = BinaryOp(BOOL, "=", lhs, rhs)

    # Correlation predicate goes first, any existing WHERE second.
    if existing_where is not None:
        return BinaryOp(BOOL, "AND", predicate, existing_where)
    return predicate
390
+
391
+
392
# Public API of this module: the four subquery generators. The shared
# body builder and the correlation helper stay private.
__all__ = [
    "gen_scalar_subquery",
    "gen_exists_subquery",
    "gen_in_subquery",
    "gen_derived_table",
]