waxsql 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
waxsql/schema.py ADDED
@@ -0,0 +1,557 @@
1
+ """Schema model and random schema generator.
2
+
3
+ The Schema is an immutable description of tables, columns, primary keys,
4
+ foreign keys, and indexes. It serves two purposes:
5
+
6
+ 1. Input to the query generator (which tables/columns exist, what
7
+ types they have, what FK relationships connect them).
8
+
9
+ 2. Source of DDL that can be loaded into PostgreSQL for PARSE/PLAN
10
+ validation modes, or just for human inspection of test scenarios.
11
+
12
+ Schema generation is deterministic in the seed and uses its own RNG
13
+ stream — the public `generate()` function in __init__.py splits the
14
+ master seed so that "same schema, different queries" is possible by
15
+ re-seeding only the query generator.
16
+
17
+ A few design choices worth flagging:
18
+
19
+ * Every table has an `id` BIGINT PK. Composite keys are interesting
20
+ but add complexity to FK generation and JOIN matching. Add later.
21
+
22
+ * FKs are emitted as separate ALTER TABLE statements after all CREATE
23
+ TABLEs. This sidesteps any topological-sort headache when the FK
24
+ graph has cycles (allowed at high complexity) and makes the DDL
25
+ work even when tables reference each other.
26
+
27
+ * Index names and FK names embed the table name, which keeps them
28
+ unique across the schema (PostgreSQL constraint and index names
29
+ are schema-scoped, not table-scoped).
30
+
31
+ * Iteration over column lists, candidate referent indices, etc. is
32
+ always done over sorted/list-typed inputs. Set iteration order is
33
+ not guaranteed to be stable across Python builds, so all RNG-
34
+ sensitive iteration goes through `sorted(...)`.
35
+ """
36
+ from __future__ import annotations
37
+
38
+ import random
39
+ from dataclasses import dataclass, field, replace
40
+ from typing import Optional
41
+
42
+ from .types import (
43
+ PgType,
44
+ INT4, INT8, NUMERIC, FLOAT8,
45
+ TEXT, VARCHAR, BOOL,
46
+ DATE, TIMESTAMPTZ, INTERVAL,
47
+ UUID, JSONB,
48
+ )
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Identifier quoting
53
+ # ---------------------------------------------------------------------------
54
+
55
# PostgreSQL key words that cannot be used as a bare table or column name:
# the fully reserved words plus the "reserved (can be function or type)"
# group (JOIN, LEFT, LIKE, IS, ...). Our name generator pulls from a curated
# word list, but better safe than sorry: any candidate name in this set gets
# double-quoted. Quoting a lowercase name is always harmless, so erring on
# the side of listing a word here costs nothing.
RESERVED_WORDS: frozenset[str] = frozenset({
    "all", "analyse", "analyze", "and", "any", "array", "as", "asc",
    "asymmetric", "authorization", "binary", "both", "case", "cast",
    "check", "collate", "collation", "column", "concurrently",
    "constraint", "create", "cross", "current_catalog", "current_date",
    "current_role", "current_schema", "current_time", "current_timestamp",
    "current_user", "default", "deferrable", "desc", "distinct", "do",
    "else", "end", "except", "false", "fetch", "for", "foreign", "freeze",
    "from", "full", "grant", "group", "having", "ilike", "in", "initially",
    "inner", "intersect", "into", "is", "isnull", "join", "lateral",
    "leading", "left", "like", "limit", "localtime", "localtimestamp",
    "natural", "not", "notnull", "null", "offset", "on", "only", "or",
    "order", "outer", "overlaps", "placing", "primary", "references",
    "returning", "right", "select", "session_user", "similar", "some",
    "symmetric", "system_user", "table", "tablesample", "then", "to",
    "trailing", "true", "union", "unique", "user", "using", "variadic",
    "verbose", "when", "where", "window", "with",
})


def quote_ident(name: str) -> str:
    """Return `name` as an SQL identifier, double-quoting it when required.

    A name is emitted bare only if it consists of Python identifier
    characters, is already all lowercase, and is not in RESERVED_WORDS.
    Anything else is wrapped in double quotes, with embedded double
    quotes doubled.
    """
    needs_quotes = (
        not name.isidentifier()
        or name != name.lower()
        or name in RESERVED_WORDS
    )
    if not needs_quotes:
        return name
    return '"' + name.replace('"', '""') + '"'
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # Model
90
+ # ---------------------------------------------------------------------------
91
+
92
@dataclass(frozen=True)
class Column:
    """A single table column: name, type, nullability, optional default."""

    name: str
    type: PgType
    nullable: bool = True
    # Raw SQL expression, emitted verbatim after DEFAULT; rarely used here.
    default: Optional[str] = None

    def to_sql(self) -> str:
        """Render this column as one entry of a CREATE TABLE column list."""
        not_null = [] if self.nullable else ["NOT NULL"]
        default = [] if self.default is None else [f"DEFAULT {self.default}"]
        return " ".join(
            [quote_ident(self.name), self.type.sql(), *not_null, *default]
        )
106
+
107
+
108
@dataclass(frozen=True)
class ForeignKey:
    """A foreign-key constraint from local columns to another table's columns."""

    # Constraint name, emitted after ADD CONSTRAINT.
    name: str
    # Referencing columns on the table that owns this constraint.
    columns: tuple[str, ...]
    # Name of the referenced table and the columns it is referenced by.
    ref_table: str
    ref_columns: tuple[str, ...]
    # Referential actions, emitted verbatim after ON DELETE / ON UPDATE.
    on_delete: str = "NO ACTION"
    on_update: str = "NO ACTION"
116
+
117
+
118
@dataclass(frozen=True)
class Index:
    """Description of one index on a table.

    Populate `columns` for an index over the listed columns, or
    `expressions` for a functional/expression index. A non-empty
    `where` predicate makes the index partial.
    """

    name: str
    # Column names to index (used when `expressions` is empty).
    columns: tuple[str, ...] = ()
    # Raw SQL expressions to index.
    expressions: tuple[str, ...] = ()
    unique: bool = False
    # Raw SQL predicate for a partial index, or None for a full index.
    where: Optional[str] = None
128
+
129
+
130
@dataclass(frozen=True)
class Table:
    """A table: its columns plus primary key, foreign keys and indexes."""

    name: str
    columns: tuple[Column, ...]
    # Names of the primary-key columns; empty means no PRIMARY KEY clause.
    primary_key: tuple[str, ...] = ()
    foreign_keys: tuple[ForeignKey, ...] = ()
    indexes: tuple[Index, ...] = ()

    def column(self, name: str) -> Column:
        """Return the first column called `name`; raise KeyError if absent."""
        found = next((col for col in self.columns if col.name == name), None)
        if found is None:
            raise KeyError(name)
        return found
143
+
144
+
145
@dataclass(frozen=True)
class Schema:
    """An ordered, immutable collection of tables."""

    tables: tuple[Table, ...]

    def table(self, name: str) -> Table:
        """Return the first table called `name`; raise KeyError if absent."""
        found = next((tbl for tbl in self.tables if tbl.name == name), None)
        if found is None:
            raise KeyError(name)
        return found

    def emit_ddl(self) -> str:
        """Return the full DDL script for this schema.

        Statements appear in three groups: every CREATE TABLE, then every
        ALTER TABLE ... ADD CONSTRAINT for foreign keys (so tables may
        reference each other, even cyclically), then every CREATE INDEX.
        Statements are separated by a blank line and the script ends with
        a newline.
        """
        statements = [_emit_create_table(tbl) for tbl in self.tables]
        statements += [
            _emit_alter_add_fk(tbl.name, fk)
            for tbl in self.tables
            for fk in tbl.foreign_keys
        ]
        statements += [
            _emit_create_index(tbl.name, ix)
            for tbl in self.tables
            for ix in tbl.indexes
        ]
        return "\n\n".join(statements) + "\n"
171
+
172
+
173
def _emit_create_table(t: Table) -> str:
    """Render a CREATE TABLE statement for `t`.

    Column definitions come first; a table-level PRIMARY KEY clause is
    appended only when the table declares primary-key columns.
    """
    entries = [col.to_sql() for col in t.columns]
    if t.primary_key:
        key_list = ", ".join(map(quote_ident, t.primary_key))
        entries.append("PRIMARY KEY (" + key_list + ")")
    return (
        "CREATE TABLE " + quote_ident(t.name)
        + " (\n " + ",\n ".join(entries) + "\n);"
    )
180
+
181
+
182
def _emit_alter_add_fk(table_name: str, fk: ForeignKey) -> str:
    """Render the ALTER TABLE statement that attaches `fk` to `table_name`."""
    local_cols = ", ".join(map(quote_ident, fk.columns))
    remote_cols = ", ".join(map(quote_ident, fk.ref_columns))
    # The referential actions are emitted verbatim, not quoted.
    clauses = [
        f"ALTER TABLE {quote_ident(table_name)}",
        f"ADD CONSTRAINT {quote_ident(fk.name)}",
        f"FOREIGN KEY ({local_cols})",
        f"REFERENCES {quote_ident(fk.ref_table)} ({remote_cols})",
        f"ON DELETE {fk.on_delete} ON UPDATE {fk.on_update};",
    ]
    return " ".join(clauses)
192
+
193
+
194
def _emit_create_index(table_name: str, idx: Index) -> str:
    """Render a CREATE [UNIQUE] INDEX statement for `idx` on `table_name`.

    When `idx.expressions` is non-empty it takes precedence over
    `idx.columns`; each expression is wrapped in parentheses and emitted
    verbatim. A truthy `idx.where` adds a WHERE clause (partial index).
    """
    if idx.expressions:
        targets = [f"({expr})" for expr in idx.expressions]
    else:
        targets = [quote_ident(col) for col in idx.columns]
    head = "CREATE UNIQUE INDEX" if idx.unique else "CREATE INDEX"
    statement = (
        f"{head} {quote_ident(idx.name)} "
        f"ON {quote_ident(table_name)} ({', '.join(targets)})"
    )
    if idx.where:
        statement += f" WHERE {idx.where}"
    return statement + ";"
205
+
206
+
207
+ # ---------------------------------------------------------------------------
208
+ # Random schema generator
209
+ # ---------------------------------------------------------------------------
210
+
211
+ # Curated word lists. Output is dramatically more readable when tables are
212
+ # `customers` and `orders` rather than `t_47` and `t_48`. Costs nothing.
213
+ _NOUNS: tuple[str, ...] = (
214
+ "customer", "order", "product", "invoice", "shipment", "address",
215
+ "account", "transaction", "category", "tag", "comment", "review",
216
+ "ticket", "event", "session", "device", "service", "region", "country",
217
+ "city", "warehouse", "vendor", "supplier", "employee", "department",
218
+ "project", "task", "milestone", "file", "folder", "role",
219
+ "permission", "subscription", "plan", "feature", "discount", "coupon",
220
+ "payment", "refund", "channel", "post", "thread", "notification",
221
+ "alert", "metric", "report", "audit", "snapshot", "backup",
222
+ "branch", "release", "build", "artifact", "config", "policy", "rule",
223
+ "filter", "rating", "vote", "follower", "friend", "contact", "lead",
224
+ "campaign", "keyword", "asset", "license", "contract", "clause",
225
+ "schedule", "appointment", "reservation", "booking", "host",
226
+ "room", "floor", "building", "site", "zone", "lane", "route", "stop",
227
+ "trip", "vehicle", "driver", "package", "manifest", "container",
228
+ "shelf", "bin", "lot", "batch", "sku", "variant", "color", "size",
229
+ "material", "fabric", "ingredient", "recipe", "cart",
230
+ )
231
+
232
+ _ADJECTIVES: tuple[str, ...] = (
233
+ "active", "archived", "pending", "primary", "secondary", "external",
234
+ "internal", "draft", "published", "private", "public", "shared",
235
+ "default", "custom", "raw", "processed", "verified", "trial",
236
+ "premium", "legacy", "current", "historical", "scheduled",
237
+ )
238
+
239
+
240
@dataclass
class SchemaConfig:
    """Knobs controlling random schema generation.

    Every knob is a plain field so callers can build a config by hand
    when the complexity-dial preset doesn't fit their needs.
    """

    # Number of tables to generate.
    table_count: int
    # Inclusive bounds on generated columns per table (the `id` PK is extra).
    min_columns: int
    max_columns: int
    # P(non-id int column becomes an FK), 0..1.
    fk_density: float
    # If False, FKs only point at earlier tables.
    allow_cyclic_fks: bool
    # If True, a table may FK to itself.
    allow_self_fks: bool
    # Mean extra indexes per table (Gaussian).
    index_density: float
    # Relative weight of each column type when picking a column's type.
    type_weights: dict[PgType, float] = field(default_factory=dict)
255
+
256
+
257
def schema_config_for_complexity(complexity: int) -> SchemaConfig:
    """Translate a 0..10 complexity dial into a SchemaConfig.

    Values outside 0..10 are clamped. Cyclic and self-referencing FKs
    unlock only at the upper end of the dial; those structures matter
    mostly to recursive-CTE and graph-style query generation.
    """
    level = max(0, min(10, complexity))
    settings = {
        "table_count": 2 + level,            # 2..12
        "min_columns": 3,
        "max_columns": 4 + level,            # up to 14
        "fk_density": 0.30 + 0.04 * level,   # 0.30..0.70
        "allow_cyclic_fks": level >= 8,
        "allow_self_fks": level >= 5,
        "index_density": 0.5 + 0.1 * level,
        "type_weights": _default_type_weights(),
    }
    return SchemaConfig(**settings)
275
+
276
+
277
def _default_type_weights() -> dict[PgType, float]:
    """Return a fresh default weighting over column types.

    Weights lean toward the types real schemas use most. Array types are
    left out for now: they make join/where generation more annoying than
    it's worth at this stage.
    """
    weighted = [
        (INT4, 4.0), (INT8, 3.0), (NUMERIC, 1.5), (FLOAT8, 0.5),
        (TEXT, 4.0), (VARCHAR, 1.0), (BOOL, 2.0),
        (DATE, 1.0), (TIMESTAMPTZ, 2.5), (INTERVAL, 0.3),
        (UUID, 1.0), (JSONB, 0.8),
    ]
    return dict(weighted)
287
+
288
+
289
def generate_schema(seed: int, complexity: int = 5) -> Schema:
    """Build a random schema from a seed and a complexity dial.

    The result is deterministic: the same (seed, complexity) pair always
    yields an identical Schema object.
    """
    return _generate_schema_impl(
        random.Random(seed),
        schema_config_for_complexity(complexity),
    )
297
+
298
+
299
def generate_schema_with_config(seed: int, cfg: SchemaConfig) -> Schema:
    """Build a random schema from a seed and a caller-supplied SchemaConfig."""
    return _generate_schema_impl(random.Random(seed), cfg)
303
+
304
+
305
def _generate_schema_impl(rng: random.Random, cfg: SchemaConfig) -> Schema:
    """Build a Schema from `cfg`, drawing all randomness from `rng`.

    The build runs in three passes:

    1. tables with their columns;
    2. foreign keys, which need every table to exist as a possible
       referent;
    3. indexes, which need the FKs to exist so they can be auto-indexed.

    Tables are frozen dataclasses, so the FK and index passes each return
    a new list of replaced Tables instead of mutating their input. That
    keeps each pass logically atomic: the FK pass reads `drafts[ref_idx]`
    while building, which would be fragile if the list changed under it.
    """
    bare_tables: list[Table] = []
    for table_name in _unique_names(rng, cfg.table_count, plural=True):
        width = rng.randint(cfg.min_columns, cfg.max_columns)
        generated = _generate_columns(rng, width, cfg.type_weights)
        # Every table gets an `id BIGINT NOT NULL` PK, prepended.
        id_column = Column("id", INT8, nullable=False)
        bare_tables.append(
            Table(
                name=table_name,
                columns=(id_column,) + generated,
                primary_key=("id",),
            )
        )

    with_fks = _add_foreign_keys(rng, bare_tables, cfg)
    with_indexes = _add_indexes(rng, with_fks, cfg)
    return Schema(tables=tuple(with_indexes))
327
+
328
+
329
def _unique_names(rng: random.Random, n: int, *, plural: bool) -> list[str]:
    """Return `n` distinct identifier-safe names, in generation order.

    Names are nouns (pluralized when `plural` is set), sometimes prefixed
    with an adjective. If the bounded random search cannot find enough
    distinct names, the remainder are filled with `t_0`, `t_1`, ...
    """
    # `taken` is only used for membership tests and is never iterated, so
    # a set is safe under the determinism rules (set iteration order is
    # unstable across Python builds). Everything the RNG draws from is a
    # sorted list.
    taken: set[str] = set()
    names: list[str] = []
    noun_pool = sorted(_NOUNS)
    adj_pool = sorted(_ADJECTIVES)

    # Cap the number of draws to avoid pathological cases.
    budget = n * 50
    tries = 0
    while len(names) < n and tries < budget:
        tries += 1
        base = rng.choice(noun_pool)
        if plural:
            base = _pluralize(base)
        # 20% of the time an adjective prefix is added for variety; it is
        # also the fallback when the bare noun is already taken. The roll
        # is drawn unconditionally, before the collision check.
        wants_prefix = rng.random() < 0.2
        if wants_prefix or base in taken:
            candidate = f"{rng.choice(adj_pool)}_{base}"
        else:
            candidate = base
        if candidate in taken:
            continue
        taken.add(candidate)
        names.append(candidate)

    # Numeric-suffix fallback if the word pool was somehow exhausted.
    counter = 0
    while len(names) < n:
        candidate = f"t_{counter}"
        counter += 1
        if candidate not in taken:
            taken.add(candidate)
            names.append(candidate)
    return names
367
+
368
+
369
+ def _pluralize(noun: str) -> str:
370
+ """Trivial English pluralizer. Good enough for table names."""
371
+ if noun.endswith(("s", "x", "z", "ch", "sh")):
372
+ return noun + "es"
373
+ if noun.endswith("y") and (len(noun) < 2 or noun[-2] not in "aeiou"):
374
+ return noun[:-1] + "ies"
375
+ return noun + "s"
376
+
377
+
378
def _generate_columns(
    rng: random.Random,
    n: int,
    type_weights: dict[PgType, float],
) -> tuple[Column, ...]:
    """Generate up to `n` columns with names not colliding with `id`.

    Args:
        rng: Source of all randomness.
        n: Number of columns wanted.
        type_weights: Relative weight per column type. An empty mapping
            (the SchemaConfig default) falls back to
            `_default_type_weights()` rather than failing inside
            `rng.choices` with an opaque IndexError.

    Returns:
        A tuple of columns with distinct names. It may hold fewer than
        `n` entries if the bounded name search (n * 50 draws) runs out.
    """
    if not type_weights:
        type_weights = _default_type_weights()

    # `seen` is membership-only — never iterated — so a set is safe under
    # the determinism rules. The RNG-touching iteration is over
    # `nouns`/`adjs`/`types`, all sorted lists.
    seen: set[str] = {"id"}
    cols: list[Column] = []
    nouns = sorted(_NOUNS)
    adjs = sorted(_ADJECTIVES)
    # Sort by .name rather than by the PgType itself: that gives a stable,
    # build-independent order without requiring PgType to define __lt__.
    types = sorted(type_weights.keys(), key=lambda t: t.name)
    weights = [type_weights[t] for t in types]

    attempts = 0
    while len(cols) < n and attempts < n * 50:
        attempts += 1
        if rng.random() < 0.5:
            name = rng.choice(nouns)
        else:
            name = f"{rng.choice(adjs)}_{rng.choice(nouns)}"
        if name in seen:
            continue
        seen.add(name)
        col_type = rng.choices(types, weights=weights)[0]
        # 60% nullable matches typical real-world schema shape closely
        # enough, and gives the query generator regular exercise of both
        # NULL-aware and NULL-naive code paths. This is not the same as
        # the data generator's null_fraction (which controls whether a
        # nullable column actually contains NULL).
        nullable = rng.random() < 0.6
        cols.append(Column(name=name, type=col_type, nullable=nullable))
    return tuple(cols)
415
+
416
+
417
+ def _add_foreign_keys(
418
+ rng: random.Random,
419
+ drafts: list[Table],
420
+ cfg: SchemaConfig,
421
+ ) -> list[Table]:
422
+ """Add FKs to each table per the config's density and cycle rules.
423
+
424
+ FK columns are selected from existing int4/int8 non-id columns; the
425
+ referent is always some other table's `id`. This keeps types
426
+ compatible without us having to re-type columns mid-generation.
427
+
428
+ Tradeoff: not retro-renaming the columns means the FK column name
429
+ (`metric`, `region`, ...) often has nothing to do with what it
430
+ references. That's a fidelity loss versus real schemas but keeps
431
+ the generation pass single-shot — column generation doesn't have
432
+ to know which columns will later become FKs. The alternative
433
+ (assign FKs first, generate columns to match) makes the dependency
434
+ direction backwards from how a schema is normally built up.
435
+ """
436
+ n = len(drafts)
437
+ new_drafts: list[Table] = []
438
+
439
+ for i, t in enumerate(drafts):
440
+ fks: list[ForeignKey] = []
441
+
442
+ # Build candidate referent index list.
443
+ #
444
+ # The non-cyclic path is "earlier tables only" because forward
445
+ # references would create cycles by definition: if table i can
446
+ # point at table j > i, and table j can point at table i, the
447
+ # graph has a 2-cycle. Restricting to j < i gives a DAG by
448
+ # construction. The data generator currently relies on this
449
+ # DAG-ness to topo-walk FKs (see ARCHITECTURE.md "out of scope": FK-
450
+ # cyclic schemas in the data generator).
451
+ if cfg.allow_cyclic_fks:
452
+ candidates = list(range(n))
453
+ else:
454
+ candidates = list(range(i)) # earlier tables only
455
+ if cfg.allow_self_fks and i not in candidates:
456
+ candidates.append(i)
457
+ # Self-FK control runs AFTER the cyclic branch added `i` — the
458
+ # cyclic case unconditionally added every index, including `i`,
459
+ # so without this filter we'd allow self-FKs at any complexity
460
+ # the moment cyclic FKs unlock. The two flags must be honored
461
+ # independently.
462
+ if not cfg.allow_self_fks:
463
+ candidates = [j for j in candidates if j != i]
464
+
465
+ for col in t.columns:
466
+ if col.name == "id":
467
+ continue
468
+ if col.type.name not in ("int4", "int8"):
469
+ continue
470
+ if rng.random() > cfg.fk_density:
471
+ continue
472
+ if not candidates:
473
+ continue
474
+ # `sorted(candidates)` looks redundant — `candidates` is
475
+ # built from range(...) which is already sorted — but the
476
+ # `if not allow_self_fks` filter above can break that
477
+ # invariant for the cyclic path (where we appended `i`
478
+ # before filtering). The sort is cheap and load-bearing
479
+ # for determinism: rng.choice consumes the iterable's
480
+ # actual order, not a canonical one.
481
+ ref_idx = rng.choice(sorted(candidates))
482
+ ref_table = drafts[ref_idx]
483
+ fks.append(ForeignKey(
484
+ name=f"fk_{t.name}_{col.name}",
485
+ columns=(col.name,),
486
+ ref_table=ref_table.name,
487
+ ref_columns=("id",),
488
+ on_delete=rng.choice((
489
+ "NO ACTION", "CASCADE", "SET NULL", "RESTRICT"
490
+ )),
491
+ ))
492
+ new_drafts.append(replace(t, foreign_keys=tuple(fks)))
493
+ return new_drafts
494
+
495
+
496
+ def _add_indexes(
497
+ rng: random.Random,
498
+ drafts: list[Table],
499
+ cfg: SchemaConfig,
500
+ ) -> list[Table]:
501
+ """Add indexes: always on FK columns, plus a few extras per density.
502
+
503
+ The "always index FK columns" rule mirrors what most real schemas
504
+ do (and what PG's planner expects for efficient joins). Without
505
+ these indexes, planner-tier validation would produce uniformly
506
+ seq-scan-heavy plans that don't exercise the index-using code
507
+ paths the generator is trying to fuzz.
508
+ """
509
+ new_drafts: list[Table] = []
510
+ for t in drafts:
511
+ idxs: list[Index] = []
512
+ for fk in t.foreign_keys:
513
+ idxs.append(Index(
514
+ name=f"ix_{t.name}_{'_'.join(fk.columns)}",
515
+ columns=fk.columns,
516
+ ))
517
+ n_extra = max(0, int(rng.gauss(cfg.index_density, 0.5)))
518
+ non_id_cols = sorted(
519
+ (c for c in t.columns if c.name != "id"),
520
+ key=lambda c: c.name,
521
+ )
522
+ for k in range(n_extra):
523
+ if not non_id_cols:
524
+ break
525
+ # Three-way taxonomy of generated indexes (cumulative roll):
526
+ # < 0.60 → single-column b-tree
527
+ # < 0.85 → two-column composite (falls through to partial
528
+ # branch if there are fewer than 2 columns)
529
+ # else → partial b-tree predicated on a bool column
530
+ # (silently produces no index if no bool columns)
531
+ # The partial branch can no-op, so n_extra is an upper bound
532
+ # on the number of extra indexes, not an exact count.
533
+ roll = rng.random()
534
+ if roll < 0.6:
535
+ col = rng.choice(non_id_cols)
536
+ idxs.append(Index(
537
+ name=f"ix_{t.name}_{col.name}_x{k}",
538
+ columns=(col.name,),
539
+ ))
540
+ elif roll < 0.85 and len(non_id_cols) >= 2:
541
+ cols = rng.sample(non_id_cols, 2)
542
+ idxs.append(Index(
543
+ name=f"ix_{t.name}_{'_'.join(c.name for c in cols)}_x{k}",
544
+ columns=tuple(c.name for c in cols),
545
+ ))
546
+ else:
547
+ bool_cols = [c for c in non_id_cols if c.type == BOOL]
548
+ if bool_cols:
549
+ pred_col = rng.choice(bool_cols)
550
+ target_col = rng.choice(non_id_cols)
551
+ idxs.append(Index(
552
+ name=f"ix_{t.name}_{target_col.name}_partial_x{k}",
553
+ columns=(target_col.name,),
554
+ where=f"{quote_ident(pred_col.name)} = TRUE",
555
+ ))
556
+ new_drafts.append(replace(t, indexes=tuple(idxs)))
557
+ return new_drafts