vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,653 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Schema-reconcile fixture worker.
4
+
5
+ Hosted inside the consolidated ``vgi-fixture-worker`` (entry point in
6
+ pyproject.toml) alongside the other reproducer catalogs. Used by the
7
+ ``test/sql/integration/schema_reconcile.test`` regression test in
8
+ ``~/Development/vgi`` to exercise the C++ ``ReconcileBatchToSchema`` helper
9
+ across INSERT, UPDATE, DELETE, and SELECT batch flows.
10
+
11
+ Three writable tables, each with a different rowid type — covering every
12
+ rowid shape that exercises a separate ReconcileBatchToSchema code path:
13
+
14
+ - ``demo`` : rowid int64 NOT NULL — primitive integer rowid.
15
+ - ``ts_demo`` : rowid timestamp[ms, tz=UTC] NOT NULL — TZ-aware
16
+ timestamp as the rowid; exercises the value cast on
17
+ the rowid itself (DuckDB collapses TIMESTAMP_TZ to
18
+ timestamp[us, tz=session]).
19
+ - ``struct_demo`` : rowid struct{a int64 NOT NULL, b string nullable} NOT NULL
20
+ — struct rowid with mixed nullability inside;
21
+ exercises recursive nullability reshape on a rowid.
22
+
23
+ User columns (id/ts/nested/tags) are identical across tables.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import os
29
+ import pickle
30
+ import sqlite3
31
+ import sys
32
+ import threading
33
+ from dataclasses import dataclass
34
+ from typing import Any
35
+
36
+ import pyarrow as pa
37
+ from vgi_rpc.rpc import OutputCollector
38
+
39
+ from vgi import Worker
40
+ from vgi.catalog import Catalog, Schema
41
+ from vgi.catalog.catalog_interface import (
42
+ AttachOpaqueData,
43
+ ReadOnlyCatalogInterface,
44
+ ScanFunctionResult,
45
+ SchemaInfo,
46
+ SchemaObjectType,
47
+ SerializedSchema,
48
+ TableInfo,
49
+ TransactionOpaqueData,
50
+ )
51
+ from vgi.invocation import BindResponse, GlobalInitResponse
52
+ from vgi.table_function import (
53
+ BindParams,
54
+ InitParams,
55
+ ProcessParams,
56
+ TableFunctionGenerator,
57
+ )
58
+ from vgi.table_in_out_function import TableInOutGenerator
59
+
60
# Catalog name and the name of the single schema it contains.
CATALOG_NAME = "schema_reconcile"
_SCHEMA_NAME = "main"


# ---------------------------------------------------------------------------
# Declared user-facing columns — the same for all three tables.
# Each facet below (NOT NULL primitive, TZ-aware ms timestamp, NOT NULL leaf
# inside a struct, NOT NULL item inside list-of-struct) is something
# DuckDB's Arrow round-trip cannot preserve.
# ---------------------------------------------------------------------------

# Type of the ``nested`` column: one NOT NULL leaf and two nullable leaves.
_NESTED_TYPE = pa.struct(
    [
        pa.field("a", pa.int32(), nullable=False),
        pa.field("b", pa.string(), nullable=True),
        pa.field("ts2", pa.timestamp("ms", tz="UTC"), nullable=True),
    ]
)

# Element type of the ``tags`` list: NOT NULL key, nullable binary value.
_TAG_ITEM_TYPE = pa.struct(
    [
        pa.field("k", pa.string(), nullable=False),
        pa.field("v", pa.binary(), nullable=True),
    ]
)

USER_FIELDS: list[pa.Field[Any]] = [
    pa.field("id", pa.int64(), nullable=False),
    pa.field("ts", pa.timestamp("ms", tz="UTC"), nullable=False),
    pa.field("nested", _NESTED_TYPE, nullable=False),
    pa.field(
        "tags",
        pa.list_(pa.field("item", _TAG_ITEM_TYPE, nullable=False)),
        nullable=False,
    ),
]
USER_SCHEMA: pa.Schema = pa.schema(USER_FIELDS)
103
+
104
+
105
+ # ---------------------------------------------------------------------------
106
+ # Per-table specs — each table gets its own rowid type.
107
+ # ---------------------------------------------------------------------------
108
+
109
+
110
def _rowid_field(arrow_type: pa.DataType) -> pa.Field[Any]:
    """Return a NOT NULL field named ``rowid`` of the given Arrow type.

    The field carries ``is_row_id`` metadata (with an empty value), which is
    the marker the C++ side keys on. It is always declared non-nullable so
    the rowid reshape path in ReconcileBatchToSchema is exercised.
    """
    row_id_marker = {b"is_row_id": b""}
    return pa.field("rowid", arrow_type, nullable=False, metadata=row_id_marker)
117
+
118
+
119
# One rowid field per rowid shape: primitive int64, TZ-aware millisecond
# timestamp, and a struct mixing a NOT NULL leaf with a nullable one.
_INT64_ROWID = _rowid_field(pa.int64())

_TS_ROWID = _rowid_field(pa.timestamp("ms", tz="UTC"))

_STRUCT_ROWID_TYPE = pa.struct(
    [
        pa.field("a", pa.int64(), nullable=False),
        pa.field("b", pa.string(), nullable=True),
    ]
)
_STRUCT_ROWID = _rowid_field(_STRUCT_ROWID_TYPE)
131
+
132
+
133
@dataclass(frozen=True)
class TableSpec:
    """Immutable description of one fixture table."""

    # Logical table name (the key used in ``TABLES``).
    name: str
    # Arrow field describing this table's rowid column.
    rowid_field: pa.Field[Any]
    # Underlying SQLite table name.
    storage_table: str

    @property
    def table_schema(self) -> pa.Schema:
        """Full table schema: the shared user columns followed by the rowid."""
        return pa.schema([*USER_FIELDS, self.rowid_field])

    @property
    def delete_input_schema(self) -> pa.Schema:
        """Schema expected of a DELETE input batch: the rowid column only."""
        rowid_only = [self.rowid_field]
        return pa.schema(rowid_only)
146
+
147
+
148
# Registry of the fixture tables, keyed by logical table name. Each entry
# pairs a rowid field with the SQLite table that stores its rows.
TABLES: dict[str, TableSpec] = {
    "demo": TableSpec("demo", _INT64_ROWID, "demo_rows"),
    "ts_demo": TableSpec("ts_demo", _TS_ROWID, "ts_demo_rows"),
    "struct_demo": TableSpec("struct_demo", _STRUCT_ROWID, "struct_demo_rows"),
}


# Output schema shared by the INSERT/UPDATE/DELETE handlers: one NOT NULL
# int64 column named ``count``.
_COUNT_SCHEMA: pa.Schema = pa.schema(
    [
        pa.field("count", pa.int64(), nullable=False),
    ]
)
159
+
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # Storage — SQLite. Each logical table gets its own row-store SQLite table.
163
+ # Rowid is opaque (pickled tuple), so this works for int, timestamp, and
164
+ # struct rowids alike. The Arrow schema (the thing under test) is
165
+ # reconstructed from TABLES on read.
166
+ #
167
+ # The C++ extension's worker pool freely spawns multiple worker processes
168
+ # for one ATTACH (POOL_MAX caps idle pool size, not concurrency), so the
169
+ # fixture needs cross-process state; SQLite is the cheapest such store.
170
+ # To avoid leftover rows from a previous test run poisoning the next,
171
+ # the DB filename is keyed on the worker's process-group ID (the parent PID
172
+ # on Windows) — every test session gets its own file, and all worker processes
173
+ # spawned from the same DuckDB process share it without any cross-process synchronization.
174
+ # ---------------------------------------------------------------------------
175
+
176
# In-process lock held around every SQLite access made by the storage helpers.
_lock = threading.Lock()
177
+
178
+
179
def _db_path() -> str:
    """Return the path of the SQLite file that backs this fixture.

    A non-empty ``VGI_SCHEMA_RECONCILE_DB`` environment variable takes
    precedence. Otherwise the file name embeds the process-group ID on
    POSIX, or the parent PID on Windows.
    """
    explicit_path = os.environ.get("VGI_SCHEMA_RECONCILE_DB")
    if explicit_path:
        return explicit_path
    # The process-group ID is shared by every worker subprocess spawned by
    # the same DuckDB process, while distinct test invocations get distinct
    # files. PPID alone is unstable because ``uv run`` inserts an
    # intermediate process per worker; PGID propagates across fork/exec by
    # default and stays stable for the life of one test session.
    if sys.platform == "win32":  # pragma: no cover - PGID is POSIX; PPID is the
        # closest stable-per-session stand-in.
        import tempfile

        file_name = f"vgi_schema_reconcile.{os.getppid()}.sqlite"
        return os.path.join(tempfile.gettempdir(), file_name)
    pgid = os.getpgrp()
    return f"/tmp/vgi_schema_reconcile.{pgid}.sqlite"
195
+
196
+
197
def _connect() -> sqlite3.Connection:
    """Open the fixture database and make sure every storage table exists.

    Returns:
        An open connection to the file named by ``_db_path()``. The caller
        owns it and is responsible for closing it.

    Raises:
        sqlite3.Error: if connecting or any setup statement fails. The
            connection is closed before the error propagates so a failed
            setup does not leave it open.
    """
    conn = sqlite3.connect(_db_path(), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        # One two-column table per logical table: pickled rowid -> pickled payload.
        for spec in TABLES.values():
            conn.execute(
                f"CREATE TABLE IF NOT EXISTS {spec.storage_table} ( rid_blob BLOB PRIMARY KEY, payload BLOB NOT NULL)"
            )
    except BaseException:
        # Nobody else holds a reference yet, so release the handle here.
        conn.close()
        raise
    return conn
205
+
206
+
207
def _rid_key(rid: Any) -> bytes:
    """Encode a rowid value as the bytes key stored in ``rid_blob`` (its pickle)."""
    encoded = pickle.dumps(rid)
    return encoded
209
+
210
+
211
def _all_rows(spec: TableSpec) -> list[tuple[Any, dict[str, Any]]]:
    """Return every stored row of ``spec`` as ``(rowid, payload)`` pairs.

    Both values are unpickled from the table's ``rid_blob`` and ``payload``
    columns.

    NOTE: ``pickle.loads`` executes whatever the blobs contain; this is only
    appropriate because the database is written by this fixture itself.
    """
    with _lock:
        conn = _connect()
        try:
            # ``with conn`` only commits or rolls back; it does not close the
            # connection, hence the explicit close in ``finally``.
            with conn:
                cursor = conn.execute(f"SELECT rid_blob, payload FROM {spec.storage_table}")
                return [(pickle.loads(rid_blob), pickle.loads(payload)) for rid_blob, payload in cursor]
        finally:
            conn.close()
217
+
218
+
219
def _insert_row(spec: TableSpec, rid: Any, payload: dict[str, Any]) -> None:
    """Store ``payload`` under ``rid`` in ``spec``'s table.

    Uses ``INSERT OR REPLACE``, so an existing row with the same rowid key
    is overwritten rather than rejected.
    """
    row = (_rid_key(rid), pickle.dumps(payload))
    with _lock:
        conn = _connect()
        try:
            # ``with conn`` commits on success / rolls back on error but does
            # not close the connection, hence the explicit close below.
            with conn:
                conn.execute(
                    f"INSERT OR REPLACE INTO {spec.storage_table} (rid_blob, payload) VALUES (?, ?)",
                    row,
                )
        finally:
            conn.close()
225
+
226
+
227
def _update_row(spec: TableSpec, rid: Any, updates: dict[str, Any]) -> bool:
    """Merge ``updates`` into the stored payload of the row keyed by ``rid``.

    Returns:
        ``True`` if the row existed and was rewritten, ``False`` if no row
        with that rowid key is stored.
    """
    key = _rid_key(rid)
    with _lock:
        conn = _connect()
        try:
            # ``with conn`` handles commit/rollback only; the connection is
            # closed explicitly in ``finally``.
            with conn:
                row = conn.execute(
                    f"SELECT payload FROM {spec.storage_table} WHERE rid_blob = ?",
                    (key,),
                ).fetchone()
                if row is None:
                    return False
                payload = pickle.loads(row[0])
                payload.update(updates)
                conn.execute(
                    f"UPDATE {spec.storage_table} SET payload = ? WHERE rid_blob = ?",
                    (pickle.dumps(payload), key),
                )
                return True
        finally:
            conn.close()
242
+
243
+
244
def _delete_row(spec: TableSpec, rid: Any) -> bool:
    """Delete the row keyed by ``rid`` from ``spec``'s table.

    Returns:
        ``True`` if a row was removed, ``False`` if none matched.
    """
    key = _rid_key(rid)
    with _lock:
        conn = _connect()
        try:
            # ``with conn`` commits/rolls back but leaves the connection
            # open, so it is closed explicitly in ``finally``.
            with conn:
                cur = conn.execute(
                    f"DELETE FROM {spec.storage_table} WHERE rid_blob = ?",
                    (key,),
                )
                return cur.rowcount > 0
        finally:
            conn.close()
251
+
252
+
253
def _next_int_rowid(spec: TableSpec) -> int:
    """Return the next integer rowid for the int64-rowid table (autoincrement-ish).

    The integer rowid is kept in each payload under ``__rid__`` for
    convenience of monotonic generation; payloads without that key count
    as 0. The result is one more than the largest stored value, or 1 when
    the table is empty.
    """
    with _lock:
        conn = _connect()
        try:
            # ``with conn`` does not close the connection; do it explicitly.
            with conn:
                rows = conn.execute(f"SELECT payload FROM {spec.storage_table}").fetchall()
        finally:
            conn.close()
    return max((pickle.loads(row[0]).get("__rid__", 0) for row in rows), default=0) + 1
261
+
262
+
263
+ # ---------------------------------------------------------------------------
264
+ # Strict schema verifier
265
+ # ---------------------------------------------------------------------------
266
+
267
+
268
def _strict_assert_schema(label: str, actual: pa.Schema, expected: pa.Schema) -> None:
    """Raise ``ValueError`` unless ``actual`` equals ``expected`` (metadata ignored).

    The vgi C++ ``ReconcileBatchToSchema`` helper is what makes these
    schemas equal — DuckDB on its own emits batches with all-nullable
    fields, ``timestamp[us, tz=session]`` for TZ timestamps, and so on.
    A mismatch here means reconciliation regressed.

    The error message lists per-field differences in name, nullability and
    type for the positions both schemas share, plus a field-count line when
    the lengths differ.
    """
    if actual.equals(expected, check_metadata=False):
        return

    problems: list[str] = []
    n_actual, n_expected = len(actual), len(expected)
    if n_actual != n_expected:
        problems.append(f"field count: actual={n_actual} expected={n_expected}")
    # zip() stops at the shorter schema, so only shared positions are compared.
    for pos, (got, want) in enumerate(zip(actual, expected)):
        if got.name != want.name:
            problems.append(f"field[{pos}].name: actual={got.name!r} expected={want.name!r}")
        if got.nullable != want.nullable:
            problems.append(f"field[{pos}={got.name!r}].nullable: actual={got.nullable} expected={want.nullable}")
        if not got.type.equals(want.type):
            problems.append(f"field[{pos}={got.name!r}].type: actual={got.type} expected={want.type}")

    header = f"[schema_reconcile] {label} batch schema mismatch (reconciliation regression?):\n"
    bullets = "\n".join(f" - {p}" for p in problems)
    footer = f"\n actual: {actual}\n expected: {expected}"
    raise ValueError(header + bullets + footer)
296
+
297
+
298
+ # ---------------------------------------------------------------------------
299
+ # Handler functions
300
+ # ---------------------------------------------------------------------------
301
+
302
+
303
def _emit_count(out: OutputCollector, n: int) -> None:
    """Emit a one-row batch holding ``n`` in the ``count`` column."""
    count_batch = pa.RecordBatch.from_pydict({"count": [n]}, schema=_COUNT_SCHEMA)
    out.emit(count_batch)
305
+
306
+
307
def _spec_from_args(positional: tuple[Any, ...]) -> TableSpec:
    """Resolve the ``TableSpec`` named by the first positional argument.

    ``positional[0]`` is expected to expose ``as_py()`` yielding the table
    name.

    Raises:
        ValueError: if there is no first argument (or it is ``None``), or
            if the name is not a key of ``TABLES``.
    """
    first = positional[0] if positional else None
    if first is None:
        raise ValueError("schema_reconcile handler: missing table_name positional[0]")
    name = str(first.as_py())
    if name not in TABLES:
        raise ValueError(f"schema_reconcile handler: unknown table {name!r}")
    return TABLES[name]
315
+
316
+
317
def _row_to_dict(batch: pa.RecordBatch, i: int, fields: list[str]) -> dict[str, Any]:
    """Return row ``i`` of ``batch`` as a dict of Python values for the named columns."""
    row: dict[str, Any] = {}
    for column_name in fields:
        cell = batch.column(column_name)[i]
        row[column_name] = cell.as_py()
    return row
319
+
320
+
321
def _generate_rowid(spec: TableSpec, payload: dict[str, Any]) -> Any:
    """Synthesize a rowid for an inserted row (INSERT input has no rowid column).

    The strategy depends on the table's rowid type:

    - int64: the next integer from ``_next_int_rowid``; it is also written
      into ``payload`` under ``__rid__``.
    - timestamp: the row's own ``ts`` value.
    - struct: ``{"a": <id>, "b": <nested.b>}`` built from the row.

    Raises:
        ValueError: for any other rowid type.
    """
    rowid_type = spec.rowid_field.type

    if rowid_type.equals(pa.int64()):
        next_id = _next_int_rowid(spec)
        payload["__rid__"] = next_id
        return next_id

    if isinstance(rowid_type, pa.TimestampType):
        # Use the row's `ts` column as a rowid — guaranteed unique enough
        # for tests since tests insert distinct timestamps. Stored as the
        # Python ``datetime`` value the user inserted.
        return payload["ts"]

    if isinstance(rowid_type, pa.StructType):
        # ``id`` feeds the NOT NULL int64 leaf ``a``; ``nested.b`` feeds the
        # nullable string leaf ``b``.
        a_part = payload["id"]
        b_part = payload["nested"].get("b")
        return {"a": a_part, "b": b_part}

    raise ValueError(f"unhandled rowid type: {spec.rowid_field.type}")
336
+
337
+
338
class SchemaReconcileInsert(TableInOutGenerator[None, None]):
    """INSERT handler — requires each input batch to match USER_SCHEMA exactly."""

    class Meta:
        name = "schema_reconcile_insert"
        description = "INSERT handler for the schema_reconcile fixture"

    @classmethod
    def on_bind(cls, params: BindParams[None]) -> BindResponse:
        """Declare the single-column ``count`` output schema."""
        return BindResponse(output_schema=_COUNT_SCHEMA)

    @classmethod
    def process(
        cls,
        params: ProcessParams[None],
        state: None,
        batch: pa.RecordBatch,
        out: OutputCollector,
    ) -> None:
        """Validate ``batch``, store each row, then emit the number of rows received.

        The target table comes from the first positional bind argument. A
        rowid is synthesized per row because INSERT input carries none.
        """
        assert params.init_call is not None
        bind_args = params.init_call.bind_call.arguments
        spec = _spec_from_args(bind_args.positional)
        _strict_assert_schema(f"INSERT[{spec.name}]", batch.schema, USER_SCHEMA)

        column_names = USER_SCHEMA.names
        row_count = batch.num_rows
        for row_index in range(row_count):
            row_payload = _row_to_dict(batch, row_index, column_names)
            row_id = _generate_rowid(spec, row_payload)
            _insert_row(spec, row_id, row_payload)
        _emit_count(out, row_count)
366
+
367
+
368
class SchemaReconcileUpdate(TableInOutGenerator[None, None]):
    """UPDATE handler for the schema_reconcile fixture.

    Each input batch must carry ``rowid`` plus the user columns being
    updated, and every field must keep the nullability and type declared in
    the target table's schema.
    """

    class Meta:
        name = "schema_reconcile_update"
        description = "UPDATE handler for the schema_reconcile fixture"

    @classmethod
    def on_bind(cls, params: BindParams[None]) -> BindResponse:
        """Declare the single-column ``count`` output schema."""
        return BindResponse(output_schema=_COUNT_SCHEMA)

    @classmethod
    def process(
        cls,
        params: ProcessParams[None],
        state: None,
        batch: pa.RecordBatch,
        out: OutputCollector,
    ) -> None:
        """Validate ``batch``, apply it row by row, and emit the number of rows updated.

        Raises:
            ValueError: if ``rowid`` is missing, if the batch has a column
                the table does not declare, or if a field's type or
                nullability differs from the declared one.
        """
        assert params.init_call is not None
        spec = _spec_from_args(params.init_call.bind_call.arguments.positional)
        cols = batch.schema.names
        if "rowid" not in cols:
            raise ValueError(f"[schema_reconcile] UPDATE[{spec.name}] missing rowid column; got: {cols}")

        full = spec.table_schema
        for f in batch.schema:
            index = full.get_field_index(f.name)
            if index < 0:
                # get_field_index signals "not found" with -1, and field(-1)
                # would resolve to the last declared field (the rowid), so
                # an unknown column must be rejected before the lookup.
                raise ValueError(
                    f"[schema_reconcile] UPDATE[{spec.name}] unexpected column {f.name!r}; "
                    f"declared columns: {full.names}"
                )
            expected = full.field(index)
            if f.nullable != expected.nullable or not f.type.equals(expected.type):
                raise ValueError(
                    f"[schema_reconcile] UPDATE[{spec.name}] field {f.name!r} mismatch "
                    f"(reconciliation regression?): "
                    f"actual=({f.type}, nullable={f.nullable}) "
                    f"expected=({expected.type}, nullable={expected.nullable})"
                )

        # Look each column up once rather than once per row.
        rowid_column = batch.column("rowid")
        update_columns = {c: batch.column(c) for c in cols if c != "rowid"}
        n = 0
        for i in range(batch.num_rows):
            rid = rowid_column[i].as_py()
            updates = {c: column[i].as_py() for c, column in update_columns.items()}
            if _update_row(spec, rid, updates):
                n += 1
        _emit_count(out, n)
415
+
416
+
417
class SchemaReconcileDelete(TableInOutGenerator[None, None]):
    """DELETE handler — requires a rowid-only batch with the declared flag/type."""

    class Meta:
        name = "schema_reconcile_delete"
        description = "DELETE handler for the schema_reconcile fixture"

    @classmethod
    def on_bind(cls, params: BindParams[None]) -> BindResponse:
        """Declare the single-column ``count`` output schema."""
        return BindResponse(output_schema=_COUNT_SCHEMA)

    @classmethod
    def process(
        cls,
        params: ProcessParams[None],
        state: None,
        batch: pa.RecordBatch,
        out: OutputCollector,
    ) -> None:
        """Validate ``batch``, delete each listed row, and emit how many existed."""
        assert params.init_call is not None
        bind_args = params.init_call.bind_call.arguments
        spec = _spec_from_args(bind_args.positional)
        _strict_assert_schema(f"DELETE[{spec.name}]", batch.schema, spec.delete_input_schema)

        deleted = 0
        for row_index in range(batch.num_rows):
            row_id = batch.column("rowid")[row_index].as_py()
            # Only rows that were actually present count toward the total.
            if _delete_row(spec, row_id):
                deleted += 1
        _emit_count(out, deleted)
445
+
446
+
447
class SchemaReconcileScan(TableFunctionGenerator[None, None]):
    """SELECT handler — emits the table's stored rows in its declared schema."""

    class Meta:
        name = "schema_reconcile_scan"
        description = "SCAN handler for the schema_reconcile fixture"
        projection_pushdown = True

    @classmethod
    def on_bind(cls, params: BindParams[None]) -> BindResponse:
        """Declare the full schema of the table named by the first bind argument."""
        table_spec = _spec_from_args(params.bind_call.arguments.positional)
        return BindResponse(output_schema=table_spec.table_schema)

    @classmethod
    def on_init(cls, params: InitParams[None]) -> GlobalInitResponse:
        """Restrict the scan to a single worker."""
        # One worker emits the full table; with parallel workers each
        # would duplicate every row.
        return GlobalInitResponse(max_workers=1)

    @classmethod
    def process(cls, params: ProcessParams[None], state: None, out: OutputCollector) -> None:
        """Emit all stored rows as one batch shaped like ``params.output_schema``, then finish."""
        assert params.init_call is not None
        spec = _spec_from_args(params.init_call.bind_call.arguments.positional)
        out_schema = params.output_schema
        wanted_columns = out_schema.names

        projected: list[dict[str, Any]] = []
        for rid, payload in _all_rows(spec):
            record = dict(payload)
            record["rowid"] = rid
            # Don't emit the bookkeeping ``__rid__`` column.
            record.pop("__rid__", None)
            projected.append({column: record[column] for column in wanted_columns})

        result_batch = pa.RecordBatch.from_pylist(projected, schema=out_schema)
        out.emit(result_batch)
        out.finish()
479
+
480
+
481
+ # ---------------------------------------------------------------------------
482
+ # Catalog
483
+ # ---------------------------------------------------------------------------
484
+
485
+
486
def _serialize_schema(schema: pa.Schema) -> bytes:
    """Return ``schema`` serialized as an Arrow IPC stream containing no batches."""
    buffer_sink = pa.BufferOutputStream()
    # Opening and immediately closing the stream writer leaves only the
    # schema (and end-of-stream marker) in the sink.
    stream_writer = pa.ipc.new_stream(buffer_sink, schema)
    stream_writer.close()
    return buffer_sink.getvalue().to_pybytes()
491
+
492
+
493
# Handler classes published by this fixture: the three write paths and the scan.
_FUNCTIONS = [
    SchemaReconcileInsert,
    SchemaReconcileUpdate,
    SchemaReconcileDelete,
    SchemaReconcileScan,
]


# The only schema of the catalog. ``tables`` is left empty here; the tables
# are published by the catalog interface's ``schema_contents`` override.
_MAIN_SCHEMA = Schema(
    name=_SCHEMA_NAME,
    comment="Schema-reconcile fixture catalog",
    functions=list(_FUNCTIONS),
    tables=[],
)

_CATALOG = Catalog(
    name=CATALOG_NAME,
    default_schema=_SCHEMA_NAME,
    schemas=[_MAIN_SCHEMA],
)
513
+
514
+
515
class SchemaReconcileCatalog(ReadOnlyCatalogInterface):
    """Catalog exposing the three writable schema-reconcile tables."""

    catalog = _CATALOG
    catalog_name = CATALOG_NAME

    def _table_info(self, spec: TableSpec) -> TableInfo:
        """Build the ``TableInfo`` for ``spec``, flagged insertable, updatable and deletable."""
        serialized_columns = SerializedSchema(_serialize_schema(spec.table_schema))
        return TableInfo(
            comment=f"Schema-reconcile {spec.name} (rowid type {spec.rowid_field.type})",
            tags={},
            name=spec.name,
            schema_name=_SCHEMA_NAME,
            columns=serialized_columns,
            not_null_constraints=[],
            unique_constraints=[],
            check_constraints=[],
            supports_insert=True,
            supports_update=True,
            supports_delete=True,
        )

    def schemas(
        self, *, attach_opaque_data: AttachOpaqueData, transaction_opaque_data: TransactionOpaqueData | None
    ) -> list[SchemaInfo]:
        """Return the parent's schema list with the real table count for our schema."""
        # The declarative ``Schema(tables=[])`` would auto-populate
        # ``estimated_object_count[table] = 0``, which the C++ client treats
        # as a hard guarantee and uses to skip the bulk RPC. But this catalog
        # publishes tables via the ``schema_contents`` override below, not
        # via the declarative ``tables=`` field — so the count is wrong.
        # Override at the catalog level to report the real population.
        schema_infos = super().schemas(
            attach_opaque_data=attach_opaque_data, transaction_opaque_data=transaction_opaque_data
        )
        for position, info in enumerate(schema_infos):
            if info.name != _SCHEMA_NAME:
                continue
            object_counts = dict(info.estimated_object_count or {})
            object_counts["table"] = len(TABLES)
            schema_infos[position] = SchemaInfo(
                attach_opaque_data=info.attach_opaque_data,
                name=info.name,
                comment=info.comment,
                tags=info.tags,
                estimated_object_count=object_counts,
            )
        return schema_infos

    def schema_contents(
        self,
        *,
        attach_opaque_data: AttachOpaqueData,
        transaction_opaque_data: TransactionOpaqueData | None,
        name: str,
        type: Any,
    ) -> Any:
        """List the fixture tables for our schema; defer every other request to the parent."""
        wants_our_tables = name.lower() == _SCHEMA_NAME and type == SchemaObjectType.TABLE
        if not wants_our_tables:
            return super().schema_contents(
                attach_opaque_data=attach_opaque_data,
                transaction_opaque_data=transaction_opaque_data,
                name=name,
                type=type,
            )
        return [self._table_info(spec) for spec in TABLES.values()]

    def table_get(
        self,
        *,
        attach_opaque_data: AttachOpaqueData,
        transaction_opaque_data: TransactionOpaqueData | None,
        schema_name: str,
        name: str,
        at_unit: str | None = None,
        at_value: str | None = None,
    ) -> TableInfo | None:
        """Look up one table case-insensitively; ``None`` if the schema or table is unknown."""
        if schema_name.lower() != _SCHEMA_NAME:
            return None
        spec = TABLES.get(name.lower())
        if spec is None:
            return None
        return self._table_info(spec)

    def _route(self, fn_name: str, schema_name: str, name: str) -> ScanFunctionResult:
        """Point at handler ``fn_name``, passing the table name as its only positional argument."""
        table_name_arg = pa.scalar(name, type=pa.string())
        return ScanFunctionResult(
            function_name=fn_name,
            positional_arguments=[table_name_arg],
            named_arguments={},
            required_extensions=[],
        )

    def table_scan_function_get(
        self,
        *,
        attach_opaque_data: AttachOpaqueData,
        transaction_opaque_data: TransactionOpaqueData | None,
        schema_name: str,
        name: str,
        at_unit: str | None,
        at_value: str | None,
    ) -> ScanFunctionResult:
        """Route SELECT to ``schema_reconcile_scan``."""
        return self._route("schema_reconcile_scan", schema_name, name)

    def table_insert_function_get(
        self,
        *,
        attach_opaque_data: AttachOpaqueData,
        transaction_opaque_data: TransactionOpaqueData | None,
        schema_name: str,
        name: str,
        writable_branch_function_name: str | None = None,
    ) -> ScanFunctionResult:
        """Route INSERT to ``schema_reconcile_insert``; the branch function name is ignored."""
        del writable_branch_function_name
        return self._route("schema_reconcile_insert", schema_name, name)

    def table_update_function_get(
        self,
        *,
        attach_opaque_data: AttachOpaqueData,
        transaction_opaque_data: TransactionOpaqueData | None,
        schema_name: str,
        name: str,
    ) -> ScanFunctionResult:
        """Route UPDATE to ``schema_reconcile_update``."""
        return self._route("schema_reconcile_update", schema_name, name)

    def table_delete_function_get(
        self,
        *,
        attach_opaque_data: AttachOpaqueData,
        transaction_opaque_data: TransactionOpaqueData | None,
        schema_name: str,
        name: str,
    ) -> ScanFunctionResult:
        """Route DELETE to ``schema_reconcile_delete``."""
        return self._route("schema_reconcile_delete", schema_name, name)
640
+
641
+
642
+ # ---------------------------------------------------------------------------
643
+ # Worker
644
+ # ---------------------------------------------------------------------------
645
+
646
+
647
class SchemaReconcileWorker(Worker):
    """Worker exposing the schema-reconcile fixture catalog."""

    # Catalog interface class plus the declarative catalog it serves.
    catalog_interface = SchemaReconcileCatalog
    catalog_name = CATALOG_NAME
    catalog = _CATALOG
    # Own copy of the handler list, independent of the module-level one.
    functions = [*_FUNCTIONS]