vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/meta_worker.py ADDED
@@ -0,0 +1,661 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """MetaWorker — composes multiple Worker instances in a single process.
4
+
5
+ Each Worker manages its own catalog interface. The MetaWorker dispatches
6
+ VgiProtocol calls to the right Worker based on catalog name (for attach)
7
+ and wrapped attach_opaque_data (for everything else).
8
+
9
+ attach_opaque_data wrapping:
10
+ Each sub-worker may use the same underlying attach_opaque_data. The MetaWorker
11
+ prepends a 3-byte magic marker plus a 1-byte worker index to distinguish them:
12
+ wrapped = MetaWorker._WRAP_MAGIC + bytes([worker_index]) + original_attach_opaque_data
13
+
14
+ Usage::
15
+
16
+ MetaWorker.serve(ExampleWorker, WritableWorker)
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import dataclasses
22
+ import logging
23
+ from typing import Any
24
+
25
+ from vgi_rpc.rpc import CallContext, Stream
26
+
27
+ from vgi.catalog.catalog_interface import AttachOpaqueData, CatalogAttachResult
28
+ from vgi.invocation import GlobalInitResponse
29
+ from vgi.protocol import (
30
+ BindRequest,
31
+ CatalogAttachRequest,
32
+ CatalogsResponse,
33
+ InitRequest,
34
+ ProcessState,
35
+ )
36
+ from vgi.worker import Worker
37
+
38
+ logger = logging.getLogger("vgi.meta_worker")
39
+
40
+
41
+ def _attach_opaque_data_short(attach_opaque_data: bytes | None) -> str:
42
+ """Stable, low-cardinality identifier for an attach_opaque_data, suitable for logs."""
43
+ if not attach_opaque_data:
44
+ return "-"
45
+ return attach_opaque_data.hex()[:16]
46
+
47
+
48
def _make_attach_delegate(name: str) -> Any:
    """Build the MetaWorker method that forwards ``name`` to the owning sub-worker.

    The generated method pops the wrapped ``attach_opaque_data`` keyword,
    resolves the sub-worker it encodes, and calls the same-named method on
    that sub-worker with the unwrapped id and all remaining keywords.

    The returned function advertises the signature of ``Worker.<name>`` so
    that vgi_rpc's validation passes.

    Args:
        name: Name of the ``Worker`` method to delegate to.

    Returns:
        A function suitable for ``setattr(MetaWorker, name, ...)``.
    """
    import inspect

    # Signature of the real Worker method; copied onto the delegate below.
    worker_signature = inspect.signature(getattr(Worker, name))

    def method(self: MetaWorker, **kwargs: Any) -> Any:
        wrapped_aid = kwargs.pop("attach_opaque_data")
        target, unwrapped_aid = self._unwrap_attach_opaque_data(wrapped_aid)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "dispatch method=%s sub_worker_index=%d wrapped_aid=%s unwrapped_aid=%s",
                name,
                self._wrapped_index(wrapped_aid),
                _attach_opaque_data_short(wrapped_aid),
                _attach_opaque_data_short(unwrapped_aid),
            )
        delegate = getattr(target, name)
        return delegate(attach_opaque_data=unwrapped_aid, **kwargs)

    # Make the delegate look like the Worker method it stands in for.
    method.__name__ = name
    method.__qualname__ = f"MetaWorker.{name}"
    method.__signature__ = worker_signature  # type: ignore[attr-defined]
    return method
77
+
78
+
79
# Methods where attach_opaque_data is the first parameter (most catalog methods).
#
# Every name listed here gets a generated delegate on MetaWorker (see the
# registration loop at the bottom of this module): the delegate unwraps the
# MetaWorker-prefixed attach_opaque_data and forwards the call to the
# sub-worker it identifies. Each name must therefore exist on ``Worker``,
# because ``_make_attach_delegate`` copies that method's signature.
_ATTACH_ID_METHODS = [
    # Catalog lifecycle and transactions
    "catalog_detach",
    "catalog_version",
    "catalog_transaction_begin",
    "catalog_transaction_commit",
    "catalog_transaction_rollback",
    # Schemas
    "catalog_schemas",
    "catalog_schema_get",
    "catalog_schema_create",
    "catalog_schema_drop",
    "catalog_schema_contents_tables",
    "catalog_schema_contents_views",
    "catalog_schema_contents_functions",
    "catalog_schema_contents_macros",
    # Tables
    "catalog_table_get",
    "catalog_table_drop",
    "catalog_table_scan_function_get",
    "catalog_table_scan_branches_get",
    "catalog_table_column_statistics_get",
    "catalog_table_insert_function_get",
    "catalog_table_update_function_get",
    "catalog_table_delete_function_get",
    "catalog_table_comment_set",
    "catalog_table_column_comment_set",
    "catalog_table_rename",
    "catalog_table_column_add",
    "catalog_table_column_drop",
    "catalog_table_column_rename",
    "catalog_table_column_default_set",
    "catalog_table_column_default_drop",
    "catalog_table_column_type_change",
    "catalog_table_not_null_drop",
    "catalog_table_not_null_set",
    # Views
    "catalog_view_get",
    "catalog_view_create",
    "catalog_view_drop",
    "catalog_view_rename",
    "catalog_view_comment_set",
    # Macros
    "catalog_macro_get",
    "catalog_macro_drop",
    # Indexes
    "catalog_index_get",
    "catalog_index_drop",
    "catalog_schema_contents_indexes",
]
124
+
125
+
126
class MetaWorker:
    """Composes multiple Worker instances, dispatching VgiProtocol calls.

    Each Worker has its own catalog interface and function registry.
    The MetaWorker wraps/unwraps attach_opaque_data values to route calls to
    the right worker, and falls back to a function-name registry scan for
    requests that carry no (or an unwrapped) attach_opaque_data.
    """

    # ========== attach_opaque_data wrapping ==========
    #
    # Wire format of a MetaWorker-wrapped attach_opaque_data:
    #
    #   [ 'M' 'W' 0x00 ][ <index byte> ][ <original attach_opaque_data bytes> ]
    #     ^^^^^^^^^^^^^   ^^^^^^^^^^^^    ^^^^^^^^^^^^^^^^^^^^^^^^^^
    #     magic prefix    sub-worker      original attach_opaque_data as
    #     (3 bytes)       index (1B)      vended by the sub-worker
    #
    # The magic prefix means a wrapped attach_opaque_data is self-identifying: a
    # 4-byte ``MW\0<idx>`` prefix is unmistakable in logs and storage shard
    # ids (shows as ``att-4d570000...`` in hex). Without it, a bare 17-byte
    # attach_opaque_data whose first byte happened to be ``\x00`` was
    # indistinguishable from a wrapped ``[0][16 bytes]`` — making MetaWorker
    # routing bugs silently mis-route (see the dynamic_to_string failure that
    # motivated this marker).
    #
    # 4-byte total overhead (3 magic + 1 index). Sub-workers see the
    # un-prefixed bytes and don't need to know MetaWorker exists.

    _WRAP_MAGIC = b"MW\x00"
    _WRAP_OVERHEAD = len(_WRAP_MAGIC) + 1  # magic + index byte
    # The sub-worker index is encoded in a single byte of the wrapped id.
    _MAX_WORKERS = 256

    def __init__(self, workers: list[Worker]) -> None:
        """Initialize with a list of Worker instances.

        Args:
            workers: Sub-workers to compose. A worker's position in this list
                is the index encoded into every attach_opaque_data it vends.

        Raises:
            ValueError: If ``workers`` is empty, or holds more workers than a
                one-byte index can address.
        """
        if not workers:
            raise ValueError("MetaWorker requires at least one Worker instance")
        if len(workers) > self._MAX_WORKERS:
            raise ValueError(
                f"MetaWorker supports at most {self._MAX_WORKERS} workers "
                f"(1-byte index), got {len(workers)}"
            )

        self._workers = workers
        self._name_to_index: dict[str, int] = {}
        # The HTTP transport's state-rehydration path expects the
        # implementation to expose ``_vgi_tracer`` directly. Borrow the
        # first worker's tracer; all workers in one process share whatever
        # the otel config produced.
        self._vgi_tracer = workers[0]._vgi_tracer

        for i, w in enumerate(workers):
            try:
                cat = w._get_catalog()
                for info in cat.catalogs():
                    self._name_to_index[info.name] = i
            except ValueError:
                # Best-effort indexing: a ValueError is treated as "nothing to
                # index for this worker" (assumed to mean it has no catalog —
                # confirm against Worker._get_catalog).
                logger.debug(
                    "sub-worker index=%d class=%s skipped during catalog indexing",
                    i,
                    type(w).__name__,
                )

        # Detailed startup record — each sub-worker's index → catalog mapping.
        # The 1-byte index is part of the prefix MetaWorker prepends to
        # attach_opaque_data values; making it explicit here means a stray
        # ``wrapped_aid=…`` in a dispatch log can be cross-referenced without
        # source diving.
        if logger.isEnabledFor(logging.INFO):
            mapping = [
                {
                    "index": i,
                    "worker_class": type(w).__name__,
                    "catalogs": [name for name, idx in self._name_to_index.items() if idx == i],
                }
                for i, w in enumerate(workers)
            ]
            logger.info(
                "MetaWorker initialized: %d workers, mapping=%s",
                len(workers),
                mapping,
            )

    # ========== Shared routing helpers ==========

    def _find_worker_for_function(self, function_name: str) -> Worker | None:
        """Return the first sub-worker whose registry contains ``function_name``, else None."""
        for w in self._workers:
            if function_name in type(w)._build_registry():
                return w
        return None

    def _try_unwrap(self, wrapped_id: bytes) -> tuple[Worker, bytes] | None:
        """Unwrap ``wrapped_id``; return None instead of raising when it is not a valid wrapped id.

        Only the unwrap itself is guarded. Callers invoke the sub-worker
        *outside* any ``except`` so that a ``KeyError``/``IndexError`` raised
        by the sub-worker propagates instead of being mistaken for a bad id.
        """
        try:
            return self._unwrap_attach_opaque_data(wrapped_id)
        except (IndexError, KeyError):
            return None

    def _log_dispatch(self, method_name: str, function_name: Any, wrapped_aid: bytes, original_id: bytes) -> None:
        """Emit the DEBUG routing record for a call dispatched via a wrapped id."""
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "dispatch method=%s function=%r sub_worker_index=%d wrapped_aid=%s unwrapped_aid=%s",
                method_name,
                function_name,
                self._wrapped_index(wrapped_aid),
                _attach_opaque_data_short(wrapped_aid),
                _attach_opaque_data_short(original_id),
            )

    def _resolve_function(self, request: BindRequest) -> Any:
        """Dispatch function-class resolution to the worker that hosts it.

        The HTTP state-rehydration path calls this on the implementation
        without any attach_opaque_data, so route by function name across all
        sub-workers.

        Raises:
            ValueError: If no sub-worker registers ``request.function_name``.
        """
        worker = self._find_worker_for_function(request.function_name)
        if worker is None:
            msg = f"Unknown function: '{request.function_name}'"
            raise ValueError(msg)
        return worker._resolve_function(request)

    def _wrap_attach_opaque_data(self, worker_index: int, attach_opaque_data: bytes) -> bytes:
        """Prepend MetaWorker magic + sub-worker index to a sub-worker's attach_opaque_data."""
        return self._WRAP_MAGIC + bytes([worker_index]) + attach_opaque_data

    def _is_wrapped(self, attach_opaque_data: bytes) -> bool:
        """Return whether ``attach_opaque_data`` starts with the MetaWorker magic.

        The value must also be long enough to hold the index byte.
        """
        return (
            len(attach_opaque_data) >= self._WRAP_OVERHEAD
            and attach_opaque_data[: len(self._WRAP_MAGIC)] == self._WRAP_MAGIC
        )

    def _wrapped_index(self, wrapped_id: bytes) -> int:
        """Return the sub-worker index encoded in a wrapped attach_opaque_data.

        Returns -1 for values that are not MetaWorker-wrapped (defensive —
        callers use this only for logging).
        """
        if not self._is_wrapped(wrapped_id):
            return -1
        return wrapped_id[len(self._WRAP_MAGIC)]

    def _unwrap_attach_opaque_data(self, wrapped_id: bytes) -> tuple[Worker, bytes]:
        """Verify the magic, then split into (sub-worker, original attach_opaque_data).

        Raises:
            KeyError: When the magic is missing or the index byte is out of
                range. Function-dispatch callers in this class treat that as
                "not routable by id" and fall back to a function-name-based
                registry scan (the legacy path for clients that never
                round-tripped through ``catalog_attach``).
        """
        if not self._is_wrapped(wrapped_id):
            raise KeyError(
                f"attach_opaque_data is not MetaWorker-wrapped (missing magic): "
                f"first8={wrapped_id[:8].hex() if wrapped_id else '-'}"
            )
        idx = wrapped_id[len(self._WRAP_MAGIC)]
        original = wrapped_id[self._WRAP_OVERHEAD :]
        if idx >= len(self._workers):
            raise KeyError(f"MetaWorker sub-worker index {idx} out of range (have {len(self._workers)} workers)")
        return self._workers[idx], original

    # ========== Catalog listing ==========

    def catalog_catalogs(self) -> CatalogsResponse:
        """Return union of all catalog discovery records across all workers."""
        infos = []
        for w in self._workers:
            try:
                cat = w._get_catalog()
                infos.extend(cat.catalogs())
            except ValueError:
                # Best-effort: same skip rule as the indexing done in __init__.
                pass
        return CatalogsResponse.from_infos(infos)

    # ========== Catalog attach (dispatch by name, wrap result) ==========

    def catalog_attach(
        self,
        request: CatalogAttachRequest,
        *,
        ctx: CallContext | None = None,
    ) -> CatalogAttachResult:
        """Attach to a catalog — dispatch by name with dynamic fallback.

        A known catalog name goes straight to its owning worker. An unknown
        name is offered to each worker in order; the first one that does not
        raise ``ValueError``/``NotImplementedError`` becomes the owner and is
        remembered. The returned attach_opaque_data is wrapped with the
        owner's index.

        Raises:
            ValueError: If no worker accepts the catalog name.
        """
        idx = self._name_to_index.get(request.name)

        if idx is not None:
            result = self._workers[idx].catalog_attach(request, ctx=ctx)
        else:
            for i, w in enumerate(self._workers):
                try:
                    result = w.catalog_attach(request, ctx=ctx)
                except (ValueError, NotImplementedError):
                    continue
                idx = i
                self._name_to_index[request.name] = i
                break
            else:
                msg = f"No worker handles catalog '{request.name}'"
                raise ValueError(msg)

        wrapped = self._wrap_attach_opaque_data(idx, result.attach_opaque_data)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "catalog_attach catalog=%r sub_worker_index=%d original_aid=%s wrapped_aid=%s",
                request.name,
                idx,
                _attach_opaque_data_short(result.attach_opaque_data),
                _attach_opaque_data_short(wrapped),
            )
        return dataclasses.replace(result, attach_opaque_data=AttachOpaqueData(wrapped))

    # ========== Name-based dispatch (no attach_opaque_data) ==========

    def catalog_create(self, request: Any) -> None:
        """Create a catalog — dispatch to first worker that handles it.

        The creating worker is recorded as the catalog's owner (unless the
        name already has one) so that a later ``catalog_drop`` can be routed
        without an intervening ``catalog_attach``.

        Raises:
            ValueError: If every worker rejects the request.
        """
        for i, w in enumerate(self._workers):
            try:
                w.catalog_create(request)
            except (ValueError, NotImplementedError):
                continue
            name = getattr(request, "name", None)
            if name is not None:
                # setdefault: never re-route a name that already has an owner.
                self._name_to_index.setdefault(name, i)
            return
        msg = f"No worker handles catalog_create for '{request.name}'"
        raise ValueError(msg)

    def catalog_drop(self, name: str) -> None:
        """Drop a catalog — dispatch to the worker that owns it.

        Raises:
            ValueError: If no worker is recorded as owning ``name``.
        """
        idx = self._name_to_index.get(name)
        if idx is None:
            msg = f"No worker owns catalog '{name}'"
            raise ValueError(msg)
        self._workers[idx].catalog_drop(name=name)

    # ========== Request-object methods (attach_opaque_data inside request) ==========

    def _delegate_attach_request(self, request: Any, method_name: str) -> None:
        """Unwrap ``request.attach_opaque_data`` and call ``method_name`` on the owning worker.

        Raises:
            KeyError: If the id is not a valid MetaWorker-wrapped id.
        """
        worker, original_id = self._unwrap_attach_opaque_data(request.attach_opaque_data)
        patched = dataclasses.replace(request, attach_opaque_data=original_id)
        getattr(worker, method_name)(patched)

    def catalog_table_create(self, request: Any) -> None:
        """Create a table — dispatch via attach_opaque_data in request."""
        self._delegate_attach_request(request, "catalog_table_create")

    def catalog_macro_create(self, request: Any) -> None:
        """Create a macro — dispatch via attach_opaque_data in request."""
        self._delegate_attach_request(request, "catalog_macro_create")

    def catalog_index_create(self, request: Any) -> None:
        """Create an index — dispatch via attach_opaque_data in request."""
        self._delegate_attach_request(request, "catalog_index_create")

    # ========== bind / init (unwrap attach_opaque_data from request) ==========

    def bind(self, request: BindRequest, ctx: CallContext) -> Any:
        """Dispatch bind to the right worker.

        Routes by the wrapped attach_opaque_data when it is present and valid;
        otherwise falls back to a registry scan by function name.

        Raises:
            ValueError: If no worker registers the function.
        """
        wrapped_aid = request.attach_opaque_data
        if wrapped_aid:
            unwrapped = self._try_unwrap(wrapped_aid)
            if unwrapped is not None:
                worker, original_id = unwrapped
                self._log_dispatch("bind", request.function_name, wrapped_aid, original_id)
                request = dataclasses.replace(request, attach_opaque_data=original_id)
                return worker.bind(request, ctx=ctx)
            # Invalid wrapped id — fall through to registry search

        worker = self._find_worker_for_function(request.function_name)
        if worker is None:
            msg = f"Unknown function '{request.function_name}'"
            raise ValueError(msg)
        logger.debug(
            "dispatch method=bind function=%r fallback=registry_scan",
            request.function_name,
        )
        return worker.bind(request, ctx=ctx)

    def init(self, request: InitRequest, ctx: CallContext) -> Stream[ProcessState, GlobalInitResponse]:
        """Dispatch init to the right worker.

        Routes by ``request.bind_call.attach_opaque_data`` when it is present
        and valid; otherwise falls back to a registry scan by function name.

        Raises:
            ValueError: If no worker registers the function.
        """
        if request.bind_call and request.bind_call.attach_opaque_data:
            wrapped_aid = request.bind_call.attach_opaque_data
            unwrapped = self._try_unwrap(wrapped_aid)
            if unwrapped is not None:
                worker, original_id = unwrapped
                self._log_dispatch("init", request.bind_call.function_name, wrapped_aid, original_id)
                bind_call = dataclasses.replace(request.bind_call, attach_opaque_data=original_id)
                request = dataclasses.replace(request, bind_call=bind_call)
                return worker.init(request, ctx=ctx)
            # Invalid wrapped id — fall through

        fn_name = request.bind_call.function_name if request.bind_call else ""
        worker = self._find_worker_for_function(fn_name)
        if worker is None:
            msg = f"Unknown function '{fn_name}'"
            raise ValueError(msg)
        logger.debug(
            "dispatch method=init function=%r fallback=registry_scan",
            fn_name,
        )
        return worker.init(request, ctx=ctx)

    def _unwrap_bind_call_attach_opaque_data(
        self,
        request: Any,
        *,
        method_name: str = "?",
    ) -> tuple[Any, Worker | None]:
        """Resolve the target sub-worker by unwrapping ``request.bind_call.attach_opaque_data``.

        Returns ``(patched_request, worker)`` where ``patched_request`` has the
        unwrapped (sub-worker-relative) attach_opaque_data and ``worker`` is the
        matching sub-worker. Returns ``(request, None)`` when the
        attach_opaque_data is missing or unwrapping fails — caller falls back
        to a registry scan by function name.

        Mirrors the unwrap that ``init``/``bind`` perform for the wrapped
        attach_opaque_data MetaWorker prepends. Without this, sibling RPCs that
        carry ``bind_call.attach_opaque_data`` (cardinality, statistics,
        dynamic_to_string) would deliver the still-wrapped id to the
        sub-worker — which then derives a different shard_key than
        ``init``/``process`` use for the same logical attach, so storage reads
        land on the wrong DO.
        """
        bind_call = getattr(request, "bind_call", None)
        if bind_call is None:
            return request, None
        wrapped_aid = getattr(bind_call, "attach_opaque_data", None)
        if not wrapped_aid:
            return request, None
        unwrapped = self._try_unwrap(wrapped_aid)
        if unwrapped is None:
            return request, None
        worker, original_id = unwrapped
        self._log_dispatch(method_name, getattr(bind_call, "function_name", "?"), wrapped_aid, original_id)
        patched_bind_call = dataclasses.replace(bind_call, attach_opaque_data=original_id)
        patched_request = dataclasses.replace(request, bind_call=patched_bind_call)
        return patched_request, worker

    def table_function_cardinality(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch cardinality estimation to the right worker.

        Raises:
            ValueError: If the request cannot be routed by id and no worker
                registers the function.
        """
        patched, worker = self._unwrap_bind_call_attach_opaque_data(
            request,
            method_name="table_function_cardinality",
        )
        if worker is not None:
            return worker.table_function_cardinality(patched, ctx=ctx)
        fn_name = request.bind_call.function_name if request.bind_call else ""
        fallback = self._find_worker_for_function(fn_name)
        if fallback is None:
            msg = f"Unknown function '{fn_name}'"
            raise ValueError(msg)
        return fallback.table_function_cardinality(request, ctx=ctx)

    def table_function_statistics(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch per-column statistics lookup to the right worker.

        Raises:
            ValueError: If the request cannot be routed by id and no worker
                registers the function.
        """
        patched, worker = self._unwrap_bind_call_attach_opaque_data(
            request,
            method_name="table_function_statistics",
        )
        if worker is not None:
            return worker.table_function_statistics(patched, ctx=ctx)
        fn_name = request.bind_call.function_name if request.bind_call else ""
        fallback = self._find_worker_for_function(fn_name)
        if fallback is None:
            msg = f"Unknown function '{fn_name}'"
            raise ValueError(msg)
        return fallback.table_function_statistics(request, ctx=ctx)

    def table_function_dynamic_to_string(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch the dynamic_to_string profiler hook to the right worker.

        Unlike the sibling table-function RPCs, an unroutable function yields
        an empty response instead of an error.
        """
        patched, worker = self._unwrap_bind_call_attach_opaque_data(
            request,
            method_name="table_function_dynamic_to_string",
        )
        if worker is not None:
            return worker.table_function_dynamic_to_string(patched, ctx=ctx)
        fn_name = request.bind_call.function_name if request.bind_call else ""
        fallback = self._find_worker_for_function(fn_name)
        if fallback is not None:
            return fallback.table_function_dynamic_to_string(request, ctx=ctx)
        # Function not registered with any worker — return empty rather than
        # raising. EXPLAIN ANALYZE must never break the query.
        from vgi.protocol import TableFunctionDynamicToStringResponse

        return TableFunctionDynamicToStringResponse(keys=[], values=[])

    # ========== Aggregate function dispatch ==========

    def _dispatch_aggregate(self, request: Any, method_name: str, ctx: CallContext) -> Any:
        """Dispatch a function-keyed RPC to the right worker.

        Routes by the request's wrapped attach_opaque_data when it is present
        and valid; otherwise scans each worker's registry for
        ``request.function_name``.

        Raises:
            ValueError: If no worker registers the function.
        """
        fn_name = getattr(request, "function_name", "")
        wrapped_aid = getattr(request, "attach_opaque_data", None)
        if wrapped_aid:
            unwrapped = self._try_unwrap(wrapped_aid)
            if unwrapped is not None:
                worker, original_id = unwrapped
                self._log_dispatch(method_name, fn_name, wrapped_aid, original_id)
                request = dataclasses.replace(request, attach_opaque_data=original_id)
                return getattr(worker, method_name)(request, ctx=ctx)
        worker = self._find_worker_for_function(fn_name)
        if worker is None:
            raise ValueError(f"Unknown aggregate function '{fn_name}'")
        logger.debug(
            "dispatch method=%s function=%r fallback=registry_scan",
            method_name,
            fn_name,
        )
        return getattr(worker, method_name)(request, ctx=ctx)

    def aggregate_bind(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_bind to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_bind", ctx)

    def aggregate_update(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_update to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_update", ctx)

    def aggregate_combine(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_combine to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_combine", ctx)

    def aggregate_finalize(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_finalize to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_finalize", ctx)

    def aggregate_destructor(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_destructor to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_destructor", ctx)

    def aggregate_window_init(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_window_init to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_window_init", ctx)

    def aggregate_window(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_window to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_window", ctx)

    def aggregate_window_destructor(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_window_destructor to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_window_destructor", ctx)

    def aggregate_window_batch(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_window_batch to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_window_batch", ctx)

    def aggregate_streaming_open(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_streaming_open to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_streaming_open", ctx)

    def aggregate_streaming_chunk(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_streaming_chunk to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_streaming_chunk", ctx)

    def aggregate_streaming_close(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch aggregate_streaming_close to the right worker."""
        return self._dispatch_aggregate(request, "aggregate_streaming_close", ctx)

    # ========== Buffered table function dispatch ==========
    # Routing key is function_name (same as aggregate). The underlying
    # _dispatch_aggregate helper isn't aggregate-specific — it just looks up
    # the function by name in each worker's registry.

    def table_buffering_process(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch table_buffering_process to the right worker."""
        return self._dispatch_aggregate(request, "table_buffering_process", ctx)

    def table_buffering_combine(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch table_buffering_combine to the right worker."""
        return self._dispatch_aggregate(request, "table_buffering_combine", ctx)

    def table_buffering_destructor(self, request: Any, ctx: CallContext) -> Any:
        """Dispatch table_buffering_destructor to the right worker."""
        return self._dispatch_aggregate(request, "table_buffering_destructor", ctx)

    def _load_table_buffering_params(
        self,
        request: Any,
        ctx: CallContext,
        *,
        attach_already_unwrapped: bool = False,
    ) -> Any:
        """Dispatch the finalize-tick driver's cold-load to the right worker.

        ``run_table_buffering_finalize_tick`` calls this via
        ``ctx.implementation._load_table_buffering_params(...)``. Under
        MetaWorker, the wrapped attach_opaque_data steers us to the right
        sub-worker; we unwrap the meta-prefix and delegate.

        ``attach_already_unwrapped`` is forwarded to the sub-worker — see
        ``Worker._load_table_buffering_params`` for semantics.

        Raises:
            ValueError: If the request cannot be routed by id and no worker
                registers the function.
        """
        fn_name = getattr(request, "function_name", "")
        wrapped_aid = getattr(request, "attach_opaque_data", None)
        if wrapped_aid:
            unwrapped = self._try_unwrap(wrapped_aid)
            if unwrapped is not None:
                worker, original_id = unwrapped
                request = dataclasses.replace(request, attach_opaque_data=original_id)
                return worker._load_table_buffering_params(
                    request,
                    ctx,
                    attach_already_unwrapped=attach_already_unwrapped,
                )
        worker = self._find_worker_for_function(fn_name)
        if worker is None:
            raise ValueError(f"Unknown table_buffering function '{fn_name}'")
        return worker._load_table_buffering_params(
            request,
            ctx,
            attach_already_unwrapped=attach_already_unwrapped,
        )

    # ========== Serve entry point ==========

    @classmethod
    def serve(cls, *worker_classes: type[Worker]) -> None:
        """Instantiate workers and serve via vgi_rpc.

        Defaults to stdin/stdout for the subprocess transport; passes
        argv through to ``run_server()`` so the worker also participates
        in the AF_UNIX launcher path when launched with
        ``--unix PATH --idle-timeout SEC`` (the vgi C++ extension uses
        this to share warm workers across DuckDB processes).

        Raises:
            ValueError: If no worker classes are given.
        """
        from vgi_rpc.rpc import run_server

        from vgi.protocol import VgiProtocol

        # Log startup (some tests check that stderr has output)
        logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")
        logger.info("worker_starting")

        workers = [wc() for wc in worker_classes]
        meta = cls(workers)
        run_server(VgiProtocol, meta)
657
+
658
+
659
# Register all attach_opaque_data-based delegate methods on MetaWorker.
# Runs once at import time: for every name in _ATTACH_ID_METHODS, attach a
# generated method that unwraps the MetaWorker-prefixed attach_opaque_data and
# forwards the call to the owning sub-worker (see _make_attach_delegate).
for _method_name in _ATTACH_ID_METHODS:
    setattr(MetaWorker, _method_name, _make_attach_delegate(_method_name))