vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/meta_worker.py
ADDED
|
@@ -0,0 +1,661 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""MetaWorker — composes multiple Worker instances in a single process.
|
|
4
|
+
|
|
5
|
+
Each Worker manages its own catalog interface. The MetaWorker dispatches
|
|
6
|
+
VgiProtocol calls to the right Worker based on catalog name (for attach)
|
|
7
|
+
and wrapped attach_opaque_data (for everything else).
|
|
8
|
+
|
|
9
|
+
attach_opaque_data wrapping:
|
|
10
|
+
Each sub-worker may use the same underlying attach_opaque_data. The MetaWorker
|
|
11
|
+
prepends a 3-byte magic marker plus a 1-byte worker index to distinguish them:
|
|
12
|
+
wrapped = _WRAP_MAGIC + bytes([worker_index]) + original_attach_opaque_data
|
|
13
|
+
|
|
14
|
+
Usage::
|
|
15
|
+
|
|
16
|
+
MetaWorker.serve(ExampleWorker, WritableWorker)
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import dataclasses
|
|
22
|
+
import logging
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from vgi_rpc.rpc import CallContext, Stream
|
|
26
|
+
|
|
27
|
+
from vgi.catalog.catalog_interface import AttachOpaqueData, CatalogAttachResult
|
|
28
|
+
from vgi.invocation import GlobalInitResponse
|
|
29
|
+
from vgi.protocol import (
|
|
30
|
+
BindRequest,
|
|
31
|
+
CatalogAttachRequest,
|
|
32
|
+
CatalogsResponse,
|
|
33
|
+
InitRequest,
|
|
34
|
+
ProcessState,
|
|
35
|
+
)
|
|
36
|
+
from vgi.worker import Worker
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger("vgi.meta_worker")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _attach_opaque_data_short(attach_opaque_data: bytes | None) -> str:
    """Abbreviate an attach_opaque_data value for log lines.

    Returns ``"-"`` when the value is ``None`` or empty; otherwise the first
    16 hex characters (the first 8 bytes) of the value.
    """
    return attach_opaque_data.hex()[:16] if attach_opaque_data else "-"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _make_attach_delegate(name: str) -> Any:
    """Build a MetaWorker method that forwards ``name`` to the owning sub-worker.

    The generated method pops the wrapped ``attach_opaque_data`` keyword,
    resolves the sub-worker through ``_unwrap_attach_opaque_data`` and calls
    the sub-worker's method of the same name with the unwrapped value plus
    all remaining keyword arguments.

    The ``Worker`` method's signature is attached to the generated function
    so that vgi_rpc's validation passes.
    """
    import inspect

    # Looked up eagerly: an unknown ``name`` fails here, at build time.
    worker_signature = inspect.signature(getattr(Worker, name))

    def method(self: MetaWorker, **kwargs: Any) -> Any:
        wrapped_aid = kwargs.pop("attach_opaque_data")
        target, unwrapped_aid = self._unwrap_attach_opaque_data(wrapped_aid)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "dispatch method=%s sub_worker_index=%d wrapped_aid=%s unwrapped_aid=%s",
                name,
                self._wrapped_index(wrapped_aid),
                _attach_opaque_data_short(wrapped_aid),
                _attach_opaque_data_short(unwrapped_aid),
            )
        delegate = getattr(target, name)
        return delegate(attach_opaque_data=unwrapped_aid, **kwargs)

    # Make the generated function present itself as the Worker method.
    method.__signature__ = worker_signature  # type: ignore[attr-defined]
    method.__qualname__ = f"MetaWorker.{name}"
    method.__name__ = name
    return method
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# Methods where attach_opaque_data is the first parameter (most catalog methods)
|
|
80
|
+
# Names of catalog RPC methods that are routed by their ``attach_opaque_data``
# argument. NOTE(review): presumably each name is turned into a MetaWorker
# method via ``_make_attach_delegate``; that wiring is outside this view.
_ATTACH_ID_METHODS = [
    # Attach lifecycle, versioning and transactions.
    "catalog_detach",
    "catalog_version",
    "catalog_transaction_begin",
    "catalog_transaction_commit",
    "catalog_transaction_rollback",
    # Schemas and schema contents.
    "catalog_schemas",
    "catalog_schema_get",
    "catalog_schema_create",
    "catalog_schema_drop",
    "catalog_schema_contents_tables",
    "catalog_schema_contents_views",
    "catalog_schema_contents_functions",
    "catalog_schema_contents_macros",
    # Tables.
    "catalog_table_get",
    "catalog_table_drop",
    "catalog_table_scan_function_get",
    "catalog_table_scan_branches_get",
    "catalog_table_column_statistics_get",
    "catalog_table_insert_function_get",
    "catalog_table_update_function_get",
    "catalog_table_delete_function_get",
    "catalog_table_comment_set",
    "catalog_table_column_comment_set",
    "catalog_table_rename",
    "catalog_table_column_add",
    "catalog_table_column_drop",
    "catalog_table_column_rename",
    "catalog_table_column_default_set",
    "catalog_table_column_default_drop",
    "catalog_table_column_type_change",
    "catalog_table_not_null_drop",
    "catalog_table_not_null_set",
    # Views.
    "catalog_view_get",
    "catalog_view_create",
    "catalog_view_drop",
    "catalog_view_rename",
    "catalog_view_comment_set",
    # Macros.
    "catalog_macro_get",
    "catalog_macro_drop",
    # Indexes.
    "catalog_index_get",
    "catalog_index_drop",
    "catalog_schema_contents_indexes",
]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class MetaWorker:
|
|
127
|
+
"""Composes multiple Worker instances, dispatching VgiProtocol calls.
|
|
128
|
+
|
|
129
|
+
Each Worker has its own catalog interface and function registry.
|
|
130
|
+
The MetaWorker wraps/unwraps attach_opaque_data values to route calls to the right worker.
|
|
131
|
+
"""
|
|
132
|
+
|
|
133
|
+
def __init__(self, workers: list[Worker]) -> None:
    """Initialize with a list of Worker instances.

    Args:
        workers: Sub-workers to compose. A worker's position in this list
            is the index byte embedded in wrapped attach_opaque_data values.

    Raises:
        ValueError: If ``workers`` is empty, or holds more than 256 entries
            (the sub-worker index has to fit in the single index byte).
    """
    # Fail early with a clear message instead of an IndexError on
    # ``workers[0]`` or a late ``bytes([index])`` failure during attach.
    if not workers:
        raise ValueError("MetaWorker requires at least one Worker")
    if len(workers) > 256:
        raise ValueError(
            f"MetaWorker supports at most 256 workers (1-byte index), got {len(workers)}"
        )

    self._workers = workers
    self._name_to_index: dict[str, int] = {}
    # The HTTP transport's state-rehydration path expects the
    # implementation to expose ``_vgi_tracer`` directly. Borrow the
    # first worker's tracer; all workers in one process share whatever
    # the otel config produced.
    self._vgi_tracer = workers[0]._vgi_tracer

    for i, w in enumerate(workers):
        try:
            cat = w._get_catalog()
            for info in cat.catalogs():
                previous = self._name_to_index.get(info.name)
                if previous is not None and previous != i:
                    # Later worker still wins (unchanged), but say so.
                    logger.warning(
                        "catalog %r exposed by sub-workers %d and %d; routing to %d",
                        info.name,
                        previous,
                        i,
                        i,
                    )
                self._name_to_index[info.name] = i
        except ValueError as exc:
            # Best effort: a worker whose catalog lookup raises ValueError
            # simply contributes no static catalog names.
            logger.debug("sub-worker %d (%s) exposes no catalogs: %s", i, type(w).__name__, exc)

    # Detailed startup record — each sub-worker's index → catalog mapping.
    # The index byte is part of the prefix MetaWorker prepends to
    # attach_opaque_data values; making it explicit here means a stray
    # ``wrapped_aid=…`` in a dispatch log can be cross-referenced without
    # source diving.
    if logger.isEnabledFor(logging.INFO):
        mapping = [
            {
                "index": i,
                "worker_class": type(w).__name__,
                "catalogs": [name for name, idx in self._name_to_index.items() if idx == i],
            }
            for i, w in enumerate(workers)
        ]
        logger.info(
            "MetaWorker initialized: %d workers, mapping=%s",
            len(workers),
            mapping,
        )
|
|
169
|
+
|
|
170
|
+
def _resolve_function(self, request: BindRequest) -> Any:
    """Resolve a function class via the first sub-worker that registers it.

    The HTTP state-rehydration path invokes this without any
    attach_opaque_data, so the only routing key is ``request.function_name``.
    Sub-workers are probed in list order.

    Raises:
        ValueError: If no sub-worker's registry contains the function name.
    """
    wanted = request.function_name
    owner = next(
        (sub for sub in self._workers if wanted in type(sub)._build_registry()),
        None,
    )
    if owner is None:
        raise ValueError(f"Unknown function: '{wanted}'")
    return owner._resolve_function(request)
|
|
183
|
+
|
|
184
|
+
# ========== attach_opaque_data wrapping ==========
|
|
185
|
+
#
|
|
186
|
+
# Wire format of a MetaWorker-wrapped attach_opaque_data:
|
|
187
|
+
#
|
|
188
|
+
# [ 'M' 'W' 0x00 ][ <index byte> ][ <original attach_opaque_data bytes> ]
|
|
189
|
+
# ^^^^^^^^^^^^^ ^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
190
|
+
# magic prefix sub-worker original attach_opaque_data as
|
|
191
|
+
# (3 bytes) index (1B) vended by the sub-worker
|
|
192
|
+
#
|
|
193
|
+
# The magic prefix means a wrapped attach_opaque_data is self-identifying: a
|
|
194
|
+
# 4-byte ``MW\0<idx>`` prefix is unmistakable in logs and storage shard
|
|
195
|
+
# ids (shows as ``att-4d570000...`` in hex). Without it, a bare 17-byte
|
|
196
|
+
# attach_opaque_data whose first byte happened to be ``\x00`` was indistinguishable
|
|
197
|
+
# from a wrapped ``[0][16 bytes]`` — making MetaWorker routing bugs
|
|
198
|
+
# silently mis-route (see the dynamic_to_string failure that motivated
|
|
199
|
+
# this marker).
|
|
200
|
+
#
|
|
201
|
+
# 4-byte total overhead (3 magic + 1 index). Sub-workers see the
|
|
202
|
+
# un-prefixed bytes and don't need to know MetaWorker exists.
|
|
203
|
+
|
|
204
|
+
# 3-byte marker that starts every MetaWorker-wrapped attach_opaque_data.
_WRAP_MAGIC = b"MW\x00"
# Total number of prefix bytes added by wrapping.
_WRAP_OVERHEAD = len(_WRAP_MAGIC) + 1  # magic + index byte
|
|
206
|
+
|
|
207
|
+
def _wrap_attach_opaque_data(self, worker_index: int, attach_opaque_data: bytes) -> bytes:
    """Return ``attach_opaque_data`` prefixed with the magic and the index byte.

    ``worker_index`` is encoded as a single byte, so values outside
    ``0..255`` raise ``ValueError`` from ``bytes()``.
    """
    prefix = self._WRAP_MAGIC + bytes((worker_index,))
    return prefix + attach_opaque_data
|
|
210
|
+
|
|
211
|
+
def _is_wrapped(self, attach_opaque_data: bytes) -> bool:
    """Tell whether ``attach_opaque_data`` carries the MetaWorker prefix.

    True only when the value is long enough to hold the magic plus the index
    byte and its leading bytes equal the magic.
    """
    if len(attach_opaque_data) < self._WRAP_OVERHEAD:
        return False
    magic = self._WRAP_MAGIC
    return attach_opaque_data[: len(magic)] == magic
|
|
217
|
+
|
|
218
|
+
def _wrapped_index(self, wrapped_id: bytes) -> int:
    """Read the sub-worker index byte out of a wrapped attach_opaque_data.

    Intended for log output: when ``wrapped_id`` does not carry the
    MetaWorker prefix, ``-1`` is returned rather than raising.
    """
    if self._is_wrapped(wrapped_id):
        # The index byte sits immediately after the magic.
        return wrapped_id[len(self._WRAP_MAGIC)]
    return -1
|
|
228
|
+
|
|
229
|
+
def _unwrap_attach_opaque_data(self, wrapped_id: bytes) -> tuple[Worker, bytes]:
    """Split a wrapped attach_opaque_data into (sub-worker, original bytes).

    Raises:
        KeyError: If the MetaWorker magic is missing, or the index byte does
            not address one of ``self._workers``. Some callers in this
            module (e.g. ``bind``, ``init``) catch this and fall back to a
            registry scan by function name; others let it propagate.
    """
    if not self._is_wrapped(wrapped_id):
        first8 = wrapped_id[:8].hex() if wrapped_id else "-"
        raise KeyError(f"attach_opaque_data is not MetaWorker-wrapped (missing magic): first8={first8}")

    worker_count = len(self._workers)
    sub_index = wrapped_id[len(self._WRAP_MAGIC)]
    if sub_index >= worker_count:
        raise KeyError(f"MetaWorker sub-worker index {sub_index} out of range (have {worker_count} workers)")

    # Everything after magic + index byte is the sub-worker's own value.
    return self._workers[sub_index], wrapped_id[self._WRAP_OVERHEAD :]
|
|
247
|
+
|
|
248
|
+
# ========== Catalog listing ==========
|
|
249
|
+
|
|
250
|
+
def catalog_catalogs(self) -> CatalogsResponse:
    """Collect the catalog discovery records of every sub-worker.

    Sub-workers whose catalog lookup raises ``ValueError`` are skipped.
    """
    collected = []
    for sub in self._workers:
        try:
            collected += sub._get_catalog().catalogs()
        except ValueError:
            continue
    return CatalogsResponse.from_infos(collected)
|
|
260
|
+
|
|
261
|
+
# ========== Catalog attach (dispatch by name, wrap result) ==========
|
|
262
|
+
|
|
263
|
+
def catalog_attach(
    self,
    request: CatalogAttachRequest,
    *,
    ctx: CallContext | None = None,
) -> CatalogAttachResult:
    """Attach to a catalog and return a result carrying a wrapped id.

    A catalog name already present in ``_name_to_index`` goes straight to
    its sub-worker. Otherwise each sub-worker is tried in order; the first
    one that does not raise ``ValueError``/``NotImplementedError`` wins and
    is remembered for that name.

    Raises:
        ValueError: If no sub-worker accepts the attach request.
    """
    owner_index = self._name_to_index.get(request.name)

    if owner_index is not None:
        result = self._workers[owner_index].catalog_attach(request, ctx=ctx)
    else:
        for candidate_index, candidate in enumerate(self._workers):
            try:
                result = candidate.catalog_attach(request, ctx=ctx)
            except (ValueError, NotImplementedError):
                continue
            owner_index = candidate_index
            self._name_to_index[request.name] = candidate_index
            break
        if owner_index is None:
            msg = f"No worker handles catalog '{request.name}'"
            raise ValueError(msg)

    # Prefix the sub-worker's id so later calls can be routed back to it.
    wrapped = self._wrap_attach_opaque_data(owner_index, result.attach_opaque_data)
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            "catalog_attach catalog=%r sub_worker_index=%d original_aid=%s wrapped_aid=%s",
            request.name,
            owner_index,
            _attach_opaque_data_short(result.attach_opaque_data),
            _attach_opaque_data_short(wrapped),
        )
    return dataclasses.replace(result, attach_opaque_data=AttachOpaqueData(wrapped))
|
|
297
|
+
|
|
298
|
+
# ========== Name-based dispatch (no attach_opaque_data) ==========
|
|
299
|
+
|
|
300
|
+
def catalog_create(self, request: Any) -> None:
    """Create a catalog on the first sub-worker that accepts the request.

    Sub-workers are tried in list order; one that raises ``ValueError`` or
    ``NotImplementedError`` is skipped. The accepting worker is recorded as
    the owner of ``request.name`` so that later name-based calls
    (``catalog_drop``, ``catalog_attach``) route to the worker that actually
    created the catalog.

    Raises:
        ValueError: If no sub-worker handles the request.
    """
    for index, worker in enumerate(self._workers):
        try:
            worker.catalog_create(request)
        except (ValueError, NotImplementedError):
            continue
        # Remember the owner; without this, catalog_drop would report
        # "No worker owns catalog" for a catalog that was just created.
        name = getattr(request, "name", None)
        if isinstance(name, str):
            self._name_to_index[name] = index
        return
    msg = f"No worker handles catalog_create for '{request.name}'"
    raise ValueError(msg)
|
|
310
|
+
|
|
311
|
+
def catalog_drop(self, name: str) -> None:
    """Drop catalog ``name`` on the sub-worker recorded as its owner.

    Raises:
        ValueError: If ``name`` has no entry in ``_name_to_index``.
    """
    owner_index = self._name_to_index.get(name)
    if owner_index is None:
        raise ValueError(f"No worker owns catalog '{name}'")
    self._workers[owner_index].catalog_drop(name=name)
|
|
319
|
+
|
|
320
|
+
# ========== Request-object methods (attach_opaque_data inside request) ==========
|
|
321
|
+
|
|
322
|
+
def catalog_table_create(self, request: Any) -> None:
    """Forward a table-create request to the sub-worker named by its wrapped id.

    The sub-worker receives a copy of ``request`` whose
    ``attach_opaque_data`` is the unwrapped value.
    """
    target, unwrapped_aid = self._unwrap_attach_opaque_data(request.attach_opaque_data)
    forwarded = dataclasses.replace(request, attach_opaque_data=unwrapped_aid)
    target.catalog_table_create(forwarded)
|
|
327
|
+
|
|
328
|
+
def catalog_macro_create(self, request: Any) -> None:
    """Forward a macro-create request to the sub-worker named by its wrapped id.

    The sub-worker receives a copy of ``request`` whose
    ``attach_opaque_data`` is the unwrapped value.
    """
    target, unwrapped_aid = self._unwrap_attach_opaque_data(request.attach_opaque_data)
    forwarded = dataclasses.replace(request, attach_opaque_data=unwrapped_aid)
    target.catalog_macro_create(forwarded)
|
|
333
|
+
|
|
334
|
+
def catalog_index_create(self, request: Any) -> None:
    """Forward an index-create request to the sub-worker named by its wrapped id.

    The sub-worker receives a copy of ``request`` whose
    ``attach_opaque_data`` is the unwrapped value.
    """
    target, unwrapped_aid = self._unwrap_attach_opaque_data(request.attach_opaque_data)
    forwarded = dataclasses.replace(request, attach_opaque_data=unwrapped_aid)
    target.catalog_index_create(forwarded)
|
|
339
|
+
|
|
340
|
+
# ========== bind / init (unwrap attach_opaque_data from request) ==========
|
|
341
|
+
|
|
342
|
+
def bind(self, request: BindRequest, ctx: CallContext) -> Any:
    """Dispatch bind to the right worker.

    When ``request.attach_opaque_data`` is a valid MetaWorker-wrapped id, the
    addressed sub-worker receives a copy of the request carrying the
    unwrapped id. Otherwise (no id, or an id that fails to unwrap) the first
    sub-worker whose registry contains ``request.function_name`` handles the
    unmodified request.

    Only the unwrap step is guarded: an ``IndexError``/``KeyError`` raised by
    the sub-worker's own ``bind`` propagates to the caller instead of being
    mistaken for a bad id and silently re-dispatched via the registry scan.

    Raises:
        ValueError: If no sub-worker knows the function.
    """
    wrapped_aid = request.attach_opaque_data
    if wrapped_aid:
        try:
            worker, original_id = self._unwrap_attach_opaque_data(wrapped_aid)
        except (IndexError, KeyError):
            pass  # Invalid wrapped id — fall through to registry search
        else:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "dispatch method=bind function=%r sub_worker_index=%d wrapped_aid=%s unwrapped_aid=%s",
                    request.function_name,
                    self._wrapped_index(wrapped_aid),
                    _attach_opaque_data_short(wrapped_aid),
                    _attach_opaque_data_short(original_id),
                )
            unwrapped_request = dataclasses.replace(request, attach_opaque_data=original_id)
            return worker.bind(unwrapped_request, ctx=ctx)

    for w in self._workers:
        registry = type(w)._build_registry()
        if request.function_name in registry:
            logger.debug(
                "dispatch method=bind function=%r fallback=registry_scan",
                request.function_name,
            )
            return w.bind(request, ctx=ctx)

    msg = f"Unknown function '{request.function_name}'"
    raise ValueError(msg)
|
|
371
|
+
|
|
372
|
+
def init(self, request: InitRequest, ctx: CallContext) -> Stream[ProcessState, GlobalInitResponse]:
    """Dispatch init to the right worker.

    When ``request.bind_call.attach_opaque_data`` is a valid
    MetaWorker-wrapped id, the addressed sub-worker receives a copy of the
    request whose bind call carries the unwrapped id. Otherwise the first
    sub-worker whose registry contains the bind call's function name handles
    the unmodified request.

    Only the unwrap step is guarded: an ``IndexError``/``KeyError`` raised by
    the sub-worker's own ``init`` propagates to the caller instead of
    triggering a second dispatch through the registry scan.

    Raises:
        ValueError: If no sub-worker knows the function.
    """
    bind_call = request.bind_call
    if bind_call and bind_call.attach_opaque_data:
        wrapped_aid = bind_call.attach_opaque_data
        try:
            worker, original_id = self._unwrap_attach_opaque_data(wrapped_aid)
        except (IndexError, KeyError):
            pass  # Invalid wrapped id — fall through
        else:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "dispatch method=init function=%r sub_worker_index=%d wrapped_aid=%s unwrapped_aid=%s",
                    bind_call.function_name,
                    self._wrapped_index(wrapped_aid),
                    _attach_opaque_data_short(wrapped_aid),
                    _attach_opaque_data_short(original_id),
                )
            unwrapped_bind_call = dataclasses.replace(bind_call, attach_opaque_data=original_id)
            unwrapped_request = dataclasses.replace(request, bind_call=unwrapped_bind_call)
            return worker.init(unwrapped_request, ctx=ctx)

    fn_name = request.bind_call.function_name if request.bind_call else ""
    for w in self._workers:
        registry = type(w)._build_registry()
        if fn_name in registry:
            logger.debug(
                "dispatch method=init function=%r fallback=registry_scan",
                fn_name,
            )
            return w.init(request, ctx=ctx)

    msg = f"Unknown function '{fn_name}'"
    raise ValueError(msg)
|
|
403
|
+
|
|
404
|
+
def _unwrap_bind_call_attach_opaque_data(
    self,
    request: Any,
    *,
    method_name: str = "?",
) -> tuple[Any, Any | None]:
    """Find the target sub-worker from ``request.bind_call.attach_opaque_data``.

    Returns ``(patched_request, worker)``: a copy of ``request`` whose bind
    call carries the unwrapped (sub-worker-relative) attach_opaque_data, and
    the sub-worker it addresses. Returns ``(request, None)`` when there is no
    bind call, no attach_opaque_data, or unwrapping fails; the caller is then
    expected to fall back to a registry scan by function name.

    This is the same unwrap that ``init``/``bind`` perform. Per the original
    note, sibling RPCs carrying ``bind_call.attach_opaque_data``
    (cardinality, statistics, dynamic_to_string) would otherwise hand the
    still-wrapped id to the sub-worker, which would then derive a different
    shard key than ``init``/``process`` use for the same logical attach.

    ``method_name`` is used only in the debug log line.
    """
    bind_call = getattr(request, "bind_call", None)
    wrapped_aid = None if bind_call is None else getattr(bind_call, "attach_opaque_data", None)
    if not wrapped_aid:
        return request, None

    try:
        target, unwrapped_aid = self._unwrap_attach_opaque_data(wrapped_aid)
    except (IndexError, KeyError):
        return request, None

    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            "dispatch method=%s function=%r sub_worker_index=%d wrapped_aid=%s unwrapped_aid=%s",
            method_name,
            getattr(bind_call, "function_name", "?"),
            self._wrapped_index(wrapped_aid),
            _attach_opaque_data_short(wrapped_aid),
            _attach_opaque_data_short(unwrapped_aid),
        )

    unwrapped_bind_call = dataclasses.replace(bind_call, attach_opaque_data=unwrapped_aid)
    return dataclasses.replace(request, bind_call=unwrapped_bind_call), target
|
|
446
|
+
|
|
447
|
+
def table_function_cardinality(self, request: Any, ctx: CallContext) -> Any:
    """Route a cardinality-estimation call to the owning sub-worker.

    Prefers the sub-worker addressed by the bind call's wrapped id; falls
    back to the first sub-worker whose registry contains the function name.

    Raises:
        ValueError: If no sub-worker knows the function.
    """
    unwrapped_request, owner = self._unwrap_bind_call_attach_opaque_data(
        request,
        method_name="table_function_cardinality",
    )
    if owner is not None:
        return owner.table_function_cardinality(unwrapped_request, ctx=ctx)

    bind_call = request.bind_call
    wanted = bind_call.function_name if bind_call else ""
    for candidate in self._workers:
        if wanted in type(candidate)._build_registry():
            return candidate.table_function_cardinality(request, ctx=ctx)
    raise ValueError(f"Unknown function '{wanted}'")
|
|
462
|
+
|
|
463
|
+
def table_function_statistics(self, request: Any, ctx: CallContext) -> Any:
    """Route a per-column statistics lookup to the owning sub-worker.

    Prefers the sub-worker addressed by the bind call's wrapped id; falls
    back to the first sub-worker whose registry contains the function name.

    Raises:
        ValueError: If no sub-worker knows the function.
    """
    unwrapped_request, owner = self._unwrap_bind_call_attach_opaque_data(
        request,
        method_name="table_function_statistics",
    )
    if owner is not None:
        return owner.table_function_statistics(unwrapped_request, ctx=ctx)

    bind_call = request.bind_call
    wanted = bind_call.function_name if bind_call else ""
    for candidate in self._workers:
        if wanted in type(candidate)._build_registry():
            return candidate.table_function_statistics(request, ctx=ctx)
    raise ValueError(f"Unknown function '{wanted}'")
|
|
478
|
+
|
|
479
|
+
def table_function_dynamic_to_string(self, request: Any, ctx: CallContext) -> Any:
    """Route the dynamic_to_string profiler hook to the owning sub-worker.

    Prefers the sub-worker addressed by the bind call's wrapped id; falls
    back to the first sub-worker whose registry contains the function name.
    If no sub-worker knows the function, an empty response is returned
    instead of raising, so EXPLAIN ANALYZE never breaks the query.
    """
    unwrapped_request, owner = self._unwrap_bind_call_attach_opaque_data(
        request,
        method_name="table_function_dynamic_to_string",
    )
    if owner is not None:
        return owner.table_function_dynamic_to_string(unwrapped_request, ctx=ctx)

    bind_call = request.bind_call
    wanted = bind_call.function_name if bind_call else ""
    for candidate in self._workers:
        if wanted in type(candidate)._build_registry():
            return candidate.table_function_dynamic_to_string(request, ctx=ctx)

    # Imported only on the no-owner path, as in the original.
    from vgi.protocol import TableFunctionDynamicToStringResponse

    return TableFunctionDynamicToStringResponse(keys=[], values=[])
|
|
497
|
+
|
|
498
|
+
# ========== Aggregate function dispatch ==========
|
|
499
|
+
|
|
500
|
+
def _dispatch_aggregate(self, request: Any, method_name: str, ctx: CallContext) -> Any:
    """Dispatch a function-scoped RPC (``method_name``) to the right worker.

    When ``request.attach_opaque_data`` is a valid MetaWorker-wrapped id, the
    addressed sub-worker receives a copy of the request carrying the
    unwrapped id. Otherwise the first sub-worker whose registry contains
    ``request.function_name`` handles the unmodified request.

    Only the unwrap step is guarded: an ``IndexError``/``KeyError`` raised by
    the delegated sub-worker call propagates to the caller instead of being
    swallowed and re-dispatched through the registry scan.

    Raises:
        ValueError: If no sub-worker knows the function.
    """
    fn_name = getattr(request, "function_name", "")
    wrapped_aid = getattr(request, "attach_opaque_data", None)
    if wrapped_aid:
        try:
            worker, original_id = self._unwrap_attach_opaque_data(wrapped_aid)
        except (IndexError, KeyError):
            pass  # Invalid wrapped id — fall through to registry search
        else:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "dispatch method=%s function=%r sub_worker_index=%d wrapped_aid=%s unwrapped_aid=%s",
                    method_name,
                    fn_name,
                    self._wrapped_index(wrapped_aid),
                    _attach_opaque_data_short(wrapped_aid),
                    _attach_opaque_data_short(original_id),
                )
            unwrapped_request = dataclasses.replace(request, attach_opaque_data=original_id)
            return getattr(worker, method_name)(unwrapped_request, ctx=ctx)

    for w in self._workers:
        registry = type(w)._build_registry()
        if fn_name in registry:
            logger.debug(
                "dispatch method=%s function=%r fallback=registry_scan",
                method_name,
                fn_name,
            )
            return getattr(w, method_name)(request, ctx=ctx)
    raise ValueError(f"Unknown aggregate function '{fn_name}'")
|
|
529
|
+
|
|
530
|
+
def aggregate_bind(self, request: Any, ctx: CallContext) -> Any:
    """Forward ``aggregate_bind`` to the sub-worker owning the function."""
    return self._dispatch_aggregate(request, method_name="aggregate_bind", ctx=ctx)
|
|
533
|
+
|
|
534
|
+
def aggregate_update(self, request: Any, ctx: CallContext) -> Any:
    """Forward ``aggregate_update`` to the sub-worker owning the function."""
    return self._dispatch_aggregate(request, method_name="aggregate_update", ctx=ctx)
|
|
537
|
+
|
|
538
|
+
def aggregate_combine(self, request: Any, ctx: CallContext) -> Any:
    """Forward ``aggregate_combine`` to the sub-worker owning the function."""
    return self._dispatch_aggregate(request, method_name="aggregate_combine", ctx=ctx)
|
|
541
|
+
|
|
542
|
+
def aggregate_finalize(self, request: Any, ctx: CallContext) -> Any:
    """Forward ``aggregate_finalize`` to the sub-worker owning the function."""
    return self._dispatch_aggregate(request, method_name="aggregate_finalize", ctx=ctx)
|
|
545
|
+
|
|
546
|
+
def aggregate_destructor(self, request: Any, ctx: CallContext) -> Any:
    """Forward ``aggregate_destructor`` to the sub-worker owning the function."""
    return self._dispatch_aggregate(request, method_name="aggregate_destructor", ctx=ctx)
|
|
549
|
+
|
|
550
|
+
def aggregate_window_init(self, request: Any, ctx: CallContext) -> Any:
    """Forward ``aggregate_window_init`` to the sub-worker owning the function."""
    return self._dispatch_aggregate(request, method_name="aggregate_window_init", ctx=ctx)
|
|
553
|
+
|
|
554
|
+
def aggregate_window(self, request: Any, ctx: CallContext) -> Any:
    """Forward an ``aggregate_window`` request through ``_dispatch_aggregate``.

    The request and call context are passed along untouched; the result of
    the dispatched call is returned as-is.
    """
    target_method = "aggregate_window"
    return self._dispatch_aggregate(request, target_method, ctx)
|
|
557
|
+
|
|
558
|
+
def aggregate_window_destructor(self, request: Any, ctx: CallContext) -> Any:
    """Forward an ``aggregate_window_destructor`` request through ``_dispatch_aggregate``.

    The request and call context are passed along untouched; the result of
    the dispatched call is returned as-is.
    """
    target_method = "aggregate_window_destructor"
    return self._dispatch_aggregate(request, target_method, ctx)
|
|
561
|
+
|
|
562
|
+
def aggregate_window_batch(self, request: Any, ctx: CallContext) -> Any:
    """Forward an ``aggregate_window_batch`` request through ``_dispatch_aggregate``.

    The request and call context are passed along untouched; the result of
    the dispatched call is returned as-is.
    """
    target_method = "aggregate_window_batch"
    return self._dispatch_aggregate(request, target_method, ctx)
|
|
565
|
+
|
|
566
|
+
def aggregate_streaming_open(self, request: Any, ctx: CallContext) -> Any:
    """Forward an ``aggregate_streaming_open`` request through ``_dispatch_aggregate``.

    The request and call context are passed along untouched; the result of
    the dispatched call is returned as-is.
    """
    target_method = "aggregate_streaming_open"
    return self._dispatch_aggregate(request, target_method, ctx)
|
|
569
|
+
|
|
570
|
+
def aggregate_streaming_chunk(self, request: Any, ctx: CallContext) -> Any:
    """Forward an ``aggregate_streaming_chunk`` request through ``_dispatch_aggregate``.

    The request and call context are passed along untouched; the result of
    the dispatched call is returned as-is.
    """
    target_method = "aggregate_streaming_chunk"
    return self._dispatch_aggregate(request, target_method, ctx)
|
|
573
|
+
|
|
574
|
+
def aggregate_streaming_close(self, request: Any, ctx: CallContext) -> Any:
    """Forward an ``aggregate_streaming_close`` request through ``_dispatch_aggregate``.

    The request and call context are passed along untouched; the result of
    the dispatched call is returned as-is.
    """
    target_method = "aggregate_streaming_close"
    return self._dispatch_aggregate(request, target_method, ctx)
|
|
577
|
+
|
|
578
|
+
# ========== Buffered table function dispatch ==========
|
|
579
|
+
# Routing key is function_name (same as aggregate). The underlying
|
|
580
|
+
# _dispatch_aggregate helper isn't aggregate-specific — it just looks up
|
|
581
|
+
# the function by name in each worker's registry.
|
|
582
|
+
|
|
583
|
+
def table_buffering_process(self, request: Any, ctx: CallContext) -> Any:
    """Forward a ``table_buffering_process`` request through ``_dispatch_aggregate``.

    Buffered table functions reuse the aggregate dispatch helper; the
    request and call context are passed along untouched and the result of
    the dispatched call is returned as-is.
    """
    target_method = "table_buffering_process"
    return self._dispatch_aggregate(request, target_method, ctx)
|
|
586
|
+
|
|
587
|
+
def table_buffering_combine(self, request: Any, ctx: CallContext) -> Any:
    """Forward a ``table_buffering_combine`` request through ``_dispatch_aggregate``.

    Buffered table functions reuse the aggregate dispatch helper; the
    request and call context are passed along untouched and the result of
    the dispatched call is returned as-is.
    """
    target_method = "table_buffering_combine"
    return self._dispatch_aggregate(request, target_method, ctx)
|
|
590
|
+
|
|
591
|
+
def table_buffering_destructor(self, request: Any, ctx: CallContext) -> Any:
    """Forward a ``table_buffering_destructor`` request through ``_dispatch_aggregate``.

    Buffered table functions reuse the aggregate dispatch helper; the
    request and call context are passed along untouched and the result of
    the dispatched call is returned as-is.
    """
    target_method = "table_buffering_destructor"
    return self._dispatch_aggregate(request, target_method, ctx)
|
|
594
|
+
|
|
595
|
+
def _load_table_buffering_params(
    self,
    request: Any,
    ctx: CallContext,
    *,
    attach_already_unwrapped: bool = False,
) -> Any:
    """Dispatch the finalize-tick driver's cold-load to the right worker.

    ``run_table_buffering_finalize_tick`` calls this via
    ``ctx.implementation._load_table_buffering_params(...)``. Under
    MetaWorker, the wrapped attach_opaque_data steers us to the right
    sub-worker; we unwrap the meta-prefix and delegate.

    ``attach_already_unwrapped`` is forwarded to the sub-worker — see
    ``Worker._load_table_buffering_params`` for semantics.

    Args:
        request: Request object. Its ``function_name`` attribute (default
            ``""``) is the routing key for the registry fallback; a truthy
            ``attach_opaque_data`` attribute is tried first.
        ctx: Call context, passed through to the sub-worker unchanged.
        attach_already_unwrapped: Forwarded verbatim to the sub-worker.

    Returns:
        Whatever the selected sub-worker's ``_load_table_buffering_params``
        returns.

    Raises:
        ValueError: If the attach data cannot be unwrapped (or is absent)
            and no worker's registry contains ``function_name``.
    """
    fn_name = getattr(request, "function_name", "")
    wrapped_attach = getattr(request, "attach_opaque_data", None)
    if wrapped_attach:
        # Only the unwrap step is guarded: an IndexError/KeyError here means
        # the attach data did not resolve to a sub-worker, so we fall back to
        # the registry scan below. Errors raised by the sub-worker itself
        # must propagate instead of being mistaken for a failed unwrap.
        try:
            worker, original_id = self._unwrap_attach_opaque_data(wrapped_attach)
        except (IndexError, KeyError):
            pass
        else:
            return worker._load_table_buffering_params(
                dataclasses.replace(request, attach_opaque_data=original_id),
                ctx,
                attach_already_unwrapped=attach_already_unwrapped,
            )
    # Fallback: the first worker whose registry knows the function name wins.
    # The request is forwarded unmodified on this path.
    for w in self._workers:
        if fn_name in type(w)._build_registry():
            return w._load_table_buffering_params(
                request,
                ctx,
                attach_already_unwrapped=attach_already_unwrapped,
            )
    raise ValueError(f"Unknown table_buffering function '{fn_name}'")
|
|
633
|
+
|
|
634
|
+
# ========== Serve entry point ==========
|
|
635
|
+
|
|
636
|
+
@classmethod
def serve(cls, *worker_classes: type[Worker]) -> None:
    """Build one instance of each worker class and serve them via vgi_rpc.

    The transport defaults to stdin/stdout (subprocess mode). argv is
    passed through to ``run_server()``, so a worker launched with
    ``--unix PATH --idle-timeout SEC`` also takes part in the AF_UNIX
    launcher path; the vgi C++ extension relies on this to share warm
    workers across DuckDB processes.

    Args:
        *worker_classes: Worker classes, each instantiated with no
            arguments and handed to ``cls`` as a list.
    """
    # Imported lazily, in this order, exactly as the serving path needs them.
    from vgi_rpc.rpc import run_server

    from vgi.protocol import VgiProtocol

    # Emit a startup line: some tests assert that stderr is non-empty.
    log_format = "%(asctime)s %(name)s %(levelname)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)
    logger.info("worker_starting")

    instances = []
    for worker_cls in worker_classes:
        instances.append(worker_cls())
    run_server(VgiProtocol, cls(instances))
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
# Install one delegate per entry of _ATTACH_ID_METHODS onto MetaWorker. Each
# delegate is built by _make_attach_delegate from the method's own name and
# bound under that same name.
for _method_name in _ATTACH_ID_METHODS:
    setattr(
        MetaWorker,
        _method_name,
        _make_attach_delegate(_method_name),
    )
|