vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Writable test-fixture worker and helpers.
|
|
4
|
+
|
|
5
|
+
These fixtures depend on ``sqlglot`` (via ``vgi.transactor``) and live behind
|
|
6
|
+
the ``vgi[test-fixtures-writable]`` extra. Tests that exercise the write
|
|
7
|
+
subsystem (INSERT/UPDATE/DELETE/DDL) import from here.
|
|
8
|
+
"""
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Generic writable functions parameterized by table_name.
|
|
4
|
+
|
|
5
|
+
These functions reuse the transactor proxy, scan state and shared helpers
|
|
6
|
+
from ``vgi._test_fixtures.writable.table`` but determine the table name dynamically from the
|
|
7
|
+
first positional argument instead of a hardcoded class variable. This allows
|
|
8
|
+
the same function classes to serve any table in the transactor's DuckDB
|
|
9
|
+
database, which is essential for DDL-created tables.
|
|
10
|
+
|
|
11
|
+
The table_name is passed as the first positional argument via the
|
|
12
|
+
``ScanFunctionResult`` when the catalog dispatches scan/insert/update/delete
|
|
13
|
+
for dynamically discovered tables.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
import pyarrow as pa
|
|
21
|
+
from vgi_rpc import AnnotatedBatch
|
|
22
|
+
from vgi_rpc.rpc import OutputCollector
|
|
23
|
+
|
|
24
|
+
from vgi._test_fixtures.writable.table import (
|
|
25
|
+
_COUNT_SCHEMA,
|
|
26
|
+
WritableScanState,
|
|
27
|
+
_get_attach_opaque_data,
|
|
28
|
+
_get_pushdown_filters,
|
|
29
|
+
_get_tx_id,
|
|
30
|
+
_is_returning,
|
|
31
|
+
transactor_proxy,
|
|
32
|
+
)
|
|
33
|
+
from vgi.invocation import BindResponse, GlobalInitResponse
|
|
34
|
+
from vgi.table_function import BindParams, InitParams, ProcessParams, TableFunctionGenerator
|
|
35
|
+
from vgi.table_in_out_function import TableInOutGenerator
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
"GenericTableDelete",
|
|
39
|
+
"GenericTableInsert",
|
|
40
|
+
"GenericTableScan",
|
|
41
|
+
"GenericTableUpdate",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _get_table_name_from_bind(params: BindParams[None]) -> str:
    """Extract the table name from the first positional argument at bind time.

    Args:
        params: Bind parameters; the name is read from
            ``params.bind_call.arguments.positional[0]``.

    Returns:
        The first positional argument's Python value, converted with ``str()``.

    Raises:
        ValueError: If the arguments object is missing, there are no positional
            arguments, or the first one is ``None`` or converts to ``None``.
    """
    args = params.bind_call.arguments
    positional = args.positional if args is not None else None
    first = positional[0] if positional else None
    # A scalar wrapping a null value is not ``None`` itself, but its ``as_py()`` is.
    # Checking the converted value keeps it from becoming the bogus table name "None".
    table_name = first.as_py() if first is not None else None
    if table_name is None:
        msg = "table_name positional argument is required"
        raise ValueError(msg)
    return str(table_name)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _get_table_name_from_process(params: ProcessParams[None]) -> str:
    """Extract the table name from the first positional argument at process time.

    Args:
        params: Process parameters; the name is read from
            ``params.init_call.bind_call.arguments.positional[0]``.

    Returns:
        The first positional argument's Python value, converted with ``str()``.

    Raises:
        ValueError: If ``init_call`` or the arguments object is missing, there are
            no positional arguments, or the first one is ``None`` or converts to ``None``.
    """
    init_call = params.init_call
    # Raise instead of asserting: asserts are stripped under ``python -O``, which would
    # turn a missing init_call into an opaque AttributeError further down.
    if init_call is None:
        msg = "init_call is required to resolve the table_name positional argument"
        raise ValueError(msg)
    args = init_call.bind_call.arguments
    positional = args.positional if args is not None else None
    first = positional[0] if positional else None
    # A scalar wrapping a null value is not ``None`` itself, but its ``as_py()`` is.
    # Checking the converted value keeps it from becoming the bogus table name "None".
    table_name = first.as_py() if first is not None else None
    if table_name is None:
        msg = "table_name positional argument is required"
        raise ValueError(msg)
    return str(table_name)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _get_table_schema_from_transactor(table_name: str, attach_opaque_data: bytes, tx_id: bytes) -> pa.Schema:
    """Fetch a table's Arrow schema from the transactor.

    The transactor replies with the schema serialized as IPC bytes; they are decoded
    here into a ``pa.Schema``.
    """
    serialized_schema = transactor_proxy._get_proxy().table_schema(
        attach_opaque_data=attach_opaque_data,
        table_name=table_name,
        tx_id=tx_id,
    )
    return pa.ipc.read_schema(pa.BufferReader(serialized_schema))  # type: ignore[arg-type]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ============================================================================
|
|
72
|
+
# Generic scan — dynamic table name + schema from transactor
|
|
73
|
+
# ============================================================================
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class GenericTableScan(TableFunctionGenerator[None, WritableScanState]):
    """Scan function for any table — determines table name from first positional arg.

    The output schema is fetched from the transactor at bind time. Rows come from one
    transactor scan stream that is opened in ``initial_state`` and drained one batch
    per ``process`` call.
    """

    class Meta:
        """Metadata for GenericTableScan."""

        name = "generic_writable_scan"
        projection_pushdown = True
        filter_pushdown = True

    @classmethod
    def on_bind(cls, params: BindParams[None]) -> BindResponse:
        """Bind: query transactor for table schema (already includes rowid).

        Raises:
            ValueError: If the table name, ``attach_opaque_data`` or
                ``transaction_opaque_data`` is missing from the bind request.
        """
        table_name = _get_table_name_from_bind(params)
        attach_opaque_data = params.attach_opaque_data  # unwrapped plaintext
        tx_id = params.bind_call.transaction_opaque_data
        # Explicit raise rather than ``assert``: asserts are stripped under ``python -O``,
        # which would forward ``None`` to the transactor instead of failing here.
        if attach_opaque_data is None or tx_id is None:
            msg = "attach_opaque_data and transaction_opaque_data are required to bind generic_writable_scan"
            raise ValueError(msg)
        table_schema = _get_table_schema_from_transactor(table_name, attach_opaque_data, tx_id)
        return BindResponse(output_schema=table_schema)

    @classmethod
    def on_init(cls, params: InitParams[None]) -> GlobalInitResponse:
        """Limit to a single worker."""
        return GlobalInitResponse(max_workers=1)

    @classmethod
    def initial_state(cls, params: ProcessParams[None]) -> WritableScanState:
        """Open the transactor scan stream once before processing begins.

        The scan requests the columns named in ``params.output_schema`` and forwards
        any pushdown filters as serialized bytes.
        """
        table_name = _get_table_name_from_process(params)
        attach_opaque_data = _get_attach_opaque_data(params)
        tx_id = _get_tx_id(params)
        proxy = transactor_proxy._get_proxy()
        columns = list(params.output_schema.names)
        scan_iter = iter(
            proxy.scan(
                attach_opaque_data=attach_opaque_data,
                tx_id=tx_id,
                schema_name="",
                table_name=table_name,
                columns=columns,
                pushdown_filters=_get_pushdown_filters(params),
            )
        )
        return WritableScanState(scan_iter=scan_iter)

    @classmethod
    def process(cls, params: ProcessParams[None], state: WritableScanState, out: OutputCollector) -> None:
        """Read the next batch from the scan stream, or finish when it is exhausted."""
        # Internal invariant: ``initial_state`` always populates ``scan_iter``.
        assert state.scan_iter is not None
        # Only ``next()`` is guarded, so a StopIteration escaping from ``out.emit``
        # is never mistaken for the end of the scan.
        try:
            batch = next(state.scan_iter)
        except StopIteration:
            out.finish()
            return
        out.emit(batch.batch)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ============================================================================
|
|
133
|
+
# Generic write base — shared INSERT/UPDATE/DELETE logic
|
|
134
|
+
# ============================================================================
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class _GenericWriteBase(TableInOutGenerator[None, None]):
    """Base for generic write handlers. Subclasses set _operation.

    ``_operation`` names the transactor proxy method that ``_open_stream`` calls by default.
    """

    _operation: str  # "insert" | "update" | "delete"

    @classmethod
    def on_bind(cls, params: BindParams[None]) -> BindResponse:
        """Bind: query transactor for table schema to use for RETURNING.

        Returns:
            For RETURNING requests, the table schema with the ``rowid`` / ``row_id``
            fields removed; otherwise the affected-row count schema.

        Raises:
            ValueError: If the table name is missing, or if RETURNING is requested and
                ``attach_opaque_data`` or ``transaction_opaque_data`` is missing.
        """
        table_name = _get_table_name_from_bind(params)
        if _is_returning(params):
            attach_opaque_data = params.attach_opaque_data  # unwrapped plaintext
            tx_id = params.bind_call.transaction_opaque_data
            # Explicit raise rather than ``assert``: asserts are stripped under
            # ``python -O``, which would forward ``None`` to the transactor.
            if attach_opaque_data is None or tx_id is None:
                msg = "attach_opaque_data and transaction_opaque_data are required to bind a RETURNING write"
                raise ValueError(msg)
            table_schema = _get_table_schema_from_transactor(table_name, attach_opaque_data, tx_id)
            user_fields = [f for f in table_schema if f.name not in ("rowid", "row_id")]
            return BindResponse(output_schema=pa.schema(user_fields))
        return BindResponse(output_schema=_COUNT_SCHEMA)

    @classmethod
    def _open_stream(
        cls,
        proxy: Any,
        attach_opaque_data: bytes,
        tx_id: bytes,
        table_name: str,
        returning: bool,
        batch: pa.RecordBatch,
    ) -> Any:
        """Open a write stream. Override for operations needing extra args.

        ``batch`` is unused here; it is part of the signature so overrides can
        derive extra arguments from it.
        """
        return getattr(proxy, cls._operation)(
            attach_opaque_data=attach_opaque_data,
            tx_id=tx_id,
            table_name=table_name,
            returning=returning,
        )

    @classmethod
    def process(cls, params: ProcessParams[None], state: None, batch: pa.RecordBatch, out: OutputCollector) -> None:
        """Forward batch to transactor write stream and emit the transactor's reply."""
        table_name = _get_table_name_from_process(params)
        attach_opaque_data = _get_attach_opaque_data(params)
        tx_id = _get_tx_id(params)
        # RETURNING is inferred from the bound output schema.
        # NOTE(review): a table whose user columns equal the count schema would be
        # treated as non-returning here — confirm whether that case can occur.
        returning = params.output_schema != _COUNT_SCHEMA
        proxy = transactor_proxy._get_proxy()
        with cls._open_stream(proxy, attach_opaque_data, tx_id, table_name, returning, batch) as stream:
            response = stream.exchange(AnnotatedBatch(batch=batch))
            out.emit(response.batch)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class GenericTableInsert(_GenericWriteBase):
    """INSERT handler for any table — the table name comes from the first positional arg."""

    # Name of the transactor proxy method the base class calls to open the stream.
    _operation = "insert"

    class Meta:
        """Metadata for GenericTableInsert."""

        name = "generic_writable_insert"
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class GenericTableUpdate(_GenericWriteBase):
    """UPDATE handler for any table — the table name comes from the first positional arg."""

    _operation = "update"

    class Meta:
        """Metadata for GenericTableUpdate."""

        name = "generic_writable_update"

    @classmethod
    def _open_stream(
        cls,
        proxy: Any,
        attach_opaque_data: bytes,
        tx_id: bytes,
        table_name: str,
        returning: bool,
        batch: pa.RecordBatch,
    ) -> Any:
        """Open an update stream, sending every batch column except ``rowid`` as the column list."""
        columns_to_update = [column for column in batch.schema.names if column != "rowid"]
        return proxy.update(
            attach_opaque_data=attach_opaque_data,
            tx_id=tx_id,
            table_name=table_name,
            columns=columns_to_update,
            returning=returning,
        )
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class GenericTableDelete(_GenericWriteBase):
    """DELETE handler for any table — the table name comes from the first positional arg."""

    # Name of the transactor proxy method the base class calls to open the stream.
    _operation = "delete"

    class Meta:
        """Metadata for GenericTableDelete."""

        name = "generic_writable_delete"
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Writable table infrastructure — transactor proxy and shared helpers.
|
|
4
|
+
|
|
5
|
+
Provides the ``TransactorProxy`` for connecting to the db-transactor subprocess,
|
|
6
|
+
and helper functions used by the generic writable functions in ``vgi/_test_fixtures/writable/generic.py``.
|
|
7
|
+
All tables are created dynamically via CREATE TABLE DDL at the client side.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from collections.abc import Iterator
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from typing import TYPE_CHECKING, Annotated
|
|
15
|
+
|
|
16
|
+
import pyarrow as pa
|
|
17
|
+
from vgi_rpc import AnnotatedBatch, ArrowSerializableDataclass, Transient
|
|
18
|
+
|
|
19
|
+
from vgi.schema_utils import schema
|
|
20
|
+
from vgi.table_function import BindParams, ProcessParams
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from vgi.protocol import BindRequest
|
|
24
|
+
|
|
25
|
+
from vgi.transactor.client import TransactorClient
|
|
26
|
+
from vgi.transactor.protocol import TransactorProtocol
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
"TransactorProxy",
|
|
30
|
+
"WritableScanState",
|
|
31
|
+
"transactor_proxy",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
# Schema used by write functions whose output is an affected-row count.
_COUNT_SCHEMA = schema(count=pa.int64())

# Field for DuckDB's native ``rowid`` pseudocolumn. The ``is_row_id`` metadata key
# tells the C++ extension which column carries the physical row identifier.
_ROWID_FIELD = pa.field("rowid", pa.int64(), metadata={b"is_row_id": b""})
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _parse_write_options(bind_call: BindRequest) -> dict[str, bool | str | list[str]]:
    """Parse the write_options RecordBatch from the bind call's named arguments.

    Args:
        bind_call: Bind request whose ``arguments.named["write_options"]`` entry, when
            present, holds a serialized record batch of options.

    Returns:
        A fresh dict with the keys ``return_chunks``, ``on_conflict`` and
        ``on_conflict_columns``. Each value is taken from the first row of the options
        batch when that column exists and is non-null; otherwise the default
        (``False``, ``"throw"``, ``[]``) is kept.
    """
    options: dict[str, bool | str | list[str]] = {
        "return_chunks": False,
        "on_conflict": "throw",
        "on_conflict_columns": [],
    }
    arguments = bind_call.arguments
    if not (arguments and arguments.named):
        return options
    val = arguments.named.get("write_options")
    if val is None:
        return options
    options_bytes = val.as_py()
    if options_bytes is None:
        # A null scalar carries no payload to deserialize; treat it as "not supplied".
        return options

    from vgi_rpc.utils import deserialize_record_batch

    batch, _ = deserialize_record_batch(options_bytes)
    if batch.num_rows == 0:
        # Nothing to read; indexing row 0 of an empty column would raise IndexError.
        return options
    present = set(batch.schema.names)
    for key in tuple(options):
        if key not in present:
            continue
        value = batch.column(key)[0].as_py()
        # Keep the default for null cells so the result never holds ``None``,
        # which the declared return type does not allow.
        if value is not None:
            options[key] = value
    return options
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _is_returning(params: BindParams[None]) -> bool:
    """Report whether the bind call's write options request RETURNING rows.

    This is the truthiness of the ``return_chunks`` write option, which the original
    note attributes to the C++ operator.
    """
    write_options = _parse_write_options(params.bind_call)
    wants_rows = write_options.get("return_chunks", False)
    return bool(wants_rows)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _get_tx_id(params: ProcessParams[None]) -> bytes:
    """Return ``transaction_opaque_data`` from the bind request behind ``params``.

    Raises:
        ValueError: If the value is missing or empty.
    """
    init_call = params.init_call
    assert init_call is not None
    transaction_id = init_call.bind_call.transaction_opaque_data
    if not transaction_id:
        msg = "transaction_opaque_data is required but was not provided in the bind request"
        raise ValueError(msg)
    return transaction_id
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _get_attach_opaque_data(params: ProcessParams[None]) -> bytes:
    """Return the unwrapped plaintext attach data (storage shards on the sealed form).

    Raises:
        ValueError: If ``params.attach_opaque_data`` is missing or empty.
    """
    plaintext = params.attach_opaque_data
    if not plaintext:
        msg = "attach_opaque_data is required but was not provided in the bind request"
        raise ValueError(msg)
    return plaintext
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _get_pushdown_filters(params: ProcessParams[None]) -> bytes | None:
    """Get pushdown_filters as serialized IPC bytes from params (or None).

    Args:
        params: Process parameters; the filters are read from
            ``params.init_call.pushdown_filters``.

    Returns:
        The filter batch written as an Arrow IPC stream, or ``None`` when there is none.
    """
    # Internal invariant of the process phase; also narrows the Optional for type checkers.
    assert params.init_call is not None
    pf_batch = params.init_call.pushdown_filters
    if pf_batch is None:
        return None
    sink = pa.BufferOutputStream()
    # The context manager closes the writer even if ``write_batch`` raises; closing
    # also writes the end-of-stream marker before the buffer is read back.
    with pa.ipc.new_stream(sink, pf_batch.schema) as writer:
        writer.write_batch(pf_batch)
    return sink.getvalue().to_pybytes()
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass(kw_only=True)
class WritableScanState(ArrowSerializableDataclass):
    """Scan state for writable tables: carries the live transactor scan iterator."""

    # Iterator over batches from the transactor scan; annotated ``Transient`` and
    # defaulting to ``None`` until a scan is opened.
    scan_iter: Annotated[Iterator[AnnotatedBatch] | None, Transient()] = None
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# ============================================================================
|
|
114
|
+
# TransactorProxy — manages the db-transactor connection
|
|
115
|
+
# ============================================================================
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class TransactorProxy:
    """Owns the connection to the shared db-transactor subprocess.

    The transactor manages multiple databases internally (one per attach_opaque_data).
    The DDL statements given at construction are sent with every ``register()`` call
    for a new catalog attachment.
    """

    def __init__(self, ddl_statements: list[str] | None = None) -> None:
        """Store the initial DDL statements; no client is created yet."""
        self._ddl = [] if not ddl_statements else ddl_statements
        self._client: TransactorClient | None = None

    def _get_proxy(self) -> TransactorProtocol:
        """Return the transactor RPC proxy, creating the client on first use."""
        client = self._client
        if client is None:
            client = self._client = TransactorClient()
        return client.get_proxy()  # type: ignore[no-any-return]

    def register(self, attach_opaque_data: bytes, catalog_name: str = "") -> None:
        """Register a database for this attach_opaque_data, passing along the initial DDL."""
        self._get_proxy().register(
            attach_opaque_data=attach_opaque_data,
            catalog_name=catalog_name,
            ddl_statements=self._ddl,
        )

    def close(self) -> None:
        """Close the transactor client, if one was created, and forget it."""
        client = self._client
        if client is None:
            return
        client.close()
        self._client = None
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# Shared module-level proxy. It is built without initial DDL because all tables
# are created dynamically via DDL.
transactor_proxy = TransactorProxy()
|