vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
|
@@ -0,0 +1,1148 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Writable worker with transactional INSERT, UPDATE, DELETE, and DDL support.
|
|
4
|
+
|
|
5
|
+
This worker exposes a fully dynamic writable catalog backed by a db-transactor
|
|
6
|
+
subprocess. All tables are created via CREATE TABLE DDL — there are no static
|
|
7
|
+
table definitions. Generic scan/insert/update/delete functions serve any table.
|
|
8
|
+
|
|
9
|
+
Usage::
|
|
10
|
+
|
|
11
|
+
vgi-fixture-writable-worker
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
import uuid
|
|
18
|
+
from collections.abc import Sequence
|
|
19
|
+
from typing import TYPE_CHECKING, Any, Literal, overload
|
|
20
|
+
|
|
21
|
+
import pyarrow as pa
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from vgi_rpc.rpc import CallContext
|
|
25
|
+
|
|
26
|
+
from vgi._test_fixtures.writable.generic import (
|
|
27
|
+
GenericTableDelete,
|
|
28
|
+
GenericTableInsert,
|
|
29
|
+
GenericTableScan,
|
|
30
|
+
GenericTableUpdate,
|
|
31
|
+
)
|
|
32
|
+
from vgi._test_fixtures.writable.table import transactor_proxy
|
|
33
|
+
from vgi.catalog import (
|
|
34
|
+
AttachOpaqueData,
|
|
35
|
+
Catalog,
|
|
36
|
+
CatalogAttachResult,
|
|
37
|
+
FunctionInfo,
|
|
38
|
+
IndexInfo,
|
|
39
|
+
MacroInfo,
|
|
40
|
+
OnConflict,
|
|
41
|
+
ReadOnlyCatalogInterface,
|
|
42
|
+
ScanFunctionResult,
|
|
43
|
+
Schema,
|
|
44
|
+
SchemaInfo,
|
|
45
|
+
SchemaObjectType,
|
|
46
|
+
SerializedSchema,
|
|
47
|
+
TableInfo,
|
|
48
|
+
TransactionOpaqueData,
|
|
49
|
+
ViewInfo,
|
|
50
|
+
)
|
|
51
|
+
from vgi.worker import Worker
|
|
52
|
+
|
|
53
|
+
logger = logging.getLogger("vgi.writable_worker")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _qi(name: str) -> str:
|
|
57
|
+
"""Quote a SQL identifier with double quotes, escaping internal double quotes."""
|
|
58
|
+
return '"' + name.replace('"', '""') + '"'
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _qn(schema_name: str, name: str) -> str:
    """Return the quoted ``schema.object`` form of an identifier pair."""
    return ".".join(_qi(part) for part in (schema_name, name))
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _comment_sql(target: str, comment: str | None) -> str:
|
|
67
|
+
"""Build COMMENT ON <target> IS ... SQL."""
|
|
68
|
+
if comment is None:
|
|
69
|
+
return f"COMMENT ON {target} IS NULL;"
|
|
70
|
+
escaped = comment.replace("'", "''")
|
|
71
|
+
return f"COMMENT ON {target} IS '{escaped}';"
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ============================================================================
|
|
75
|
+
# Arrow type to DuckDB SQL type mapping
|
|
76
|
+
# ============================================================================
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _arrow_type_to_sql(arrow_type: pa.DataType) -> str:
    """Map a PyArrow type to a DuckDB SQL type string.

    Primitive types are looked up in a fixed table; timestamp, time,
    duration, decimal, list, struct and map types are handled structurally,
    recursing into child types where needed.

    Args:
        arrow_type: The Arrow type to translate.

    Returns:
        The DuckDB type name. Types with no mapping fall back to ``VARCHAR``
        and a warning is logged.
    """
    simple_types: dict[pa.DataType, str] = {
        pa.int8(): "TINYINT",
        pa.int16(): "SMALLINT",
        pa.int32(): "INTEGER",
        pa.int64(): "BIGINT",
        pa.uint8(): "UTINYINT",
        pa.uint16(): "USMALLINT",
        pa.uint32(): "UINTEGER",
        pa.uint64(): "UBIGINT",
        pa.float16(): "FLOAT",
        pa.float32(): "FLOAT",
        pa.float64(): "DOUBLE",
        pa.string(): "VARCHAR",
        pa.utf8(): "VARCHAR",
        pa.large_utf8(): "VARCHAR",
        pa.binary(): "BLOB",
        pa.large_binary(): "BLOB",
        pa.bool_(): "BOOLEAN",
        pa.date32(): "DATE",
        pa.date64(): "DATE",
    }

    simple = simple_types.get(arrow_type)
    if simple is not None:
        return simple

    # Timestamp types: keep time-zone awareness instead of silently
    # degrading a tz-aware column to a naive TIMESTAMP.
    if pa.types.is_timestamp(arrow_type):
        return "TIMESTAMP WITH TIME ZONE" if arrow_type.tz is not None else "TIMESTAMP"

    # Time types
    if pa.types.is_time(arrow_type):
        return "TIME"

    # Duration types
    if pa.types.is_duration(arrow_type):
        return "INTERVAL"

    # Decimal types
    if pa.types.is_decimal(arrow_type):
        return f"DECIMAL({arrow_type.precision}, {arrow_type.scale})"

    # List types
    if pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type):
        return f"{_arrow_type_to_sql(arrow_type.value_type)}[]"

    # Struct types: field names are quoted like every other identifier so
    # names containing spaces, quotes or reserved words stay valid SQL.
    if pa.types.is_struct(arrow_type):
        members = ", ".join(
            f"{_qi(member.name)} {_arrow_type_to_sql(member.type)}"
            for member in (arrow_type.field(i) for i in range(arrow_type.num_fields))
        )
        return f"STRUCT({members})"

    # Map types
    if pa.types.is_map(arrow_type):
        key_sql = _arrow_type_to_sql(arrow_type.key_type)
        item_sql = _arrow_type_to_sql(arrow_type.item_type)
        return f"MAP({key_sql}, {item_sql})"

    # Fallback
    logger.warning("_arrow_type_to_sql: unmapped Arrow type %s, falling back to VARCHAR", arrow_type)
    return "VARCHAR"
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# ============================================================================
|
|
147
|
+
# FK constraint deserialization
|
|
148
|
+
# ============================================================================
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _deserialize_fk(fk_bytes: bytes) -> dict[str, Any]:
    """Decode one foreign key constraint from an Arrow IPC stream.

    Only the first row of the first record batch is read.

    Returns:
        A dict with the keys ``fk_columns``, ``pk_columns``,
        ``referenced_table`` and ``referenced_schema``, each converted to a
        plain Python value.
    """
    first_batch = pa.ipc.open_stream(fk_bytes).read_next_batch()
    wanted = ("fk_columns", "pk_columns", "referenced_table", "referenced_schema")
    return {key: first_batch.column(key)[0].as_py() for key in wanted}
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# ============================================================================
|
|
167
|
+
# Catalog definition — generic functions only, no static tables
|
|
168
|
+
# ============================================================================
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# Static catalog declaration: a single "main" schema that exposes only the
# four generic scan/insert/update/delete functions. The table list is
# deliberately empty — tables are created at runtime through DDL.
_WRITABLE_CATALOG = Catalog(
    name="writable",
    default_schema="main",
    schemas=[
        Schema(
            name="main",
            functions=[
                GenericTableScan,
                GenericTableInsert,
                GenericTableUpdate,
                GenericTableDelete,
            ],
            tables=[],
        ),
    ],
)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# ============================================================================
|
|
190
|
+
# WritableCatalog — fully dynamic catalog interface
|
|
191
|
+
# ============================================================================
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class WritableCatalog(ReadOnlyCatalogInterface):
|
|
195
|
+
"""Fully dynamic catalog — all tables created via DDL, served by generic functions."""
|
|
196
|
+
|
|
197
|
+
catalog = _WRITABLE_CATALOG
|
|
198
|
+
supports_transactions = True
|
|
199
|
+
catalog_version_frozen = False
|
|
200
|
+
|
|
201
|
+
def catalog_attach(
    self,
    *,
    name: str,
    options: dict[str, Any],
    data_version_spec: str | None,
    implementation_version: str | None,
    ctx: CallContext | None = None,
) -> CatalogAttachResult:
    """Attach the writable catalog and register a fresh database for it.

    A random UUID becomes the attach token; the transactor is told about it
    before the attach result is returned.

    Raises:
        ValueError: If *name* is not the name of the writable catalog.
    """
    # Accepted for interface compatibility only; not used by this catalog.
    del data_version_spec, implementation_version, ctx

    known_name = _WRITABLE_CATALOG.name
    if name != known_name:
        raise ValueError(f"Unknown catalog: {name!r}. Available: {known_name}")

    token = AttachOpaqueData(uuid.uuid4().bytes)
    transactor_proxy.register(attach_opaque_data=token, catalog_name=name)

    return CatalogAttachResult(
        attach_opaque_data=token,
        supports_transactions=True,
        supports_time_travel=False,
        catalog_version_frozen=False,
        catalog_version=1,
        attach_opaque_data_required=True,
        default_schema="main",
        settings=[],
        secret_types=[],
        resolved_data_version=None,
        resolved_implementation_version=None,
    )
|
|
229
|
+
|
|
230
|
+
def catalog_version(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    ctx: CallContext | None = None,
) -> int:
    """Ask the transactor for the current catalog version of this attachment.

    ``transaction_opaque_data`` is accepted but not forwarded; the lookup is
    keyed on the attach token alone.
    """
    del ctx
    return transactor_proxy._get_proxy().catalog_version(attach_opaque_data=attach_opaque_data)
|
|
241
|
+
|
|
242
|
+
# ========== Transaction lifecycle ==========
|
|
243
|
+
|
|
244
|
+
def catalog_transaction_begin(self, *, attach_opaque_data: AttachOpaqueData) -> TransactionOpaqueData | None:
    """Open a transaction; the transactor chooses the transaction id.

    Returns:
        The transactor's id wrapped as ``TransactionOpaqueData``.
    """
    new_tx_id = transactor_proxy._get_proxy().begin(attach_opaque_data=attach_opaque_data)
    return TransactionOpaqueData(new_tx_id)
|
|
249
|
+
|
|
250
|
+
def catalog_transaction_commit(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData,
) -> None:
    """Forward a commit for the given transaction to the transactor."""
    transactor_proxy._get_proxy().commit(
        attach_opaque_data=attach_opaque_data,
        tx_id=transaction_opaque_data,
    )
|
|
256
|
+
|
|
257
|
+
def catalog_transaction_rollback(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData,
) -> None:
    """Forward a rollback for the given transaction to the transactor."""
    transactor_proxy._get_proxy().rollback(
        attach_opaque_data=attach_opaque_data,
        tx_id=transaction_opaque_data,
    )
|
|
263
|
+
|
|
264
|
+
# ========== DDL helpers ==========
|
|
265
|
+
|
|
266
|
+
def _execute_ddl(
    self,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    sql: str,
) -> None:
    """Run one DDL statement inside the caller's transaction.

    Args:
        attach_opaque_data: Attach token identifying the target database.
        transaction_opaque_data: Id of the open transaction; must be truthy.
        sql: The DDL statement to execute.

    Raises:
        ValueError: If no transaction id was supplied.
    """
    if transaction_opaque_data:
        transactor_proxy._get_proxy().execute_ddl_tx(
            attach_opaque_data=attach_opaque_data,
            tx_id=transaction_opaque_data,
            sql=sql,
        )
        return
    raise ValueError("transaction_opaque_data is required for DDL operations")
|
|
274
|
+
|
|
275
|
+
# ========== DDL: Table operations ==========
|
|
276
|
+
|
|
277
|
+
def table_create(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    columns: SerializedSchema,
    on_conflict: OnConflict,
    not_null_constraints: list[int],
    unique_constraints: list[list[int]],
    check_constraints: list[str],
    primary_key_constraints: list[list[int]] | None = None,
    foreign_key_constraints: list[bytes] | None = None,
) -> None:
    """Create a new table in the transactor's DuckDB database.

    Builds CREATE TABLE DDL from column definitions and constraints.
    DuckDB's built-in rowid pseudocolumn provides the row identifier
    automatically — no extra column or sequence needed.

    Args:
        columns: Arrow IPC-serialized schema; one field per column. A
            ``default`` metadata entry on a field supplies its DEFAULT
            expression.
        on_conflict: ``IGNORE`` emits ``IF NOT EXISTS``, ``REPLACE`` emits
            ``CREATE OR REPLACE``; any other value emits a plain CREATE.
        not_null_constraints: Column indexes that are NOT NULL.
        unique_constraints: Groups of column indexes, one UNIQUE each.
        check_constraints: SQL expressions, one CHECK each.
        primary_key_constraints: Groups of column indexes, one PRIMARY KEY each.
        foreign_key_constraints: IPC-serialized foreign key descriptions.

    Raises:
        ValueError: If no transaction id was supplied.
    """
    col_schema = pa.ipc.read_schema(pa.BufferReader(columns))  # type: ignore[arg-type]

    # Honour all conflict modes; REPLACE used to fall through to a plain
    # CREATE and fail on an existing table.
    or_replace = " OR REPLACE" if on_conflict == OnConflict.REPLACE else ""
    if_not_exists = " IF NOT EXISTS" if on_conflict == OnConflict.IGNORE else ""

    def quoted_columns(indexes: list[int]) -> str:
        """Render column indexes as a comma-separated list of quoted names."""
        return ", ".join(_qi(col_schema.field(i).name) for i in indexes)

    # Column definitions (including defaults from Arrow field metadata).
    not_null_indexes = set(not_null_constraints)
    col_defs: list[str] = []
    for index, field in enumerate(col_schema):
        col_def = f"{_qi(field.name)} {_arrow_type_to_sql(field.type)}"
        if index in not_null_indexes:
            col_def += " NOT NULL"
        # Default values are passed as Arrow field metadata with key "default"
        if field.metadata and b"default" in field.metadata:
            default_expr = field.metadata[b"default"].decode("utf-8")
            col_def += f" DEFAULT {default_expr}"
        col_defs.append(col_def)

    # Table-level constraints, in the order: PK, UNIQUE, CHECK, FK.
    constraints: list[str] = []
    for pk_group in primary_key_constraints or ():
        constraints.append(f"PRIMARY KEY ({quoted_columns(pk_group)})")
    for unique_group in unique_constraints:
        constraints.append(f"UNIQUE ({quoted_columns(unique_group)})")
    for check_expr in check_constraints:
        constraints.append(f"CHECK ({check_expr})")
    for fk_bytes in foreign_key_constraints or ():
        fk = _deserialize_fk(fk_bytes)
        fk_cols = ", ".join(_qi(c) for c in fk["fk_columns"])
        ref_cols = ", ".join(_qi(c) for c in fk["pk_columns"])
        ref_table = _qi(fk["referenced_table"])
        constraints.append(f"FOREIGN KEY ({fk_cols}) REFERENCES {ref_table}({ref_cols})")

    # Combine into CREATE TABLE
    columns_sql = ",\n ".join(col_defs + constraints)
    ddl = f"CREATE{or_replace} TABLE{if_not_exists} {_qn(schema_name, name)} (\n {columns_sql}\n);"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, ddl)
    logger.info("table_create: %s (on_conflict=%s)", name, on_conflict.value)
|
|
348
|
+
|
|
349
|
+
def table_drop(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    ignore_not_found: bool,
    cascade: bool = False,
) -> None:
    """Drop a table.

    ``ignore_not_found`` adds ``IF EXISTS`` and ``cascade`` adds ``CASCADE``
    to the generated statement.
    """
    statement = "".join(
        [
            "DROP TABLE",
            " IF EXISTS" if ignore_not_found else "",
            " ",
            _qn(schema_name, name),
            " CASCADE" if cascade else "",
            ";",
        ]
    )
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, statement)
    logger.info("table_drop: %s", name)
|
|
366
|
+
|
|
367
|
+
def table_rename(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    new_name: str,
    ignore_not_found: bool,
) -> None:
    """Rename a table.

    Args:
        schema_name: Schema containing the table.
        name: Current table name.
        new_name: New table name (unqualified).
        ignore_not_found: When true, ``IF EXISTS`` is added so a missing
            table is not an error, matching ``table_drop``.
    """
    if_exists = " IF EXISTS" if ignore_not_found else ""
    self._execute_ddl(
        attach_opaque_data,
        transaction_opaque_data,
        f"ALTER TABLE{if_exists} {_qn(schema_name, name)} RENAME TO {_qi(new_name)};",
    )
    logger.info("table_rename: %s -> %s", name, new_name)
|
|
384
|
+
|
|
385
|
+
def table_column_add(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    column_definition: SerializedSchema,
    ignore_not_found: bool,
    if_column_not_exists: bool,
) -> None:
    """Add a column to a table.

    Args:
        column_definition: Arrow IPC-serialized schema; only its first field
            is used as the new column.
        ignore_not_found: When true, ``IF EXISTS`` is added for the table.
        if_column_not_exists: When true, ``IF NOT EXISTS`` is added for the
            column.
    """
    col_schema = pa.ipc.read_schema(pa.BufferReader(column_definition))  # type: ignore[arg-type]
    field = col_schema.field(0)

    table_if_exists = " IF EXISTS" if ignore_not_found else ""
    if_not_exists = " IF NOT EXISTS" if if_column_not_exists else ""

    col_sql = f"{_qi(field.name)} {_arrow_type_to_sql(field.type)}"
    # Same convention as table_create: a "default" metadata entry carries
    # the DEFAULT expression. NOTE(review): confirm that callers attach this
    # metadata for ADD COLUMN as well; when absent nothing changes.
    if field.metadata and b"default" in field.metadata:
        default_expr = field.metadata[b"default"].decode("utf-8")
        col_sql += f" DEFAULT {default_expr}"

    ddl = f"ALTER TABLE{table_if_exists} {_qn(schema_name, name)} ADD COLUMN{if_not_exists} {col_sql};"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, ddl)
    logger.info("table_column_add: %s.%s", name, field.name)
|
|
404
|
+
|
|
405
|
+
def table_column_drop(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    column_name: str,
    ignore_not_found: bool,
    if_column_exists: bool,
    cascade: bool,
) -> None:
    """Drop a column from a table.

    Args:
        column_name: Column to remove.
        ignore_not_found: When true, ``IF EXISTS`` is added for the table.
        if_column_exists: When true, ``IF EXISTS`` is added for the column.
        cascade: When true, ``CASCADE`` is appended.
    """
    table_if_exists = " IF EXISTS" if ignore_not_found else ""
    column_if_exists = " IF EXISTS" if if_column_exists else ""
    cascade_sql = " CASCADE" if cascade else ""
    ddl = (
        f"ALTER TABLE{table_if_exists} {_qn(schema_name, name)} "
        f"DROP COLUMN{column_if_exists} {_qi(column_name)}{cascade_sql};"
    )
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, ddl)
    logger.info("table_column_drop: %s.%s", name, column_name)
|
|
423
|
+
|
|
424
|
+
def table_column_rename(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    column_name: str,
    new_column_name: str,
    ignore_not_found: bool,
) -> None:
    """Rename a column in a table.

    Args:
        column_name: Current column name.
        new_column_name: New column name.
        ignore_not_found: When true, ``IF EXISTS`` is added so a missing
            table is not an error.
    """
    if_exists = " IF EXISTS" if ignore_not_found else ""
    ddl = (
        f"ALTER TABLE{if_exists} {_qn(schema_name, name)} "
        f"RENAME COLUMN {_qi(column_name)} TO {_qi(new_column_name)};"
    )
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, ddl)
    logger.info("table_column_rename: %s.%s -> %s", name, column_name, new_column_name)
|
|
439
|
+
|
|
440
|
+
def table_comment_set(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    comment: str | None,
    ignore_not_found: bool,
) -> None:
    """Set the comment on a table, or clear it when *comment* is ``None``.

    ``ignore_not_found`` is accepted but has no effect on the generated
    statement.
    """
    target = f"TABLE {_qn(schema_name, name)}"
    statement = _comment_sql(target, comment)
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, statement)
|
|
454
|
+
|
|
455
|
+
def table_column_comment_set(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    column_name: str,
    comment: str | None,
    ignore_not_found: bool,
) -> None:
    """Set the comment on a table column, or clear it when *comment* is ``None``.

    ``ignore_not_found`` is accepted but has no effect on the generated
    statement.
    """
    column_ref = ".".join((_qn(schema_name, name), _qi(column_name)))
    statement = _comment_sql(f"COLUMN {column_ref}", comment)
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, statement)
|
|
469
|
+
|
|
470
|
+
def table_column_type_change(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    column_definition: SerializedSchema,
    expression: str | None,
    ignore_not_found: bool,
) -> None:
    """Change the type of a column in a table.

    Args:
        column_definition: Arrow IPC-serialized schema; its first field
            names the column and carries the new type.
        expression: Optional conversion expression, emitted as ``USING``.
        ignore_not_found: When true, ``IF EXISTS`` is added so a missing
            table is not an error.
    """
    col_schema = pa.ipc.read_schema(pa.BufferReader(column_definition))  # type: ignore[arg-type]
    field = col_schema.field(0)
    col_type = _arrow_type_to_sql(field.type)
    if_exists = " IF EXISTS" if ignore_not_found else ""
    sql = f"ALTER TABLE{if_exists} {_qn(schema_name, name)} ALTER COLUMN {_qi(field.name)} TYPE {col_type}"
    if expression:
        # expression comes from DuckDB's binder (serialized AST), not raw user input
        sql += f" USING {expression}"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, sql + ";")
|
|
490
|
+
|
|
491
|
+
def table_column_default_set(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    column_name: str,
    expression: str,
    ignore_not_found: bool,
) -> None:
    """Set the default expression for a column.

    Args:
        column_name: Column whose default is changed.
        expression: SQL expression inserted verbatim after ``SET DEFAULT``.
        ignore_not_found: When true, ``IF EXISTS`` is added so a missing
            table is not an error.
    """
    if_exists = " IF EXISTS" if ignore_not_found else ""
    ddl = (
        f"ALTER TABLE{if_exists} {_qn(schema_name, name)} "
        f"ALTER COLUMN {_qi(column_name)} SET DEFAULT {expression};"
    )
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, ddl)
|
|
505
|
+
|
|
506
|
+
def table_column_default_drop(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    column_name: str,
    ignore_not_found: bool,
) -> None:
    """Drop the default expression for a column.

    Args:
        column_name: Column whose default is removed.
        ignore_not_found: When true, ``IF EXISTS`` is added so a missing
            table is not an error.
    """
    if_exists = " IF EXISTS" if ignore_not_found else ""
    ddl = f"ALTER TABLE{if_exists} {_qn(schema_name, name)} ALTER COLUMN {_qi(column_name)} DROP DEFAULT;"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, ddl)
|
|
519
|
+
|
|
520
|
+
def table_not_null_set(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    column_name: str,
    ignore_not_found: bool,
) -> None:
    """Set NOT NULL constraint on a column.

    Args:
        column_name: Column that becomes NOT NULL.
        ignore_not_found: When true, ``IF EXISTS`` is added so a missing
            table is not an error.
    """
    if_exists = " IF EXISTS" if ignore_not_found else ""
    ddl = f"ALTER TABLE{if_exists} {_qn(schema_name, name)} ALTER COLUMN {_qi(column_name)} SET NOT NULL;"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, ddl)
|
|
533
|
+
|
|
534
|
+
def table_not_null_drop(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    column_name: str,
    ignore_not_found: bool,
) -> None:
    """Drop NOT NULL constraint from a column.

    Args:
        column_name: Column that becomes nullable.
        ignore_not_found: When true, ``IF EXISTS`` is added so a missing
            table is not an error.
    """
    if_exists = " IF EXISTS" if ignore_not_found else ""
    ddl = f"ALTER TABLE{if_exists} {_qn(schema_name, name)} ALTER COLUMN {_qi(column_name)} DROP NOT NULL;"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, ddl)
|
|
547
|
+
|
|
548
|
+
# ========== Schema discovery (merge static + dynamic) ==========
|
|
549
|
+
|
|
550
|
+
def schemas(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
) -> list[SchemaInfo]:
    """List schemas — merge static catalog schemas with transactor schemas.

    Static schemas come first. Schemas reported by the transactor are
    appended unless a static schema already has the same name
    (case-insensitive). Without a transaction, or when the transactor call
    fails, only the static schemas are returned.
    """
    declared = super().schemas(
        attach_opaque_data=attach_opaque_data, transaction_opaque_data=transaction_opaque_data
    )
    # Static ``Schema(tables=[])`` declaration would auto-populate
    # ``estimated_object_count[table] = 0``, which the C++ client treats
    # as a hard guarantee and skips the bulk RPC. But this catalog adds
    # tables dynamically via the transactor, so the static zero is wrong.
    # Strip estimated_object_count entirely — the client falls back to its
    # default of 1 (unknown / eager-load), which restores correct lookups.
    static_schemas = [
        SchemaInfo(
            attach_opaque_data=s.attach_opaque_data,
            name=s.name,
            comment=s.comment,
            tags=s.tags,
            estimated_object_count=None,
        )
        for s in declared
    ]

    if not transaction_opaque_data:
        return static_schemas

    try:
        proxy = transactor_proxy._get_proxy()
        dynamic_names = proxy.list_schemas(attach_opaque_data=attach_opaque_data, tx_id=transaction_opaque_data)
    except Exception:
        # Best effort: fall back to the static list, but keep the cause in
        # the debug log so the failure can be diagnosed.
        logger.debug("schemas: failed to list schemas from transactor", exc_info=True)
        return static_schemas

    static_names = {s.name.lower() for s in static_schemas}
    result = list(static_schemas)
    result.extend(
        SchemaInfo(
            attach_opaque_data=attach_opaque_data,
            name=name,
            comment=None,
            tags={},
        )
        for name in dynamic_names
        if name.lower() not in static_names
    )
    return result
|
|
600
|
+
|
|
601
|
+
def schema_get(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    name: str,
) -> SchemaInfo | None:
    """Get schema info — check static first, then transactor.

    Returns:
        The matching ``SchemaInfo``, or ``None`` when the schema is unknown,
        no transaction was supplied for the dynamic lookup, or the
        transactor call fails.
    """
    static_info = super().schema_get(
        attach_opaque_data=attach_opaque_data, transaction_opaque_data=transaction_opaque_data, name=name
    )
    if static_info is not None:
        # Tables are added dynamically via the transactor, so any object
        # count derived from the static (empty) declaration would be wrong.
        # Drop it, exactly as the schema listing does.
        return SchemaInfo(
            attach_opaque_data=static_info.attach_opaque_data,
            name=static_info.name,
            comment=static_info.comment,
            tags=static_info.tags,
            estimated_object_count=None,
        )

    if not transaction_opaque_data:
        return None

    try:
        proxy = transactor_proxy._get_proxy()
        schema_names = proxy.list_schemas(attach_opaque_data=attach_opaque_data, tx_id=transaction_opaque_data)
    except Exception:
        # Best effort: report "not found", but keep the cause in the debug log.
        logger.debug("schema_get: failed to list schemas from transactor", exc_info=True)
        return None

    wanted = name.lower()
    if any(candidate.lower() == wanted for candidate in schema_names):
        return SchemaInfo(
            attach_opaque_data=attach_opaque_data,
            name=name,
            comment=None,
            tags={},
        )
    return None
|
|
632
|
+
|
|
633
|
+
# ========== DDL: Schema operations ==========
|
|
634
|
+
|
|
635
|
+
def schema_create(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    name: str,
    on_conflict: OnConflict = OnConflict.ERROR,
    comment: str | None,
    tags: dict[str, str] | None,
) -> None:
    """Issue a ``CREATE SCHEMA`` statement through ``_execute_ddl``.

    ``IF NOT EXISTS`` is added only when ``on_conflict`` is
    ``OnConflict.IGNORE``. ``comment`` and ``tags`` are accepted for
    interface compatibility but are not referenced by this method.
    """
    guard = ""
    if on_conflict == OnConflict.IGNORE:
        guard = " IF NOT EXISTS"
    statement = f"CREATE SCHEMA{guard} {_qi(name)};"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, statement)
|
|
648
|
+
|
|
649
|
+
def schema_drop(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    name: str,
    ignore_not_found: bool,
    cascade: bool,
) -> None:
    """Issue a ``DROP SCHEMA`` statement through ``_execute_ddl``.

    ``ignore_not_found`` adds ``IF EXISTS`` and ``cascade`` appends
    ``CASCADE`` to the generated statement.
    """
    exists_clause = ""
    if ignore_not_found:
        exists_clause = " IF EXISTS"

    cascade_clause = ""
    if cascade:
        cascade_clause = " CASCADE"

    statement = f"DROP SCHEMA{exists_clause} {_qi(name)}{cascade_clause};"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, statement)
|
|
664
|
+
|
|
665
|
+
# ========== DDL: View operations ==========
|
|
666
|
+
|
|
667
|
+
def view_create(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    definition: str,
    on_conflict: OnConflict,
) -> None:
    """Issue a ``CREATE VIEW`` statement through ``_execute_ddl``.

    ``OnConflict.REPLACE`` produces ``CREATE OR REPLACE VIEW`` and
    ``OnConflict.IGNORE`` produces ``CREATE VIEW IF NOT EXISTS``; any other
    value produces a plain ``CREATE VIEW``. ``definition`` is spliced into
    the statement verbatim after ``AS``.
    """
    replace_clause = ""
    if on_conflict == OnConflict.REPLACE:
        replace_clause = " OR REPLACE"

    guard_clause = ""
    if on_conflict == OnConflict.IGNORE:
        guard_clause = " IF NOT EXISTS"

    target = _qn(schema_name, name)
    statement = f"CREATE{replace_clause} VIEW{guard_clause} {target} AS {definition};"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, statement)
|
|
682
|
+
|
|
683
|
+
def view_drop(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    ignore_not_found: bool,
    cascade: bool = False,
) -> None:
    """Issue a ``DROP VIEW`` statement through ``_execute_ddl``.

    ``ignore_not_found`` adds ``IF EXISTS`` and ``cascade`` appends
    ``CASCADE`` to the generated statement.
    """
    exists_clause = ""
    if ignore_not_found:
        exists_clause = " IF EXISTS"

    cascade_clause = ""
    if cascade:
        cascade_clause = " CASCADE"

    statement = f"DROP VIEW{exists_clause} {_qn(schema_name, name)}{cascade_clause};"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, statement)
|
|
699
|
+
|
|
700
|
+
def view_rename(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    new_name: str,
    ignore_not_found: bool,
) -> None:
    """Issue an ``ALTER VIEW ... RENAME TO`` statement through ``_execute_ddl``.

    The view is addressed by its schema-qualified name; the new name is
    quoted as a bare identifier.
    """
    # NOTE(review): ignore_not_found is accepted but not reflected in the
    # generated SQL, unlike view_drop -- confirm whether that is intended.
    source = _qn(schema_name, name)
    destination = _qi(new_name)
    statement = f"ALTER VIEW {source} RENAME TO {destination};"
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, statement)
|
|
716
|
+
|
|
717
|
+
def view_comment_set(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    comment: str | None,
    ignore_not_found: bool,
) -> None:
    """Set or clear a view's comment via ``_comment_sql`` and ``_execute_ddl``.

    ``ignore_not_found`` is accepted but not used when building the
    statement.
    """
    target = f"VIEW {_qn(schema_name, name)}"
    statement = _comment_sql(target, comment)
    self._execute_ddl(attach_opaque_data, transaction_opaque_data, statement)
|
|
731
|
+
|
|
732
|
+
# ========== Dynamic view discovery ==========
|
|
733
|
+
|
|
734
|
+
def view_get(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
) -> ViewInfo | None:
    """Resolve a view: static catalog first, then the transactor.

    Returns the parent class's result when it is not ``None``. Otherwise,
    and only when a transaction id is present, asks the transactor for the
    view's JSON description and builds a ``ViewInfo`` from its
    ``"definition"`` and optional ``"comment"`` entries. A ``ValueError``
    from the lookup or JSON decoding is logged at debug level as "not
    found"; any other exception is logged as a warning. Both yield ``None``.
    """
    static_view = super().view_get(
        attach_opaque_data=attach_opaque_data,
        transaction_opaque_data=transaction_opaque_data,
        schema_name=schema_name,
        name=name,
    )
    if static_view is not None:
        return static_view

    # Without a transaction there is nothing dynamic to query.
    if not transaction_opaque_data:
        return None

    import json

    try:
        # Only the bare view name is sent; schema_name is not part of the request.
        payload = transactor_proxy._get_proxy().view_info(
            attach_opaque_data=attach_opaque_data,
            view_name=name,
            tx_id=transaction_opaque_data,
        )
        info = json.loads(payload)
    except ValueError:
        logger.debug("view_get: dynamic view '%s' not found in transactor", name)
        return None
    except Exception:
        logger.warning("view_get: unexpected error for dynamic view '%s'", name, exc_info=True)
        return None

    definition = info["definition"]
    comment = info.get("comment")
    return ViewInfo(
        name=name,
        schema_name=schema_name,
        definition=definition,
        comment=comment,
        tags={},
    )
|
|
777
|
+
|
|
778
|
+
# ========== Dynamic table discovery ==========
|
|
779
|
+
|
|
780
|
+
def table_get(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    at_unit: str | None = None,
    at_value: str | None = None,
) -> TableInfo | None:
    """Resolve a table: static catalog first, then the transactor.

    Args:
        attach_opaque_data: Attach handle forwarded to every lookup.
        transaction_opaque_data: Transaction id; when falsy only the static
            catalog is consulted.
        schema_name: Schema of the table; when non-empty the transactor is
            asked for ``"<schema_name>.<name>"``.
        name: Table name.
        at_unit: Forwarded to the static lookup only.
        at_value: Forwarded to the static lookup only.

    Returns:
        The static ``TableInfo`` when the parent class knows the table.
        Otherwise a ``TableInfo`` built from the Arrow schema returned by
        the transactor (constraints decoded from the ``vgi.constraints``
        schema metadata, all write capabilities enabled), or ``None`` when
        there is no transaction or the schema lookup fails.
    """
    # Try static catalog first
    static_table = super().table_get(
        attach_opaque_data=attach_opaque_data,
        transaction_opaque_data=transaction_opaque_data,
        schema_name=schema_name,
        name=name,
        at_unit=at_unit,
        at_value=at_value,
    )
    if static_table is not None:
        return static_table

    # Query transactor for dynamic table (requires transaction context)
    if not transaction_opaque_data:
        return None
    try:
        proxy = transactor_proxy._get_proxy()
        # Use schema-qualified name for non-default schemas
        # Don't quote — the transactor handles its own quoting internally
        tx_table_name = f"{schema_name}.{name}" if schema_name else name
        schema_bytes = proxy.table_schema(
            attach_opaque_data=attach_opaque_data,
            table_name=tx_table_name,
            tx_id=transaction_opaque_data,
        )
        table_schema = pa.ipc.read_schema(pa.BufferReader(schema_bytes))  # type: ignore[arg-type]
    except ValueError:
        logger.debug("table_get: dynamic table '%s' not found in transactor", name)
        return None
    except Exception:
        logger.warning("table_get: unexpected error for dynamic table '%s'", name, exc_info=True)
        return None

    # Build TableInfo from the transactor schema.
    # The columns schema includes rowid with is_row_id metadata from the transactor.
    # Constraints are embedded in schema-level metadata as JSON.
    serialized = SerializedSchema(table_schema.serialize().to_pybytes())

    # Parse constraints from schema-level metadata
    not_null_constraints: list[int] = []
    unique_constraints: list[list[int]] = []
    check_constraints: list[str] = []
    primary_key_constraints: list[list[int]] = []
    foreign_key_constraints: list[bytes] = []

    schema_meta = table_schema.metadata or {}
    if b"vgi.constraints" in schema_meta:
        import json

        from vgi_rpc.utils import serialize_record_batch_bytes

        constraints = json.loads(schema_meta[b"vgi.constraints"].decode("utf-8"))
        # Build column name → index map in Arrow schema space (including rowid at index 0).
        # Constraint indices must be in Arrow space so the C++ adjust_col lambda
        # can correctly shift them to physical space (excluding rowid).
        col_index = {f.name: i for i, f in enumerate(table_schema) if f.name != "rowid"}
        # The foreign-key batch layout never changes, so build it once.
        fk_schema = pa.schema(
            [
                ("fk_columns", pa.list_(pa.utf8())),
                ("pk_columns", pa.list_(pa.utf8())),
                ("referenced_table", pa.utf8()),
                ("referenced_schema", pa.utf8()),
            ]
        )
        for constraint in constraints:
            ctype = constraint["type"]
            cols = constraint.get("columns") or []
            text = constraint.get("text") or ""
            # Column names missing from the Arrow schema are dropped silently.
            col_indices = [col_index[cn] for cn in cols if cn in col_index]
            if ctype == "NOT NULL":
                not_null_constraints.extend(col_indices)
            elif ctype == "UNIQUE":
                unique_constraints.append(col_indices)
            elif ctype == "PRIMARY KEY":
                primary_key_constraints.append(col_indices)
            elif ctype == "CHECK":
                # Extract the check expression from constraint_text
                # Format: "CHECK(expr)"
                if text.startswith("CHECK(") and text.endswith(")"):
                    check_constraints.append(text[6:-1])
                elif text:
                    check_constraints.append(text)
            elif ctype == "FOREIGN KEY":
                ref_table = constraint.get("referenced_table") or ""
                ref_cols = constraint.get("referenced_columns") or []
                if ref_table and cols and ref_cols:
                    # The referenced table is recorded under this table's schema_name.
                    fk_batch = pa.RecordBatch.from_pydict(
                        {
                            "fk_columns": [list(cols)],
                            "pk_columns": [list(ref_cols)],
                            "referenced_table": [ref_table],
                            "referenced_schema": [schema_name],
                        },
                        schema=fk_schema,
                    )
                    foreign_key_constraints.append(serialize_record_batch_bytes(fk_batch))

    # Also fetch the table comment
    # NOTE(review): this passes the unqualified name while table_schema above
    # receives the schema-qualified one -- confirm which form the transactor expects.
    try:
        table_comment = proxy.table_comment(
            attach_opaque_data=attach_opaque_data, table_name=name, tx_id=transaction_opaque_data
        )
    except Exception:
        # The comment is optional: fall back to None, but leave a trace so
        # the failure is not completely invisible.
        logger.debug("table_get: failed to fetch comment for dynamic table '%s'", name, exc_info=True)
        table_comment = None

    return TableInfo(
        name=name,
        schema_name=schema_name,
        columns=serialized,
        not_null_constraints=not_null_constraints,
        unique_constraints=unique_constraints,
        check_constraints=check_constraints,
        primary_key_constraints=primary_key_constraints,
        foreign_key_constraints=foreign_key_constraints,
        supports_insert=True,
        supports_update=True,
        supports_delete=True,
        supports_returning=True,
        comment=table_comment,
        tags={},
    )
|
|
911
|
+
|
|
912
|
+
def _merge_dynamic_contents(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    type: SchemaObjectType,
    info_type: type,
    list_method: str,
    get_method: str,
) -> list[Any]:
    """Return static schema contents plus transactor entries not already listed.

    ``list_method`` names the proxy method used to enumerate dynamic object
    names and ``get_method`` names the method on ``self`` used to resolve
    each one. Dynamic names are compared case-insensitively against the
    names of static entries that are instances of ``info_type``. A listing
    failure leaves the static results unchanged.
    """
    merged: list[Any] = list(
        super().schema_contents(  # type: ignore[call-overload]
            attach_opaque_data=attach_opaque_data,
            transaction_opaque_data=transaction_opaque_data,
            name=schema_name,
            type=type,
        )
    )

    # Lower-cased names of the static entries; computed once, before any append.
    known_names = set()
    for entry in merged:
        if isinstance(entry, info_type):
            known_names.add(entry.name.lower())  # type: ignore[attr-defined]

    dynamic_names = []
    try:
        proxy = transactor_proxy._get_proxy()
        if transaction_opaque_data:
            dynamic_names = getattr(proxy, list_method)(
                attach_opaque_data=attach_opaque_data,
                tx_id=transaction_opaque_data,
                schema_name=schema_name,
            )
    except ValueError:
        # Treated as "nothing dynamic to add"; dynamic_names stays empty.
        pass
    except Exception:
        logger.warning("schema_contents: error listing dynamic %s", type, exc_info=True)

    for candidate in dynamic_names:
        if candidate.lower() in known_names:
            continue
        resolved = getattr(self, get_method)(
            attach_opaque_data=attach_opaque_data,
            transaction_opaque_data=transaction_opaque_data,
            schema_name=schema_name,
            name=candidate,
        )
        if resolved is not None:
            merged.append(resolved)
    return merged
|
|
963
|
+
|
|
964
|
+
@overload
def schema_contents(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    name: str,
    type: Literal[SchemaObjectType.TABLE],
) -> Sequence[TableInfo]: ...

@overload
def schema_contents(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    name: str,
    type: Literal[SchemaObjectType.VIEW],
) -> Sequence[ViewInfo]: ...

@overload
def schema_contents(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    name: str,
    type: Literal[
        SchemaObjectType.SCALAR_FUNCTION,
        SchemaObjectType.TABLE_FUNCTION,
        SchemaObjectType.AGGREGATE_FUNCTION,
    ],
) -> Sequence[FunctionInfo]: ...

@overload
def schema_contents(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    name: str,
    type: Literal[SchemaObjectType.SCALAR_MACRO, SchemaObjectType.TABLE_MACRO],
) -> Sequence[MacroInfo]: ...

@overload
def schema_contents(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    name: str,
    type: Literal[SchemaObjectType.INDEX],
) -> Sequence[IndexInfo]: ...

def schema_contents(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    name: str,
    type: SchemaObjectType,
) -> Sequence[TableInfo | ViewInfo | FunctionInfo | MacroInfo | IndexInfo]:
    """List the objects of one kind in a schema.

    Tables and views are merged with dynamic entries through
    ``_merge_dynamic_contents``; every other object type is delegated to
    the parent class unchanged.
    """
    # Accept a raw value as well as an enum member when choosing the branch.
    kind = type if isinstance(type, SchemaObjectType) else SchemaObjectType(type)

    if kind == SchemaObjectType.TABLE:
        info_type, list_method, get_method = TableInfo, "list_user_tables", "table_get"
    elif kind == SchemaObjectType.VIEW:
        info_type, list_method, get_method = ViewInfo, "list_user_views", "view_get"
    else:
        return super().schema_contents(  # type: ignore[call-overload, no-any-return]
            attach_opaque_data=attach_opaque_data,
            transaction_opaque_data=transaction_opaque_data,
            name=name,
            type=type,
        )

    return self._merge_dynamic_contents(
        attach_opaque_data=attach_opaque_data,
        transaction_opaque_data=transaction_opaque_data,
        schema_name=name,
        type=type,
        info_type=info_type,
        list_method=list_method,
        get_method=get_method,
    )
|
|
1055
|
+
|
|
1056
|
+
# ========== Dynamic scan/write function dispatch ==========
|
|
1057
|
+
|
|
1058
|
+
def _function_get(self, kind: str, *, schema_name: str, name: str, **kwargs: Any) -> ScanFunctionResult:
    """Build the ``generic_writable_<kind>`` function reference for a table.

    The table is passed as the single positional argument, schema-qualified
    as ``"<schema_name>.<name>"`` when ``schema_name`` is non-empty. Extra
    keyword arguments are accepted and ignored.
    """
    if schema_name:
        target = f"{schema_name}.{name}"
    else:
        target = name

    function_name = f"generic_writable_{kind}"
    return ScanFunctionResult(
        function_name=function_name,
        positional_arguments=[pa.scalar(target)],
        named_arguments={},
    )
|
|
1066
|
+
|
|
1067
|
+
def table_scan_function_get(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    at_unit: str | None,
    at_value: str | None,
) -> ScanFunctionResult:
    """Resolve the scan function for a table via ``_function_get("scan", ...)``.

    Only ``schema_name`` and ``name`` are forwarded; the remaining
    parameters are not used here.
    """
    scan_function = self._function_get("scan", schema_name=schema_name, name=name)
    return scan_function
|
|
1079
|
+
|
|
1080
|
+
def table_insert_function_get(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
    writable_branch_function_name: str | None = None,
) -> ScanFunctionResult:
    """Resolve the insert function for a table via ``_function_get("insert", ...)``.

    Only ``schema_name`` and ``name`` are forwarded.
    """
    # The WritableCatalog is single-branch only, so the branch function
    # name is deliberately discarded.
    del writable_branch_function_name
    insert_function = self._function_get("insert", schema_name=schema_name, name=name)
    return insert_function
|
|
1094
|
+
|
|
1095
|
+
def table_update_function_get(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
) -> ScanFunctionResult:
    """Resolve the update function for a table via ``_function_get("update", ...)``.

    Only ``schema_name`` and ``name`` are forwarded.
    """
    update_function = self._function_get("update", schema_name=schema_name, name=name)
    return update_function
|
|
1105
|
+
|
|
1106
|
+
def table_delete_function_get(
    self,
    *,
    attach_opaque_data: AttachOpaqueData,
    transaction_opaque_data: TransactionOpaqueData | None,
    schema_name: str,
    name: str,
) -> ScanFunctionResult:
    """Resolve the delete function for a table via ``_function_get("delete", ...)``.

    Only ``schema_name`` and ``name`` are forwarded.
    """
    delete_function = self._function_get("delete", schema_name=schema_name, name=name)
    return delete_function
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
class WritableWorker(Worker):
    """Worker offering transactional, writable tables together with DDL.

    The writable_data, writable_products, and writable_orders tables are
    exposed through the WritableCatalog. Dynamically created tables are
    covered as well, via CREATE TABLE, DROP TABLE, and ALTER TABLE.
    """

    # Catalog implementation class used by this worker.
    catalog_interface = WritableCatalog
    # Module-level catalog object bound to this worker.
    catalog = _WRITABLE_CATALOG
|
|
1128
|
+
|
|
1129
|
+
|
|
1130
|
+
def main() -> None:
    """Start the writable worker, exiting early if sqlglot is unavailable."""
    # sqlglot is the transactor's lazy dependency; probing for it up front
    # gives a friendly install hint instead of a crash in the middle of DDL.
    try:
        import sqlglot  # noqa: F401
    except ImportError:
        import sys as _sys

        install_hint = (
            "vgi-fixture-writable-worker requires the test-fixtures-writable extra. "
            "Install with: pip install 'vgi-python[test-fixtures-writable]'"
        )
        _sys.exit(install_hint)
    else:
        WritableWorker.main()


# Script entry point.
if __name__ == "__main__":
    main()
|