vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,8 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Writable test-fixture worker and helpers.
4
+
5
+ These fixtures depend on ``sqlglot`` (via ``vgi.transactor``) and live behind
6
+ the ``vgi[test-fixtures-writable]`` extra. Tests that exercise the write
7
+ subsystem (INSERT/UPDATE/DELETE/DDL) import from here.
8
+ """
@@ -0,0 +1,236 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Generic writable functions parameterized by table_name.
4
+
5
+ These scan/insert/update/delete functions reuse the shared helpers and
6
+ transactor proxy from ``vgi._test_fixtures.writable.table`` but determine the table name dynamically from the
7
+ first positional argument instead of a hardcoded class variable. This allows
8
+ the same function classes to serve any table in the transactor's DuckDB
9
+ database, which is essential for DDL-created tables.
10
+
11
+ The table_name is passed as the first positional argument via the
12
+ ``ScanFunctionResult`` when the catalog dispatches scan/insert/update/delete
13
+ for dynamically discovered tables.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Any
19
+
20
+ import pyarrow as pa
21
+ from vgi_rpc import AnnotatedBatch
22
+ from vgi_rpc.rpc import OutputCollector
23
+
24
+ from vgi._test_fixtures.writable.table import (
25
+ _COUNT_SCHEMA,
26
+ WritableScanState,
27
+ _get_attach_opaque_data,
28
+ _get_pushdown_filters,
29
+ _get_tx_id,
30
+ _is_returning,
31
+ transactor_proxy,
32
+ )
33
+ from vgi.invocation import BindResponse, GlobalInitResponse
34
+ from vgi.table_function import BindParams, InitParams, ProcessParams, TableFunctionGenerator
35
+ from vgi.table_in_out_function import TableInOutGenerator
36
+
37
# Public names of this module; the helper functions and the shared write base
# class below are underscore-prefixed and intentionally left out.
__all__ = [
    "GenericTableDelete",
    "GenericTableInsert",
    "GenericTableScan",
    "GenericTableUpdate",
]
43
+
44
+
45
+ def _get_table_name_from_bind(params: BindParams[None]) -> str:
46
+ """Extract the table name from the first positional argument at bind time."""
47
+ args = params.bind_call.arguments
48
+ if not args.positional or args.positional[0] is None:
49
+ msg = "table_name positional argument is required"
50
+ raise ValueError(msg)
51
+ return str(args.positional[0].as_py())
52
+
53
+
54
+ def _get_table_name_from_process(params: ProcessParams[None]) -> str:
55
+ """Extract the table name from the first positional argument at process time."""
56
+ assert params.init_call is not None
57
+ args = params.init_call.bind_call.arguments
58
+ if not args.positional or args.positional[0] is None:
59
+ msg = "table_name positional argument is required"
60
+ raise ValueError(msg)
61
+ return str(args.positional[0].as_py())
62
+
63
+
64
def _get_table_schema_from_transactor(table_name: str, attach_opaque_data: bytes, tx_id: bytes) -> pa.Schema:
    """Fetch a table's Arrow schema from the transactor.

    Calls ``table_schema`` on the shared transactor proxy and decodes the
    returned bytes with ``pa.ipc.read_schema``.

    Args:
        table_name: Name of the table to describe.
        attach_opaque_data: Attach data forwarded to the transactor.
        tx_id: Transaction opaque data forwarded to the transactor.

    Returns:
        The schema decoded from the transactor's response.
    """
    serialized = transactor_proxy._get_proxy().table_schema(
        attach_opaque_data=attach_opaque_data,
        table_name=table_name,
        tx_id=tx_id,
    )
    reader = pa.BufferReader(serialized)
    return pa.ipc.read_schema(reader)  # type: ignore[arg-type]
69
+
70
+
71
+ # ============================================================================
72
+ # Generic scan — dynamic table name + schema from transactor
73
+ # ============================================================================
74
+
75
+
76
class GenericTableScan(TableFunctionGenerator[None, WritableScanState]):
    """Scan function for any table — determines table name from first positional arg.

    The output schema is fetched from the transactor at bind time. Rows are
    then pulled one batch per ``process`` call from a scan stream that is
    opened once in ``initial_state``.
    """

    class Meta:
        """Metadata for GenericTableScan."""

        name = "generic_writable_scan"
        projection_pushdown = True
        filter_pushdown = True

    @classmethod
    def on_bind(cls, params: BindParams[None]) -> BindResponse:
        """Bind: query transactor for table schema (already includes rowid).

        Raises:
            ValueError: If the table-name positional argument is missing.
            AssertionError: If attach or transaction opaque data is absent.
        """
        table_name = _get_table_name_from_bind(params)
        attach_opaque_data = params.attach_opaque_data  # unwrapped plaintext
        tx_id = params.bind_call.transaction_opaque_data
        assert attach_opaque_data is not None and tx_id is not None
        table_schema = _get_table_schema_from_transactor(table_name, attach_opaque_data, tx_id)
        return BindResponse(output_schema=table_schema)

    @classmethod
    def on_init(cls, params: InitParams[None]) -> GlobalInitResponse:
        """Limit to a single worker."""
        return GlobalInitResponse(max_workers=1)

    @classmethod
    def initial_state(cls, params: ProcessParams[None]) -> WritableScanState:
        """Open the transactor scan stream once before processing begins.

        The requested columns are the names of ``params.output_schema`` and
        any pushdown filters are forwarded as serialized bytes.
        """
        table_name = _get_table_name_from_process(params)
        attach_opaque_data = _get_attach_opaque_data(params)
        tx_id = _get_tx_id(params)
        proxy = transactor_proxy._get_proxy()
        columns = list(params.output_schema.names)
        scan_iter = iter(
            proxy.scan(
                attach_opaque_data=attach_opaque_data,
                tx_id=tx_id,
                schema_name="",
                table_name=table_name,
                columns=columns,
                pushdown_filters=_get_pushdown_filters(params),
            )
        )
        return WritableScanState(scan_iter=scan_iter)

    @classmethod
    def process(cls, params: ProcessParams[None], state: WritableScanState, out: OutputCollector) -> None:
        """Read the next batch from the scan stream.

        Emits the batch when one is available and finishes the output once
        the stream is exhausted.
        """
        assert state.scan_iter is not None
        # Only ``next()`` is guarded: a StopIteration escaping from
        # ``out.emit`` must not be mistaken for the end of the scan.
        try:
            batch = next(state.scan_iter)
        except StopIteration:
            out.finish()
        else:
            out.emit(batch.batch)
130
+
131
+
132
+ # ============================================================================
133
+ # Generic write base — shared INSERT/UPDATE/DELETE logic
134
+ # ============================================================================
135
+
136
+
137
class _GenericWriteBase(TableInOutGenerator[None, None]):
    """Shared INSERT/UPDATE/DELETE plumbing for the generic write handlers.

    Concrete subclasses set ``_operation`` to the name of the transactor
    proxy method that opens the matching write stream.
    """

    _operation: str  # "insert" | "update" | "delete"

    @classmethod
    def on_bind(cls, params: BindParams[None]) -> BindResponse:
        """Choose the output schema: user columns for RETURNING, else a row count.

        The table name is validated first in both cases. For RETURNING the
        table schema is fetched from the transactor and the ``rowid`` /
        ``row_id`` columns are dropped from it.
        """
        table_name = _get_table_name_from_bind(params)
        if not _is_returning(params):
            return BindResponse(output_schema=_COUNT_SCHEMA)

        attach = params.attach_opaque_data  # unwrapped plaintext
        transaction = params.bind_call.transaction_opaque_data
        assert attach is not None and transaction is not None
        full_schema = _get_table_schema_from_transactor(table_name, attach, transaction)
        hidden = ("rowid", "row_id")
        visible_fields = [field for field in full_schema if field.name not in hidden]
        return BindResponse(output_schema=pa.schema(visible_fields))

    @classmethod
    def _open_stream(
        cls,
        proxy: Any,
        attach_opaque_data: bytes,
        tx_id: bytes,
        table_name: str,
        returning: bool,
        batch: pa.RecordBatch,
    ) -> Any:
        """Open the write stream named by ``_operation``.

        ``batch`` is unused here; it is part of the signature so overrides
        that need extra arguments can derive them from the incoming data.
        """
        opener = getattr(proxy, cls._operation)
        return opener(
            attach_opaque_data=attach_opaque_data,
            tx_id=tx_id,
            table_name=table_name,
            returning=returning,
        )

    @classmethod
    def process(cls, params: ProcessParams[None], state: None, batch: pa.RecordBatch, out: OutputCollector) -> None:
        """Send one input batch through a transactor write stream and emit the reply."""
        table_name = _get_table_name_from_process(params)
        attach = _get_attach_opaque_data(params)
        transaction = _get_tx_id(params)
        # Any output schema other than the count schema means RETURNING rows.
        wants_rows = params.output_schema != _COUNT_SCHEMA
        proxy = transactor_proxy._get_proxy()
        stream_cm = cls._open_stream(proxy, attach, transaction, table_name, wants_rows, batch)
        with stream_cm as stream:
            reply = stream.exchange(AnnotatedBatch(batch=batch))
            out.emit(reply.batch)
184
+
185
+
186
class GenericTableInsert(_GenericWriteBase):
    """Generic INSERT handler; the target table is the first positional argument."""

    class Meta:
        """Metadata for GenericTableInsert."""

        name = "generic_writable_insert"

    # Name of the transactor proxy method used to open the write stream.
    _operation = "insert"
195
+
196
+
197
class GenericTableUpdate(_GenericWriteBase):
    """Generic UPDATE handler; the target table is the first positional argument."""

    class Meta:
        """Metadata for GenericTableUpdate."""

        name = "generic_writable_update"

    _operation = "update"

    @classmethod
    def _open_stream(
        cls,
        proxy: Any,
        attach_opaque_data: bytes,
        tx_id: bytes,
        table_name: str,
        returning: bool,
        batch: pa.RecordBatch,
    ) -> Any:
        """Open an update stream, passing the columns to update.

        The column list is every column name of ``batch`` except ``rowid``.
        """
        # NOTE(review): only "rowid" is excluded here, while the base class's
        # on_bind also excludes "row_id" — confirm this difference is intended.
        columns_to_update = [column for column in batch.schema.names if column != "rowid"]
        return proxy.update(
            attach_opaque_data=attach_opaque_data,
            tx_id=tx_id,
            table_name=table_name,
            columns=columns_to_update,
            returning=returning,
        )
226
+
227
+
228
class GenericTableDelete(_GenericWriteBase):
    """Generic DELETE handler; the target table is the first positional argument."""

    class Meta:
        """Metadata for GenericTableDelete."""

        name = "generic_writable_delete"

    # Name of the transactor proxy method used to open the write stream.
    _operation = "delete"
@@ -0,0 +1,149 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Writable table infrastructure — transactor proxy and shared helpers.
4
+
5
+ Provides the ``TransactorProxy`` for connecting to the db-transactor subprocess,
6
+ and helper functions used by the generic writable functions in ``vgi/_test_fixtures/writable/generic.py``.
7
+ All tables are created dynamically via CREATE TABLE DDL at the client side.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from collections.abc import Iterator
13
+ from dataclasses import dataclass
14
+ from typing import TYPE_CHECKING, Annotated
15
+
16
+ import pyarrow as pa
17
+ from vgi_rpc import AnnotatedBatch, ArrowSerializableDataclass, Transient
18
+
19
+ from vgi.schema_utils import schema
20
+ from vgi.table_function import BindParams, ProcessParams
21
+
22
+ if TYPE_CHECKING:
23
+ from vgi.protocol import BindRequest
24
+
25
+ from vgi.transactor.client import TransactorClient
26
+ from vgi.transactor.protocol import TransactorProtocol
27
+
28
# Public names of this module; the underscore-prefixed helpers and constants
# are not listed here.
__all__ = [
    "TransactorProxy",
    "WritableScanState",
    "transactor_proxy",
]
33
+
34
# Output schema for write functions returning affected row counts:
# a single ``count`` entry typed as int64.
_COUNT_SCHEMA = schema(count=pa.int64())

# DuckDB's native rowid pseudocolumn, marked with is_row_id metadata so the
# C++ extension knows which column carries the physical row identifier.
# The metadata value is an empty byte string; only the key is meaningful here.
_ROWID_FIELD = pa.field("rowid", pa.int64(), metadata={b"is_row_id": b""})
40
+
41
+
42
+ def _parse_write_options(bind_call: BindRequest) -> dict[str, bool | str | list[str]]:
43
+ """Parse the write_options RecordBatch from the bind call's named arguments."""
44
+ defaults: dict[str, bool | str | list[str]] = {
45
+ "return_chunks": False,
46
+ "on_conflict": "throw",
47
+ "on_conflict_columns": [],
48
+ }
49
+ if not (bind_call.arguments and bind_call.arguments.named):
50
+ return defaults
51
+ val = bind_call.arguments.named.get("write_options")
52
+ if val is None:
53
+ return defaults
54
+ from vgi_rpc.utils import deserialize_record_batch
55
+
56
+ options_bytes = val.as_py()
57
+ batch, _ = deserialize_record_batch(options_bytes)
58
+ result = dict(defaults)
59
+ if "return_chunks" in batch.schema.names:
60
+ result["return_chunks"] = batch.column("return_chunks")[0].as_py()
61
+ if "on_conflict" in batch.schema.names:
62
+ result["on_conflict"] = batch.column("on_conflict")[0].as_py()
63
+ if "on_conflict_columns" in batch.schema.names:
64
+ result["on_conflict_columns"] = batch.column("on_conflict_columns")[0].as_py()
65
+ return result
66
+
67
+
68
def _is_returning(params: BindParams[None]) -> bool:
    """Report whether the bind request's write options ask for RETURNING rows.

    Reads the ``return_chunks`` write option and coerces it to ``bool``;
    a missing option counts as ``False``.
    """
    write_options = _parse_write_options(params.bind_call)
    wants_chunks = write_options.get("return_chunks", False)
    return bool(wants_chunks)
72
+
73
+
74
+ def _get_tx_id(params: ProcessParams[None]) -> bytes:
75
+ """Get transaction_opaque_data from the bind request."""
76
+ assert params.init_call is not None
77
+ tx_id = params.init_call.bind_call.transaction_opaque_data
78
+ if tx_id:
79
+ return tx_id
80
+ msg = "transaction_opaque_data is required but was not provided in the bind request"
81
+ raise ValueError(msg)
82
+
83
+
84
+ def _get_attach_opaque_data(params: ProcessParams[None]) -> bytes:
85
+ """Get the unwrapped plaintext attach (storage shards on the sealed form)."""
86
+ attach_opaque_data = params.attach_opaque_data
87
+ if attach_opaque_data:
88
+ return attach_opaque_data
89
+ msg = "attach_opaque_data is required but was not provided in the bind request"
90
+ raise ValueError(msg)
91
+
92
+
93
+ def _get_pushdown_filters(params: ProcessParams[None]) -> bytes | None:
94
+ """Get pushdown_filters as serialized IPC bytes from params (or None)."""
95
+ assert params.init_call is not None
96
+ pf_batch = params.init_call.pushdown_filters
97
+ if pf_batch is None:
98
+ return None
99
+ sink = pa.BufferOutputStream()
100
+ writer = pa.ipc.new_stream(sink, pf_batch.schema)
101
+ writer.write_batch(pf_batch)
102
+ writer.close()
103
+ return sink.getvalue().to_pybytes()
104
+
105
+
106
@dataclass(kw_only=True)
class WritableScanState(ArrowSerializableDataclass):
    """State for writable table scans — holds the live transactor scan iterator.

    Fields are keyword-only (``kw_only=True``).
    """

    # Iterator over the transactor's scan results; defaults to None.
    # Marked ``Transient()`` — presumably so the live iterator is left out of
    # Arrow serialization; confirm against vgi_rpc.
    scan_iter: Annotated[Iterator[AnnotatedBatch] | None, Transient()] = None
111
+
112
+
113
+ # ============================================================================
114
+ # TransactorProxy — manages the db-transactor connection
115
+ # ============================================================================
116
+
117
+
118
class TransactorProxy:
    """Manages connections to the shared db-transactor subprocess.

    The transactor manages multiple databases internally (one per attach_opaque_data).
    DDL statements are run during register() for each new catalog attachment.

    The underlying ``TransactorClient`` is created lazily on first use and
    dropped again by ``close()``.
    """

    def __init__(self, ddl_statements: list[str] | None = None) -> None:
        """Initialize the proxy.

        Args:
            ddl_statements: DDL statements passed to every ``register()``
                call; ``None`` means no statements.
        """
        self._ddl = ddl_statements or []
        self._client: TransactorClient | None = None

    def _get_proxy(self) -> TransactorProtocol:
        """Get the transactor RPC proxy (auto-spawn if needed)."""
        if self._client is None:
            self._client = TransactorClient()
        return self._client.get_proxy()  # type: ignore[no-any-return]

    def register(self, attach_opaque_data: bytes, catalog_name: str = "") -> None:
        """Register a new database for this attach_opaque_data and run initial DDL."""
        proxy = self._get_proxy()
        proxy.register(attach_opaque_data=attach_opaque_data, catalog_name=catalog_name, ddl_statements=self._ddl)

    def close(self) -> None:
        """Close the transactor connection.

        Safe to call when no client exists. The client reference is cleared
        before closing, so a failing ``close()`` on the client cannot leave
        a stale client behind; the next ``_get_proxy()`` creates a new one.
        """
        client, self._client = self._client, None
        if client is not None:
            client.close()
146
+
147
+
148
# Module-level proxy — all tables created dynamically via DDL.
# Constructed with no initial DDL statements; the underlying client is only
# created on the first _get_proxy() call.
transactor_proxy = TransactorProxy()