vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,308 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Example worker that declares attach-time options of many types and echoes them back.
4
+
5
+ This worker exists to exercise the attach-time options pipeline end-to-end:
6
+ - Declared options (``AttachOptions`` inner class) advertised via the ``catalogs()``
7
+ RPC for pre-attach discovery.
8
+ - Values received at ``catalog_attach`` are serialized into the returned
9
+ ``attach_opaque_data`` so they survive pooled-worker reuse (subprocess) and stateless
10
+ transports (HTTP). Nothing is stored on ``self``.
11
+ - The ``echo_attach_options`` table function decodes ``attach_opaque_data`` on every
12
+ invocation and returns a one-row batch containing every declared option.
13
+
14
+ Run directly as a worker::
15
+
16
+ vgi-fixture-attach-options-worker
17
+
18
+ Or serve over HTTP via ``vgi-serve``.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import datetime
24
+ import uuid
25
+ from dataclasses import dataclass
26
+ from decimal import Decimal
27
+ from typing import Annotated, Any, ClassVar
28
+
29
+ import pyarrow as pa
30
+ from vgi_rpc import ArrowSerializableDataclass
31
+ from vgi_rpc.rpc import CallContext, OutputCollector
32
+ from vgi_rpc.utils import deserialize_record_batch, serialize_record_batch_bytes
33
+
34
+ from vgi.catalog.attach_option import AttachOption, AttachOptionSpec, extract_attach_option_specs
35
+ from vgi.catalog.catalog_interface import (
36
+ AttachOpaqueData,
37
+ CatalogAttachResult,
38
+ CatalogInfo,
39
+ ReadOnlyCatalogInterface,
40
+ TransactionOpaqueData,
41
+ )
42
+ from vgi.catalog.descriptors import Catalog, Schema
43
+ from vgi.invocation import BindResponse
44
+ from vgi.schema_utils import schema
45
+ from vgi.table_function import BindParams, ProcessParams, TableFunctionGenerator, init_single_worker
46
+ from vgi.worker import Worker
47
+
48
# Public names exported by this fixture module.
__all__ = ["AttachOptionsWorker", "EchoAttachOptionsFunction", "main"]


# Name of the single catalog this fixture serves.
CATALOG_NAME = "attach_options"

# Layout of attach_opaque_data: <16-byte UUID> <separator byte> <IPC payload>.
# The separator sits directly after the ``uuid.uuid4().bytes`` prefix.
_ATTACH_ID_SEP = b"\x00"
_UUID_BYTES = 16
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Declared attach-time options: one per supported type
62
+ # ---------------------------------------------------------------------------
63
+
64
+
65
class AttachOptions:
    """Declare one attach-time option per supported value type.

    Each attribute combines a Python annotation, an ``AttachOption`` marker
    (a description and, where the Python type alone is not specific enough,
    an explicit Arrow type) and the default value used when the option is not
    supplied at ATTACH.
    """

    # -- scalar primitives --------------------------------------------------
    opt_bool: Annotated[bool, AttachOption(desc="Boolean option")] = True
    opt_int8: Annotated[int, AttachOption(desc="int8", arrow_type=pa.int8())] = -8
    opt_int16: Annotated[int, AttachOption(desc="int16", arrow_type=pa.int16())] = -16
    opt_int32: Annotated[int, AttachOption(desc="int32", arrow_type=pa.int32())] = -32
    opt_int64: Annotated[int, AttachOption(desc="int64")] = -64
    opt_uint8: Annotated[int, AttachOption(desc="uint8", arrow_type=pa.uint8())] = 8
    opt_uint16: Annotated[int, AttachOption(desc="uint16", arrow_type=pa.uint16())] = 16
    opt_uint32: Annotated[int, AttachOption(desc="uint32", arrow_type=pa.uint32())] = 32
    opt_uint64: Annotated[int, AttachOption(desc="uint64", arrow_type=pa.uint64())] = 64
    opt_float32: Annotated[float, AttachOption(desc="float32", arrow_type=pa.float32())] = 1.5
    opt_float64: Annotated[float, AttachOption(desc="float64")] = 2.5
    opt_string: Annotated[str, AttachOption(desc="UTF-8 string")] = "hello"
    opt_blob: Annotated[bytes, AttachOption(desc="Binary blob")] = b"\x00\x01\x02"

    # -- temporal -----------------------------------------------------------
    opt_date: Annotated[
        datetime.date,
        AttachOption(desc="Date", arrow_type=pa.date32()),
    ] = datetime.date(2026, 4, 24)
    opt_time: Annotated[
        datetime.time,
        AttachOption(desc="Time of day", arrow_type=pa.time64("us")),
    ] = datetime.time(12, 34, 56)
    opt_timestamp: Annotated[
        datetime.datetime,
        AttachOption(desc="Naive timestamp", arrow_type=pa.timestamp("us")),
    ] = datetime.datetime(2026, 4, 24, 12, 34, 56)
    opt_timestamp_tz: Annotated[
        datetime.datetime,
        AttachOption(desc="Timestamp with UTC tz", arrow_type=pa.timestamp("us", tz="UTC")),
    ] = datetime.datetime(2026, 4, 24, 12, 34, 56, tzinfo=datetime.UTC)

    # -- fixed precision ----------------------------------------------------
    opt_decimal: Annotated[
        Decimal,
        AttachOption(desc="Decimal(18,4)", arrow_type=pa.decimal128(18, 4)),
    ] = Decimal("123.4500")

    # -- nested -------------------------------------------------------------
    opt_list: Annotated[
        list[int],
        AttachOption(desc="List of int64", arrow_type=pa.list_(pa.int64())),
    ] = [1, 2, 3]
    opt_struct: Annotated[
        dict[str, object],
        AttachOption(
            desc="Struct",
            arrow_type=pa.struct([pa.field("a", pa.int64()), pa.field("b", pa.string())]),
        ),
    ] = {"a": 1, "b": "x"}
110
+
111
+
112
# Computed a single time when the module is imported. The specs feed both the
# echo function's output schema and the default backfill used by catalog_attach.
_ATTACH_OPTION_SPECS: list[AttachOptionSpec] = extract_attach_option_specs(AttachOptions)

# One output column per declared option, named and typed after its spec.
_ECHO_SCHEMA: pa.Schema = schema(
    {option_spec.name: option_spec.type for option_spec in _ATTACH_OPTION_SPECS},
)
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # attach_opaque_data encoding / decoding
121
+ # ---------------------------------------------------------------------------
122
+
123
+
124
def _build_echo_batch(received: dict[str, Any]) -> pa.RecordBatch:
    """Build the one-row echo batch from received values layered over defaults.

    Args:
        received: Option values supplied at attach time, keyed by option name.
            Keys that do not match a declared option are ignored.

    Returns:
        A single-row record batch conforming to ``_ECHO_SCHEMA``.
    """
    defaults: dict[str, Any] = {spec.name: spec.default for spec in _ATTACH_OPTION_SPECS}
    # Only declared options may override a default; anything else is dropped.
    overrides = {key: val for key, val in received.items() if key in defaults}
    merged_row = {**defaults, **overrides}
    return pa.RecordBatch.from_pylist([merged_row], schema=_ECHO_SCHEMA)
131
+
132
+
133
def _encode_attach_opaque_data(received: dict[str, Any]) -> AttachOpaqueData:
    """Serialize the option values into an ``AttachOpaqueData`` token.

    The token is a fresh random UUID (16 bytes), the separator byte, then the
    serialized one-row echo batch.

    Args:
        received: Option values supplied at attach time, keyed by option name.

    Returns:
        The opaque token carrying the merged option values.
    """
    payload = serialize_record_batch_bytes(_build_echo_batch(received))
    attach_id = uuid.uuid4().bytes
    return AttachOpaqueData(attach_id + _ATTACH_ID_SEP + payload)
137
+
138
+
139
def _decode_attach_opaque_data(attach_opaque_data: bytes) -> pa.RecordBatch:
    """Recover the echo batch embedded in an attach token.

    Args:
        attach_opaque_data: Token laid out as UUID prefix, separator byte,
            serialized record batch.

    Returns:
        The record batch deserialized from the payload portion.

    Raises:
        ValueError: If the token is too short to hold a payload or the
            separator byte is not where it is expected.
    """
    blob = bytes(attach_opaque_data)
    payload_start = _UUID_BYTES + 1
    has_payload = len(blob) > payload_start
    has_separator = blob[_UUID_BYTES:payload_start] == _ATTACH_ID_SEP
    if not (has_payload and has_separator):
        raise ValueError("attach_opaque_data does not carry an options payload")
    # The deserializer returns a pair; only the batch is needed here.
    decoded_batch, _unused = deserialize_record_batch(blob[payload_start:])
    return decoded_batch
146
+
147
+
148
+ # ---------------------------------------------------------------------------
149
+ # Echo table function
150
+ # ---------------------------------------------------------------------------
151
+
152
+
153
@dataclass(frozen=True, slots=True)
class _EchoArgs:
    """Empty argument set: the echo function takes no SQL arguments.

    All of its input is read from ``attach_opaque_data`` instead.
    """
156
+
157
+
158
@dataclass(kw_only=True)
class _EchoState(ArrowSerializableDataclass):
    """Per-scan state for ``echo_attach_options``."""

    # Set to True once the single output row has been emitted.
    emitted: bool = False
161
+
162
+
163
@init_single_worker
class EchoAttachOptionsFunction(TableFunctionGenerator[_EchoArgs, _EchoState]):
    """Table function that echoes the option values supplied at ATTACH.

    The output is a single row with one column per declared option. Values are
    decoded from ``attach_opaque_data`` on each invocation, so nothing
    attach-specific is kept on ``self``; this keeps the function correct under
    pooled-worker reuse (subprocess) and stateless dispatch (HTTP).
    """

    FunctionArguments = _EchoArgs

    class Meta:
        name = "echo_attach_options"
        description = "Echo the attach-time option values carried in attach_opaque_data"
        categories = ["generator", "testing"]

    # Output schema is fixed at import time from the declared options.
    FIXED_SCHEMA: ClassVar[pa.Schema] = _ECHO_SCHEMA

    @classmethod
    def on_bind(cls, params: BindParams[_EchoArgs]) -> BindResponse:
        """Report the fixed output schema; ``params`` is not consulted."""
        return BindResponse(output_schema=cls.FIXED_SCHEMA)

    @classmethod
    def initial_state(cls, params: ProcessParams[_EchoArgs]) -> _EchoState:
        """Start every scan with nothing emitted yet."""
        return _EchoState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_EchoArgs],
        state: _EchoState,
        out: OutputCollector,
    ) -> None:
        """Emit the echo row on the first call and finish on the next one.

        Raises:
            ValueError: If no ``attach_opaque_data`` accompanies the call, or
                if it does not carry an options payload.
        """
        if state.emitted:
            out.finish()
        else:
            # params.attach_opaque_data is the unwrapped plaintext attach (storage
            # shards on the sealed form via the request); see InitParams.
            opaque = params.attach_opaque_data
            if opaque is None:
                raise ValueError("echo_attach_options requires an attach_opaque_data")

            decoded = _decode_attach_opaque_data(opaque)
            # Pick the columns in declared order so the emitted batch lines up
            # with the schema promised at bind time.
            out.emit(decoded.select(_ECHO_SCHEMA.names))
            state.emitted = True
211
+
212
+
213
+ # ---------------------------------------------------------------------------
214
+ # Catalog interface
215
+ # ---------------------------------------------------------------------------
216
+
217
+
218
# Static catalog layout: a single "main" schema with no tables or views,
# exposing only the echo table function.
_CATALOG_DESCRIPTOR = Catalog(
    name=CATALOG_NAME,
    schemas=[
        Schema(name="main", tables=(), views=(), functions=(EchoAttachOptionsFunction,)),
    ],
)
229
+
230
+
231
class AttachOptionsCatalog(ReadOnlyCatalogInterface):
    """Read-only catalog that advertises the declared options and echoes them.

    Option values received at attach time are packed into the returned
    ``attach_opaque_data`` instead of being stored on the instance.
    """

    catalog = _CATALOG_DESCRIPTOR
    catalog_name = CATALOG_NAME

    def catalog_attach(
        self,
        *,
        name: str,
        options: dict[str, Any],
        data_version_spec: str | None,
        implementation_version: str | None,
        ctx: CallContext | None = None,
    ) -> CatalogAttachResult:
        """Attach the fixture catalog and encode ``options`` into the result.

        Args:
            name: Catalog to attach; must equal ``CATALOG_NAME``.
            options: Attach-time option values keyed by option name.
            data_version_spec: Ignored.
            implementation_version: Ignored.
            ctx: Ignored.

        Returns:
            Attach result whose ``attach_opaque_data`` carries the option values.

        Raises:
            ValueError: If ``name`` is not the catalog served by this fixture.
        """
        del data_version_spec, implementation_version, ctx
        if name == CATALOG_NAME:
            return CatalogAttachResult(
                attach_opaque_data=_encode_attach_opaque_data(options),
                supports_transactions=False,
                supports_time_travel=False,
                catalog_version_frozen=True,
                catalog_version=1,
                attach_opaque_data_required=True,
                default_schema="main",
                settings=[],
                resolved_data_version=None,
                resolved_implementation_version=None,
            )
        raise ValueError(f"Unknown catalog: {name!r}. Available: {CATALOG_NAME}")

    def catalogs(self) -> list[CatalogInfo]:
        """List the single catalog together with its serialized option specs."""
        serialized_specs = [option_spec.serialize() for option_spec in _ATTACH_OPTION_SPECS]
        info = CatalogInfo(
            name=CATALOG_NAME,
            implementation_version=None,
            data_version_spec=None,
            attach_option_specs=serialized_specs,
        )
        return [info]

    def catalog_version(
        self,
        *,
        attach_opaque_data: AttachOpaqueData,
        transaction_opaque_data: TransactionOpaqueData | None,
        ctx: CallContext | None = None,
    ) -> int:
        """Return the constant catalog version ``1``; all arguments are ignored."""
        del attach_opaque_data, transaction_opaque_data, ctx
        return 1
284
+
285
+
286
+ # ---------------------------------------------------------------------------
287
+ # Worker + entry point
288
+ # ---------------------------------------------------------------------------
289
+
290
+
291
class AttachOptionsWorker(Worker):
    """Worker that serves :class:`AttachOptionsCatalog` and the echo function."""

    # Re-bind the module-level AttachOptions class as an inner attribute:
    # Worker.__init_subclass__ picks it up, extracts the specs and injects
    # them into the catalog interface.
    AttachOptions = AttachOptions

    catalog_interface = AttachOptionsCatalog
    catalog_name = CATALOG_NAME
    functions = [EchoAttachOptionsFunction]
301
+
302
+
303
def main() -> None:
    """Run the attach-options fixture worker via ``AttachOptionsWorker.main``."""
    AttachOptionsWorker.main()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,62 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Fixture worker that deliberately advertises a mismatched protocol_version.
4
+
5
+ This fixture exists to exercise the vgi-rpc framework's *application protocol
6
+ version* enforcement (added in vgi-rpc 0.18.0) end-to-end, across a real
7
+ transport, from both the Python ``Client`` and the C++ DuckDB extension.
8
+
9
+ The trick is entirely Python-side and needs no extension rebuild: the worker
10
+ hands :class:`BadProtocol` (a :class:`~vgi.protocol.VgiProtocol` subclass that
11
+ redeclares ``protocol_version`` to an impossible major version) to
12
+ ``RpcServer`` via :attr:`~vgi.worker.Worker.protocol_class`. vgi-rpc reads the
13
+ version with ``vars(protocol).get("protocol_version")`` — reading the class's
14
+ own ``__dict__``, not an inherited attribute — so the redeclaration on this
15
+ subclass's body is what takes effect.
16
+
17
+ A normal client (Python ``Client`` or the C++ extension) declares
18
+ ``protocol_version = "1.0.0"`` and sends it on every request. Because this
19
+ worker enforces ``"99.0.0"``, the major versions differ and the dispatch
20
+ boundary raises ``ProtocolVersionError`` with a directional "upgrade the
21
+ client" message that round-trips back to the caller.
22
+
23
+ Otherwise this is a drop-in replacement for ``vgi-fixture-worker``: it
24
+ inherits every function and the catalog from :class:`ExampleWorker`, so any
25
+ request reaches the dispatch boundary (and trips the version check) using the
26
+ same SQL the example worker accepts.
27
+
28
+ Registered as the ``vgi-fixture-bad-protocol-worker`` entry point.
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ from typing import ClassVar
34
+
35
+ from vgi._test_fixtures.worker import ExampleWorker
36
+ from vgi.protocol import VgiProtocol
37
+
38
# A major-version bump guarantees a mismatch against any real client's
# "1.0.0" (vgi-rpc compares major+minor exactly, ignoring patch). The value is
# assigned on the BadProtocol class body so that
# ``vars(BadProtocol)["protocol_version"]`` resolves to it.
BAD_PROTOCOL_VERSION = "99.0.0"
42
+
43
+
44
class BadProtocol(VgiProtocol):
    """``VgiProtocol`` subclass that advertises an incompatible version."""

    # Redeclared on this class body (not merely inherited) so the value lives
    # in the subclass's own ``__dict__``.
    protocol_version: ClassVar[str] = BAD_PROTOCOL_VERSION
48
+
49
+
50
class BadProtocolWorker(ExampleWorker):
    """``ExampleWorker`` variant that serves its catalog under ``BadProtocol``."""

    # Swap in the mismatched protocol surface; everything else is inherited.
    protocol_class: ClassVar[type[VgiProtocol]] = BadProtocol  # type: ignore[type-abstract]
54
+
55
+
56
def main() -> None:
    """Start the fixture worker process that advertises the bad protocol version."""
    BadProtocolWorker.main()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()