vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,336 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ # ruff: noqa: D102, D106
4
+ """Instrumented fixture functions for the `on_cancel` contract.
5
+
6
+ Three example functions registered in the example worker so that
7
+ integration tests (notably the destructor-side cancel path in the VGI
8
+ C++ extension) can observe whether the cancel signal actually reached
9
+ the Python worker.
10
+
11
+ All of these functions are test fixtures — not production-useful — so they
12
+ live here next to the other example-worker scaffolding rather than
13
+ under a generic location.
14
+
15
+ AVAILABLE FUNCTIONS
16
+ -------------------
17
+ SlowCancellableFunction — source-only table function that produces
18
+ one row per batch with a configurable per-batch sleep. When the
19
+ C++ extension tears down the stream early (LIMIT, Ctrl-C, etc.)
20
+ its ``on_cancel`` appends a line to a caller-supplied file path.
21
+ SlowCancellableInOutFunction — table-in-out variant, used to
22
+ exercise the ``VgiTableInOutLocalState`` destructor path once PR 2
23
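+ wires it.
+ SlowCancellableBufferingFunction — table-buffering (Sink+Source)
+ variant whose ``finalize()`` emits rows with a per-row sleep, used to
+ exercise the Source-side cancel path.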
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import os
29
+ import time
30
+ from dataclasses import dataclass
31
+ from typing import Annotated, ClassVar
32
+
33
+ import pyarrow as pa
34
+ from vgi_rpc import ArrowSerializableDataclass
35
+ from vgi_rpc.rpc import OutputCollector
36
+
37
+ from vgi.arguments import Arg, TableInput
38
+ from vgi.invocation import BindResponse
39
+ from vgi.schema_utils import schema
40
+ from vgi.table_buffering_function import (
41
+ TableBufferingFunction,
42
+ TableBufferingParams,
43
+ )
44
+ from vgi.table_function import (
45
+ BindParams,
46
+ ProcessParams,
47
+ TableFunctionGenerator,
48
+ bind_fixed_schema,
49
+ init_single_worker,
50
+ )
51
+ from vgi.table_in_out_function import TableInOutGenerator
52
+
53
+ __all__ = [
54
+ "SlowCancellableBufferingFunction",
55
+ "SlowCancellableFunction",
56
+ "SlowCancellableInOutFunction",
57
+ ]
58
+
59
+
60
+ def _append_cancel_probe(path: str, **fields: int) -> None:
61
+ # Opened O_APPEND so concurrent writers (HTTP-pool case) serialise
62
+ # naturally at the OS level.
63
+ parts = [f"pid={os.getpid()}"] + [f"{k}={v}" for k, v in fields.items()]
64
+ with open(path, "a", encoding="utf-8") as fh:
65
+ fh.write(" ".join(parts) + "\n")
66
+
67
+
68
@dataclass(slots=True, frozen=True, kw_only=True)
class SlowCancellableArgs:
    """Call arguments accepted by :class:`SlowCancellableFunction`.

    ``probe_path`` is bound via ``Arg(0)``; ``sleep_ms`` and ``count`` are
    bound via named ``Arg`` entries and carry defaults.
    """

    # File that receives one appended line each time on_cancel fires.
    probe_path: Annotated[
        str,
        Arg(0, doc="Path to append to when on_cancel fires"),
    ]
    # Delay applied before each emitted batch, in milliseconds.
    sleep_ms: Annotated[
        int,
        Arg("sleep_ms", default=50, doc="Sleep per batch (ms)", ge=0),
    ] = 50
    # Upper bound on the number of rows the source produces.
    count: Annotated[
        int,
        Arg("count", default=1_000_000, doc="Total rows to produce (caps the source)", ge=0),
    ] = 1_000_000
78
+
79
+
80
@dataclass(kw_only=True)
class SlowCancellableState(ArrowSerializableDataclass):
    """Per-stream state for :class:`SlowCancellableFunction`.

    Holds the count of rows already emitted; survives HTTP state round-trips.
    """

    # Rows emitted so far; ``process`` also uses it as the next row's ``n``.
    emitted: int = 0
85
+
86
+
87
@init_single_worker
@bind_fixed_schema
class SlowCancellableFunction(TableFunctionGenerator[SlowCancellableArgs, SlowCancellableState]):
    """Source-only slow producer whose ``on_cancel`` is logged to a file.

    SQL::

        SELECT * FROM slow_cancellable('/tmp/probe.txt', sleep_ms := 100) LIMIT 2;

    Every call to ``process`` sleeps briefly and then emits a single row
    ``(n INTEGER)``. If the stream is torn down early, ``on_cancel``
    appends one line to ``probe_path``. The PID is part of that line so
    tests can tell the workers of a multi-worker pool apart:

        pid=12345 emitted=2
    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema({"n": pa.int64()})

    class Meta:
        name = "slow_cancellable"
        description = "Slow producer with an on_cancel file-writing probe (test fixture)"
        categories = ["test"]

    @classmethod
    def initial_state(cls, params: ProcessParams[SlowCancellableArgs]) -> SlowCancellableState:
        # Fresh counter: nothing has been emitted yet.
        return SlowCancellableState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[SlowCancellableArgs],
        state: SlowCancellableState,
        out: OutputCollector,
    ) -> None:
        args = params.args
        if state.emitted < args.count:
            delay_ms = args.sleep_ms
            if delay_ms > 0:
                time.sleep(delay_ms / 1000.0)
            # Build the row from the pre-increment counter, then advance
            # the counter before handing the batch to the collector.
            row = pa.RecordBatch.from_pydict({"n": [state.emitted]}, schema=params.output_schema)
            state.emitted += 1
            out.emit(row)
        else:
            # Row cap reached: signal end of stream.
            out.finish()

    @classmethod
    def on_cancel(
        cls,
        params: ProcessParams[SlowCancellableArgs],
        state: SlowCancellableState,
    ) -> None:
        # Record how far the stream got when it was cancelled.
        emitted_so_far = state.emitted
        _append_cancel_probe(params.args.probe_path, emitted=emitted_so_far)
138
+
139
+
140
@dataclass(slots=True, frozen=True, kw_only=True)
class SlowCancellableInOutArgs:
    """Call arguments accepted by :class:`SlowCancellableInOutFunction`."""

    # File that receives one appended line each time on_cancel fires.
    probe_path: Annotated[
        str,
        Arg(0, doc="Path to append to when on_cancel fires"),
    ]
    # The table whose batches are passed through.
    data: Annotated[
        TableInput,
        Arg(1, doc="Input table"),
    ]
    # Delay applied before each forwarded batch, in milliseconds.
    sleep_ms: Annotated[
        int,
        Arg("sleep_ms", default=50, doc="Sleep per batch (ms)", ge=0),
    ] = 50
147
+
148
+
149
@dataclass(kw_only=True)
class SlowCancellableInOutState(ArrowSerializableDataclass):
    """Per-stream state for :class:`SlowCancellableInOutFunction`.

    Holds the count of batches seen; survives HTTP state round-trips.
    """

    # Number of input batches forwarded so far.
    processed: int = 0
154
+
155
+
156
class SlowCancellableInOutFunction(TableInOutGenerator[SlowCancellableInOutArgs, SlowCancellableInOutState]):
    """Table-in-out counterpart of :class:`SlowCancellableFunction`.

    Each input batch is forwarded unchanged after an optional sleep;
    ``on_cancel`` appends ``pid=<n> processed=<m>`` to ``probe_path``.
    """

    class Meta:
        name = "slow_cancellable_inout"
        description = "Slow table-in-out with on_cancel probe (test fixture)"
        categories = ["test"]

    @classmethod
    def on_bind(cls, params: BindParams[SlowCancellableInOutArgs]) -> BindResponse:
        # Output mirrors the input schema, so an input schema is required.
        input_schema = params.bind_call.input_schema
        assert input_schema is not None
        return BindResponse(output_schema=input_schema)

    @classmethod
    def initial_state(cls, params: ProcessParams[SlowCancellableInOutArgs]) -> SlowCancellableInOutState:
        # Fresh counter: no batches seen yet.
        return SlowCancellableInOutState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[SlowCancellableInOutArgs],
        state: SlowCancellableInOutState,
        batch: pa.RecordBatch,
        out: OutputCollector,
    ) -> None:
        delay_ms = params.args.sleep_ms
        if delay_ms > 0:
            time.sleep(delay_ms / 1000.0)
        # Count the batch, then pass it through untouched.
        state.processed += 1
        out.emit(batch)

    @classmethod
    def on_cancel(
        cls,
        params: ProcessParams[SlowCancellableInOutArgs],
        state: SlowCancellableInOutState | None,
    ) -> None:
        # With no state available, report zero processed batches.
        if state is None:
            seen = 0
        else:
            seen = state.processed
        _append_cancel_probe(params.args.probe_path, processed=seen)
194
+
195
+
196
+ # ============================================================================
197
+ # TableBufferingFunction variant — exercises the on_cancel wiring on
198
+ # ``TableBufferingFinalizeState`` (the Sink+Source path). Mirrors the
199
+ # streaming fixtures above; the only structural difference is that
200
+ # emission lives in ``finalize()`` rather than ``process()``, so cancel
201
+ # fires through the producer-mode stream-cancel path on the Source side.
202
+ # ============================================================================
203
+
204
+
205
@dataclass(slots=True, frozen=True, kw_only=True)
class SlowCancellableBufferingArgs:
    """Call arguments accepted by :class:`SlowCancellableBufferingFunction`."""

    # File that receives one appended line each time on_cancel fires.
    probe_path: Annotated[
        str,
        Arg(0, doc="Path to append to when on_cancel fires"),
    ]
    # A TableBufferingFunction has to take a TABLE input, because the
    # operator's Sink phase wraps the input pipeline. The rows are never
    # looked at (this fixture only cares about Source-side cancel), yet
    # DuckDB's binder needs TableInput declared for subquery arguments.
    data: Annotated[
        TableInput,
        Arg(1, doc="Input table (rows ignored)"),
    ]
    # Number of rows finalize emits before finishing.
    count: Annotated[
        int,
        Arg("count", default=1_000, doc="Total rows to emit during finalize", ge=1),
    ] = 1_000
    # Delay applied before each emitted row, in milliseconds.
    sleep_ms: Annotated[
        int,
        Arg("sleep_ms", default=10, doc="Sleep per emitted row (ms)", ge=0),
    ] = 10
223
+
224
+
225
@dataclass(kw_only=True)
class SlowCancellableBufferingState(ArrowSerializableDataclass):
    """Finalize-phase state for :class:`SlowCancellableBufferingFunction`.

    The counter survives wire round-trips so it can be rehydrated over HTTP.
    """

    # Rows emitted so far by finalize.
    emitted: int = 0
    # probe_path and the total row count are snapshotted into the state so
    # that on_cancel does not have to fetch them from ``params.args`` —
    # harmless on subprocess, but an extra cold-load round-trip on HTTP
    # rehydration.
    probe_path: str = ""
    total: int = 0
235
+
236
+
237
class SlowCancellableBufferingFunction(
    TableBufferingFunction[SlowCancellableBufferingArgs, SlowCancellableBufferingState],
):
    """Slow buffered producer that records ``on_cancel`` to a file.

    Sink absorbs all input (we don't actually use the input data — this
    fixture is purely about exercising the Source-side cancel path).
    ``finalize()`` then emits ``count`` rows with a per-row sleep so a
    LIMIT 1 query reliably triggers cancel before EOS. ``on_cancel``
    appends ``pid=<n> emitted=<m>`` to the probe path so the integration
    test can assert that the cancel hook actually fired. When cancel
    arrives before any finalize state exists, the line carries
    ``emitted=-1`` instead.

    SQL::

        SELECT n FROM slow_cancellable_buffering('/tmp/probe.txt',
                                                 (SELECT 1 AS x),
                                                 sleep_ms := 20,
                                                 count := 1000)
        LIMIT 1;
    """

    class Meta:
        name = "slow_cancellable_buffering"
        description = "Slow buffered table function with an on_cancel file probe (test fixture)"
        categories = ["test"]

    @classmethod
    def on_bind(
        cls,
        params: BindParams[SlowCancellableBufferingArgs],
    ) -> BindResponse:
        # Emit a single-column INT64 output regardless of input schema;
        # the input is ignored (Sink absorbs but doesn't accumulate).
        return BindResponse(output_schema=schema({"n": pa.int64()}))

    @classmethod
    def process(
        cls,
        batch: pa.RecordBatch,  # noqa: ARG003 — sink absorbs but ignores
        params: TableBufferingParams[SlowCancellableBufferingArgs],
    ) -> bytes:
        # We don't store anything; just return the execution_id so
        # combine() sees a stable bucket. Cancel testing is a Source-side
        # concern, not a Sink-side one.
        return params.execution_id

    @classmethod
    def combine(
        cls,
        state_ids: list[bytes],  # noqa: ARG003
        params: TableBufferingParams[SlowCancellableBufferingArgs],
    ) -> list[bytes]:
        # Collapse every sink bucket into the single execution-wide one.
        return [params.execution_id]

    @classmethod
    def initial_finalize_state(
        cls,
        finalize_state_id: bytes,  # noqa: ARG003
        params: TableBufferingParams[SlowCancellableBufferingArgs],
    ) -> SlowCancellableBufferingState:
        # Snapshot the values on_cancel and finalize need into the state.
        return SlowCancellableBufferingState(
            probe_path=params.args.probe_path,
            total=params.args.count,
        )

    @classmethod
    def finalize(
        cls,
        params: TableBufferingParams[SlowCancellableBufferingArgs],
        finalize_state_id: bytes,  # noqa: ARG003 — unused; producer-mode tick
        state: SlowCancellableBufferingState,
        out: OutputCollector,
    ) -> None:
        """Emit one row per call until ``state.total`` rows have been produced."""
        if state.emitted >= state.total:
            out.finish()
            return
        if params.args.sleep_ms > 0:
            time.sleep(params.args.sleep_ms / 1000.0)
        batch = pa.RecordBatch.from_pydict(
            {"n": [state.emitted]},
            schema=params.output_schema,
        )
        state.emitted += 1
        out.emit(batch)

    @classmethod
    def on_cancel(
        cls,
        params: TableBufferingParams[SlowCancellableBufferingArgs],
        finalize_state_id: bytes,  # noqa: ARG003
        state: SlowCancellableBufferingState | None,
    ) -> None:
        """Append a probe line recording that cancel fired.

        Args:
            params: Call parameters; ``params.args.probe_path`` is used only
                when no state snapshot is available.
            finalize_state_id: Unused.
            state: Finalize state, or ``None`` when cancel fires before
                ``initial_finalize_state`` ran.
        """
        # ``state`` is None when cancel fires before initial_finalize_state
        # ran. There is no snapshot to read in that case, so fall back to
        # ``params.args`` for the path and write ``emitted=-1``. The line
        # must land in the real probe file (not a sink such as /dev/null)
        # so the test can distinguish "cancel never fired" from "cancel
        # fired pre-init".
        if state is None:
            _append_cancel_probe(params.args.probe_path, emitted=-1)
            return
        _append_cancel_probe(state.probe_path, emitted=state.emitted)