vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
|
@@ -0,0 +1,735 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vgi-python
|
|
3
|
+
Version: 0.8.0
|
|
4
|
+
Summary: Vector Gateway Interface - Connect DuckDB to external programs via Apache Arrow
|
|
5
|
+
Project-URL: Homepage, https://query.farm
|
|
6
|
+
Project-URL: Repository, https://github.com/Query-farm/vgi-python
|
|
7
|
+
Project-URL: Documentation, https://github.com/Query-farm/vgi-python/tree/main/docs
|
|
8
|
+
Project-URL: Issues, https://github.com/Query-farm/vgi-python/issues
|
|
9
|
+
Author-email: Rusty Conover <rusty@query.farm>
|
|
10
|
+
Maintainer-email: Query Farm LLC <hello@query.farm>
|
|
11
|
+
License: Query Farm Source-Available License, Version 1.0
|
|
12
|
+
|
|
13
|
+
Copyright (c) 2025, 2026 Query Farm LLC. All rights reserved.
|
|
14
|
+
|
|
15
|
+
## 1. Definitions
|
|
16
|
+
|
|
17
|
+
"Licensor" means Query Farm LLC (http://query.farm, hello@query.farm) and its
|
|
18
|
+
affiliates under common control.
|
|
19
|
+
|
|
20
|
+
"VGI" means the Vector Gateway Interface, the DuckDB extension technology developed
|
|
21
|
+
by the Licensor, also referred to by the Licensor as its "Hyperfederation" database
|
|
22
|
+
technology.
|
|
23
|
+
|
|
24
|
+
"Licensed Work" means VGI, including its source code, object code, and any
|
|
25
|
+
documentation distributed with it, in each version made available by the Licensor
|
|
26
|
+
under this License.
|
|
27
|
+
|
|
28
|
+
"You" (or "Your") means the individual or legal entity exercising rights under this
|
|
29
|
+
License, together with all affiliates under common control with that entity.
|
|
30
|
+
|
|
31
|
+
"Production Use" means any use of the Licensed Work other than for development,
|
|
32
|
+
testing, evaluation, experimentation, or other non-production purposes.
|
|
33
|
+
|
|
34
|
+
"Hyperfederation Services" means services relating to the federation, gateway,
|
|
35
|
+
integration, querying, or interoperation of data sources using VGI or
|
|
36
|
+
functionally equivalent technology, including services that expose, broker, or
|
|
37
|
+
provide access to such federated or gateway capabilities.
|
|
38
|
+
|
|
39
|
+
"Commercial Marketplace" means any platform, exchange, or intermediary service,
|
|
40
|
+
whether or not operated for a fee, that connects providers and consumers of
|
|
41
|
+
Hyperfederation Services, or that facilitates the offering, discovery, exchange,
|
|
42
|
+
sale, or licensing of Hyperfederation Services among third parties.
|
|
43
|
+
|
|
44
|
+
"Competing Offering" means a product or service that You make available to third
|
|
45
|
+
parties, on a paid basis (including through paid support, subscription, or hosting
|
|
46
|
+
arrangements), whose capabilities significantly overlap with those of the Licensor's
|
|
47
|
+
version(s) of the Licensed Work.
|
|
48
|
+
|
|
49
|
+
## 2. Grant of Rights
|
|
50
|
+
|
|
51
|
+
Subject to the terms and limitations of this License, the Licensor grants You a
|
|
52
|
+
worldwide, royalty-free, non-exclusive license to:
|
|
53
|
+
|
|
54
|
+
(a) use, copy, and run the Licensed Work for any non-production purpose;
|
|
55
|
+
|
|
56
|
+
(b) modify the Licensed Work and create derivative works of it;
|
|
57
|
+
|
|
58
|
+
(c) redistribute the Licensed Work and Your derivative works, provided You comply
|
|
59
|
+
with Section 5; and
|
|
60
|
+
|
|
61
|
+
(d) make Production Use of the Licensed Work, except where such use is restricted by
|
|
62
|
+
Section 3 or reserved to the Licensor by Section 4.
|
|
63
|
+
|
|
64
|
+
## 3. Production Use Conditions
|
|
65
|
+
|
|
66
|
+
The grant of Production Use in Section 2(d) does not extend to, and You may not
|
|
67
|
+
without a separate commercial license from the Licensor:
|
|
68
|
+
|
|
69
|
+
(a) provide a Competing Offering to third parties; or
|
|
70
|
+
|
|
71
|
+
(b) offer the Licensed Work, or any derivative work of it, to third parties on a
|
|
72
|
+
hosted, embedded, or as-a-service basis where doing so competes with the Licensor's
|
|
73
|
+
commercial interests in the Licensed Work.
|
|
74
|
+
|
|
75
|
+
"Embedded" includes incorporating the source or object code of the Licensed Work
|
|
76
|
+
into a Competing Offering, and packaging a Competing Offering such that the Licensed
|
|
77
|
+
Work must be accessed or downloaded for that offering to function.
|
|
78
|
+
|
|
79
|
+
Hosting or using the Licensed Work for Your own internal purposes is not a Competing
|
|
80
|
+
Offering and is permitted, including across Your affiliates under common control.
|
|
81
|
+
|
|
82
|
+
## 4. Reserved Rights
|
|
83
|
+
|
|
84
|
+
Notwithstanding any other provision of this License, the Licensor reserves to itself
|
|
85
|
+
the exclusive right to build, operate, offer, or authorize a Commercial Marketplace
|
|
86
|
+
that incorporates, integrates, is built upon, or otherwise uses the Licensed Work.
|
|
87
|
+
|
|
88
|
+
This License grants You no right to construct, operate, or enable a Commercial
|
|
89
|
+
Marketplace using the Licensed Work, whether on a commercial or non-commercial basis,
|
|
90
|
+
and any such use requires a separate written agreement with the Licensor.
|
|
91
|
+
|
|
92
|
+
## 5. Redistribution
|
|
93
|
+
|
|
94
|
+
If You redistribute the Licensed Work or any derivative work of it, in original or
|
|
95
|
+
modified form, You must:
|
|
96
|
+
|
|
97
|
+
(a) include a complete, unmodified copy of this License with each copy; and
|
|
98
|
+
|
|
99
|
+
(b) cause any recipient to receive the Licensed Work subject to the terms of this
|
|
100
|
+
License.
|
|
101
|
+
|
|
102
|
+
The conditions in Sections 3 and 4 apply to every recipient of the Licensed Work,
|
|
103
|
+
whether received directly from the Licensor or through a third party.
|
|
104
|
+
|
|
105
|
+
## 6. Conversion to Open Source
|
|
106
|
+
|
|
107
|
+
For each version of the Licensed Work, on the tenth anniversary of the date the
|
|
108
|
+
Licensor first made that version publicly available (the "Change Date" for that
|
|
109
|
+
version), the Licensor additionally grants You the right to use that version under
|
|
110
|
+
the terms of the Apache License, Version 2.0, and on and after that version's Change
|
|
111
|
+
Date the restrictions in Sections 3 and 4 no longer apply to that version.
|
|
112
|
+
|
|
113
|
+
This License applies separately to each version of the Licensed Work, and the Change
|
|
114
|
+
Date may differ between versions.
|
|
115
|
+
|
|
116
|
+
## 7. Commercial Licensing
|
|
117
|
+
|
|
118
|
+
If Your intended use is not permitted under this License, You may obtain a separate
|
|
119
|
+
commercial license from the Licensor by contacting hello@query.farm. Absent such a
|
|
120
|
+
license, You must refrain from the restricted use.
|
|
121
|
+
|
|
122
|
+
## 8. Trademarks
|
|
123
|
+
|
|
124
|
+
This License does not grant You any right to use the names, trademarks, service
|
|
125
|
+
marks, or logos of the Licensor, including "Vector Gateway Interface," "VGI," and
|
|
126
|
+
"Hyperfederation," except as required for reasonable and customary use in describing
|
|
127
|
+
the origin of the Licensed Work.
|
|
128
|
+
|
|
129
|
+
## 9. Termination
|
|
130
|
+
|
|
131
|
+
Any use of the Licensed Work in violation of this License automatically terminates
|
|
132
|
+
Your rights under this License for the current and all other versions of the Licensed
|
|
133
|
+
Work. Your rights may be reinstated only by a writing signed by the Licensor.
|
|
134
|
+
|
|
135
|
+
## 10. Disclaimer of Warranty and Limitation of Liability
|
|
136
|
+
|
|
137
|
+
TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
|
138
|
+
AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EXPRESS OR IMPLIED,
|
|
139
|
+
INCLUDING WITHOUT LIMITATION ANY WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS
|
|
140
|
+
FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, OR TITLE.
|
|
141
|
+
|
|
142
|
+
TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL THE LICENSOR BE
|
|
143
|
+
LIABLE TO YOU FOR ANY DAMAGES ARISING OUT OF OR RELATING TO THIS LICENSE OR THE USE
|
|
144
|
+
OF THE LICENSED WORK, WHETHER IN CONTRACT, TORT, OR OTHERWISE.
|
|
145
|
+
License-File: LICENSE
|
|
146
|
+
Classifier: Development Status :: 4 - Beta
|
|
147
|
+
Classifier: Intended Audience :: Developers
|
|
148
|
+
Classifier: License :: Other/Proprietary License
|
|
149
|
+
Classifier: Operating System :: OS Independent
|
|
150
|
+
Classifier: Programming Language :: Python :: 3
|
|
151
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
152
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
153
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
154
|
+
Classifier: Topic :: Database
|
|
155
|
+
Classifier: Topic :: Database :: Database Engines/Servers
|
|
156
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
157
|
+
Classifier: Typing :: Typed
|
|
158
|
+
Requires-Python: >=3.13
|
|
159
|
+
Requires-Dist: click
|
|
160
|
+
Requires-Dist: httpx>=0.24
|
|
161
|
+
Requires-Dist: platformdirs
|
|
162
|
+
Requires-Dist: pyarrow
|
|
163
|
+
Requires-Dist: typer>=0.9
|
|
164
|
+
Requires-Dist: vgi-rpc>=0.20.0
|
|
165
|
+
Provides-Extra: azure
|
|
166
|
+
Requires-Dist: azure-identity>=1.16.0; extra == 'azure'
|
|
167
|
+
Requires-Dist: pymssql>=2.3.0; extra == 'azure'
|
|
168
|
+
Provides-Extra: dev
|
|
169
|
+
Requires-Dist: azure-identity>=1.16.0; extra == 'dev'
|
|
170
|
+
Requires-Dist: duckdb>=1.5.3; extra == 'dev'
|
|
171
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
172
|
+
Requires-Dist: numpy>=2.4.1; extra == 'dev'
|
|
173
|
+
Requires-Dist: pyarrow-stubs; extra == 'dev'
|
|
174
|
+
Requires-Dist: pymssql>=2.3.0; extra == 'dev'
|
|
175
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
176
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
177
|
+
Requires-Dist: pytest-examples; extra == 'dev'
|
|
178
|
+
Requires-Dist: pytest-ruff; extra == 'dev'
|
|
179
|
+
Requires-Dist: pytest-xdist; extra == 'dev'
|
|
180
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
181
|
+
Requires-Dist: sqlglot; extra == 'dev'
|
|
182
|
+
Requires-Dist: vgi-rpc[conformance,external,http,oauth,otel,sentry]; extra == 'dev'
|
|
183
|
+
Requires-Dist: vgi-rpc[http]; extra == 'dev'
|
|
184
|
+
Requires-Dist: vgi-rpc[oauth]; extra == 'dev'
|
|
185
|
+
Requires-Dist: vgi-rpc[otel]; extra == 'dev'
|
|
186
|
+
Requires-Dist: vgi-rpc[sentry]; extra == 'dev'
|
|
187
|
+
Provides-Extra: duckdb
|
|
188
|
+
Requires-Dist: duckdb>=1.5.3; extra == 'duckdb'
|
|
189
|
+
Provides-Extra: fixtures
|
|
190
|
+
Requires-Dist: vgi-fixtures; extra == 'fixtures'
|
|
191
|
+
Provides-Extra: haybarn
|
|
192
|
+
Requires-Dist: haybarn>=1.5.3rc10; extra == 'haybarn'
|
|
193
|
+
Provides-Extra: http
|
|
194
|
+
Requires-Dist: vgi-rpc[http]; extra == 'http'
|
|
195
|
+
Provides-Extra: oauth
|
|
196
|
+
Requires-Dist: vgi-rpc[oauth]; extra == 'oauth'
|
|
197
|
+
Provides-Extra: otel
|
|
198
|
+
Requires-Dist: vgi-rpc[otel]; extra == 'otel'
|
|
199
|
+
Provides-Extra: sentry
|
|
200
|
+
Requires-Dist: vgi-rpc[sentry]; extra == 'sentry'
|
|
201
|
+
Provides-Extra: test-fixtures
|
|
202
|
+
Requires-Dist: numpy>=2.4.1; extra == 'test-fixtures'
|
|
203
|
+
Provides-Extra: test-fixtures-writable
|
|
204
|
+
Requires-Dist: numpy>=2.4.1; extra == 'test-fixtures-writable'
|
|
205
|
+
Requires-Dist: sqlglot; extra == 'test-fixtures-writable'
|
|
206
|
+
Provides-Extra: transactor
|
|
207
|
+
Requires-Dist: sqlglot; extra == 'transactor'
|
|
208
|
+
Description-Content-Type: text/markdown
|
|
209
|
+
|
|
210
|
+
# VGI (Vector Gateway Interface)
|
|
211
|
+
|
|
212
|
+
<p align="center">
|
|
213
|
+
<img src="docs/vgi-logo.png" alt="VGI Logo" width="400">
|
|
214
|
+
</p>
|
|
215
|
+
|
|
216
|
+
<p align="center">
|
|
217
|
+
<strong>Apache Arrow-based protocol for extending DuckDB using any language.</strong><br/>
|
|
218
|
+
<strong>No C++/C/Zig/Rust or compilation/linking required (unless you want to).</strong>
|
|
219
|
+
</p>
|
|
220
|
+
|
|
221
|
+
<p align="center">
|
|
222
|
+
Created by <a href="https://query.farm">Query.Farm</a>
|
|
223
|
+
</p>
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## See It in Action
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
# my_worker.py
|
|
231
|
+
from typing import Annotated
|
|
232
|
+
from vgi import ScalarFunction, Param, Returns, Worker
|
|
233
|
+
import pyarrow as pa
|
|
234
|
+
import pyarrow.compute as pc
|
|
235
|
+
|
|
236
|
+
class Greeting(ScalarFunction):
|
|
237
|
+
"""Generate a greeting for each name."""
|
|
238
|
+
|
|
239
|
+
@classmethod
|
|
240
|
+
def compute(
|
|
241
|
+
cls,
|
|
242
|
+
name: Annotated[pa.StringArray, Param(doc="Column containing names")],
|
|
243
|
+
) -> Annotated[pa.StringArray, Returns()]:
|
|
244
|
+
return pc.binary_join_element_wise("Hello, ", name, "!")
|
|
245
|
+
|
|
246
|
+
class MyWorker(Worker):
|
|
247
|
+
functions = [Greeting]
|
|
248
|
+
|
|
249
|
+
if __name__ == "__main__":
|
|
250
|
+
MyWorker().run()
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
```sql
|
|
254
|
+
-- First time only.
|
|
255
|
+
INSTALL vgi FROM COMMUNITY;
|
|
256
|
+
LOAD vgi;
|
|
257
|
+
ATTACH 'my_worker' (TYPE 'vgi', LOCATION './my_worker.py');
|
|
258
|
+
|
|
259
|
+
SELECT greeting(name) FROM users;
|
|
260
|
+
-- "Hello, Alice!"
|
|
261
|
+
-- "Hello, Bob!"
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
Or you can launch the DuckDB CLI with
|
|
265
|
+
|
|
266
|
+
`duckdb vgi:my_worker.py` to start a new session with the functions you just added.
|
|
267
|
+
|
|
268
|
+
That's it. No C++ compilation, no extension versioning, no complex build process. Just a Python script that DuckDB can call.
|
|
269
|
+
|
|
270
|
+
---
|
|
271
|
+
|
|
272
|
+
## Installation
|
|
273
|
+
|
|
274
|
+
```bash
|
|
275
|
+
pip install vgi-python
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
Or with [uv](https://github.com/astral-sh/uv):
|
|
279
|
+
|
|
280
|
+
```bash
|
|
281
|
+
uv add vgi-python
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
---
|
|
285
|
+
|
|
286
|
+
## Why VGI?
|
|
287
|
+
|
|
288
|
+
VGI lets you extend DuckDB with Python functions that run in separate processes, communicating via Apache Arrow IPC. This means:
|
|
289
|
+
|
|
290
|
+
| Traditional Extensions | VGI Workers |
|
|
291
|
+
|----------------------|-------------|
|
|
292
|
+
| C/C++ compilation required | Any language (Python, TypeScript, and Go supported first) |
|
|
293
|
+
| Tied to DuckDB version | Version independent |
|
|
294
|
+
| Complex build/release cycle | Ship a script or executable |
|
|
295
|
+
| Runs in-process | Process isolation |
|
|
296
|
+
| Single-threaded | Parallel workers |
|
|
297
|
+
|
|
298
|
+
**Use cases:**
|
|
299
|
+
- Call REST APIs or external services from SQL
|
|
300
|
+
- Run ML inference (PyTorch, scikit-learn, etc.)
|
|
301
|
+
- Process data with Python libraries (pandas, numpy)
|
|
302
|
+
- Build custom ETL transforms
|
|
303
|
+
- Create domain-specific functions for your team
|
|
304
|
+
- Expose external data sources as queryable tables and views
|
|
305
|
+
|
|
306
|
+
---
|
|
307
|
+
|
|
308
|
+
## Quick Start
|
|
309
|
+
|
|
310
|
+
### Step 1: Create a Worker
|
|
311
|
+
|
|
312
|
+
A worker is a Python script that defines one or more functions:
|
|
313
|
+
|
|
314
|
+
```python
|
|
315
|
+
#!/usr/bin/env python
|
|
316
|
+
# my_worker.py
|
|
317
|
+
from typing import Annotated
|
|
318
|
+
import pyarrow as pa
|
|
319
|
+
import pyarrow.compute as pc
|
|
320
|
+
from vgi import ScalarFunction, Param, Returns, Worker
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
class UpperCase(ScalarFunction):
|
|
324
|
+
"""Convert string values to uppercase."""
|
|
325
|
+
|
|
326
|
+
@classmethod
|
|
327
|
+
def compute(
|
|
328
|
+
cls,
|
|
329
|
+
value: Annotated[pa.StringArray, Param(doc="String value to uppercase")],
|
|
330
|
+
) -> Annotated[pa.StringArray, Returns()]:
|
|
331
|
+
return pc.utf8_upper(value)
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
class MyWorker(Worker):
|
|
335
|
+
catalog_name = "my_funcs"
|
|
336
|
+
functions = [UpperCase]
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
if __name__ == "__main__":
|
|
340
|
+
MyWorker().run()
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
### Step 2: Use from DuckDB
|
|
344
|
+
|
|
345
|
+
```sql
|
|
346
|
+
-- Attach the worker as a catalog
|
|
347
|
+
ATTACH 'my_funcs' (TYPE 'vgi', LOCATION './my_worker.py');
|
|
348
|
+
|
|
349
|
+
-- Call your function
|
|
350
|
+
SELECT upper_case(name) FROM users;
|
|
351
|
+
|
|
352
|
+
-- Use in complex queries
|
|
353
|
+
SELECT id, upper_case(status) as status
|
|
354
|
+
FROM orders
|
|
355
|
+
WHERE created_at > '2024-01-01';
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
### Step 3: There is no step 3
|
|
359
|
+
|
|
360
|
+
Your function is now available in DuckDB. Ship the Python script to your team, and they can use it immediately.
|
|
361
|
+
|
|
362
|
+
---
|
|
363
|
+
|
|
364
|
+
## Going Further: Type-Safe Arguments
|
|
365
|
+
|
|
366
|
+
For production use, you'll want type validation. Use `Param` with `type_bound` to ensure columns have the correct type:
|
|
367
|
+
|
|
368
|
+
```python
|
|
369
|
+
from typing import Annotated
|
|
370
|
+
from vgi import ScalarFunction, Param, Returns, Worker
|
|
371
|
+
import pyarrow as pa
|
|
372
|
+
import pyarrow.compute as pc
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
class AddValues(ScalarFunction):
|
|
376
|
+
"""Add two integer values together."""
|
|
377
|
+
|
|
378
|
+
@classmethod
|
|
379
|
+
def compute(
|
|
380
|
+
cls,
|
|
381
|
+
left: Annotated[pa.Int64Array, Param(type_bound=pa.types.is_integer, doc="First integer value")],
|
|
382
|
+
right: Annotated[pa.Int64Array, Param(type_bound=pa.types.is_integer, doc="Second integer value")],
|
|
383
|
+
) -> Annotated[pa.Int64Array, Returns()]:
|
|
384
|
+
return pc.add(left, right)
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
```sql
|
|
388
|
+
SELECT add_values(price, tax) as total FROM orders;
|
|
389
|
+
|
|
390
|
+
-- This would fail at bind time with a clear error:
|
|
391
|
+
-- SELECT add_values(name, price) FROM orders;
|
|
392
|
+
-- Error: Argument 'left': Column 'name' has type string, but type bound requires: is_integer
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
Key features of the `Param`/`Returns` API:
|
|
396
|
+
- Types are inferred from PyArrow array annotations (`pa.Int64Array` -> `pa.int64()`)
|
|
397
|
+
- `type_bound` validates the column's Arrow type at bind time
|
|
398
|
+
- `ConstParam` receives scalar values (not columns) from SQL arguments
|
|
399
|
+
- `Returns` declares the output type
|
|
400
|
+
|
|
401
|
+
---
|
|
402
|
+
|
|
403
|
+
## Function Types
|
|
404
|
+
|
|
405
|
+
VGI supports three function types:
|
|
406
|
+
|
|
407
|
+
| Type | Base Class | SQL Pattern | Use Case |
|
|
408
|
+
|------|------------|-------------|----------|
|
|
409
|
+
| **Scalar** | `ScalarFunction` | `SELECT func(col) FROM t` | Per-row transforms (1:1) |
|
|
410
|
+
| **Table** | `TableFunctionGenerator` | `SELECT * FROM func(args)` | Generate data |
|
|
411
|
+
| **Table-In-Out** | `TableInOutFunction` | `SELECT * FROM func((SELECT ...))` | Aggregation, filtering |
|
|
412
|
+
|
|
413
|
+
### Scalar Functions
|
|
414
|
+
|
|
415
|
+
Transform each row independently. Output has the same number of rows as input.
|
|
416
|
+
|
|
417
|
+
```python
|
|
418
|
+
class Double(ScalarFunction):
|
|
419
|
+
"""Double an integer value."""
|
|
420
|
+
|
|
421
|
+
@classmethod
|
|
422
|
+
def compute(
|
|
423
|
+
cls,
|
|
424
|
+
value: Annotated[pa.Int64Array, Param(doc="Value to double")],
|
|
425
|
+
) -> Annotated[pa.Int64Array, Returns()]:
|
|
426
|
+
return pc.multiply(value, 2)
|
|
427
|
+
```
|
|
428
|
+
|
|
429
|
+
### Table Functions
|
|
430
|
+
|
|
431
|
+
Generate output data from arguments (no input table). Each call to `process()` emits
|
|
432
|
+
a batch via `out.emit()` or signals completion via `out.finish()`.
|
|
433
|
+
|
|
434
|
+
```python
|
|
435
|
+
from dataclasses import dataclass
|
|
436
|
+
from typing import Annotated, ClassVar
|
|
437
|
+
import pyarrow as pa
|
|
438
|
+
from vgi import TableFunctionGenerator, Arg
|
|
439
|
+
from vgi.table_function import ProcessParams, OutputCollector
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
@dataclass
|
|
443
|
+
class CounterState:
|
|
444
|
+
remaining: int
|
|
445
|
+
current: int = 0
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
class Counter(TableFunctionGenerator):
|
|
449
|
+
"""Generate a sequence of integers."""
|
|
450
|
+
|
|
451
|
+
count: Annotated[int, Arg(0, doc="Number of rows to generate")]
|
|
452
|
+
FIXED_SCHEMA: ClassVar[pa.Schema] = pa.schema([("n", pa.int64())])
|
|
453
|
+
|
|
454
|
+
@classmethod
|
|
455
|
+
def initial_state(cls, params: ProcessParams) -> CounterState:
|
|
456
|
+
return CounterState(remaining=params.args.count)
|
|
457
|
+
|
|
458
|
+
@classmethod
|
|
459
|
+
def process(cls, params: ProcessParams, state: CounterState, out: OutputCollector) -> None:
|
|
460
|
+
if state.remaining <= 0:
|
|
461
|
+
out.finish()
|
|
462
|
+
return
|
|
463
|
+
batch_size = min(state.remaining, 1000)
|
|
464
|
+
values = list(range(state.current, state.current + batch_size))
|
|
465
|
+
out.emit(pa.RecordBatch.from_pydict({"n": values}, schema=params.output_schema))
|
|
466
|
+
state.current += batch_size
|
|
467
|
+
state.remaining -= batch_size
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
### Table-In-Out Functions
|
|
471
|
+
|
|
472
|
+
Transform or aggregate input data. Override `transform()` for per-batch processing
|
|
473
|
+
and `finish()` for final output after all input is consumed.
|
|
474
|
+
|
|
475
|
+
```python
|
|
476
|
+
import pyarrow as pa
|
|
477
|
+
import pyarrow.compute as pc
|
|
478
|
+
from vgi import TableInOutFunction
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
class FilterPositive(TableInOutFunction):
|
|
482
|
+
"""Keep only rows where all numeric columns are positive."""
|
|
483
|
+
|
|
484
|
+
@property
|
|
485
|
+
def output_schema(self) -> pa.Schema:
|
|
486
|
+
return self.input_schema
|
|
487
|
+
|
|
488
|
+
def transform(self, batch: pa.RecordBatch) -> pa.RecordBatch:
|
|
489
|
+
mask = None
|
|
490
|
+
for i, field in enumerate(batch.schema):
|
|
491
|
+
if pa.types.is_integer(field.type) or pa.types.is_floating(field.type):
|
|
492
|
+
col_mask = pc.greater(batch.column(i), 0)
|
|
493
|
+
mask = col_mask if mask is None else pc.and_(mask, col_mask)
|
|
494
|
+
if mask is not None:
|
|
495
|
+
return pc.filter(batch, mask)
|
|
496
|
+
return batch
|
|
497
|
+
```
|
|
498
|
+
|
|
499
|
+
---
|
|
500
|
+
|
|
501
|
+
## Beyond Functions: Full Catalog Support
|
|
502
|
+
|
|
503
|
+
VGI workers can expose more than just functions. A worker can provide a complete database catalog with:
|
|
504
|
+
|
|
505
|
+
- **Schemas** - Organize objects into namespaces
|
|
506
|
+
- **Tables** - Expose external data as queryable tables
|
|
507
|
+
- **Views** - Define SQL views over your data
|
|
508
|
+
- **Functions** - Scalar, table, and table-in-out functions
|
|
509
|
+
|
|
510
|
+
```sql
|
|
511
|
+
ATTACH 'external_db' (TYPE 'vgi', LOCATION './my_catalog_worker.py');
|
|
512
|
+
|
|
513
|
+
-- Query tables from the attached catalog
|
|
514
|
+
SELECT * FROM external_db.main.users;
|
|
515
|
+
|
|
516
|
+
-- Use views
|
|
517
|
+
SELECT * FROM external_db.analytics.daily_summary;
|
|
518
|
+
|
|
519
|
+
-- Call functions
|
|
520
|
+
SELECT external_db.main.transform(col) FROM my_table;
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
This enables VGI workers to act as bridges to external systems—databases, APIs, file systems—presenting them as native DuckDB catalogs.
|
|
524
|
+
|
|
525
|
+
See [Catalog Interface](https://github.com/Query-farm/vgi-python/blob/main/docs/catalog-interface.md) for implementation details.
|
|
526
|
+
|
|
527
|
+
---
|
|
528
|
+
|
|
529
|
+
## Parallel Execution
|
|
530
|
+
|
|
531
|
+
Functions can run across multiple worker processes. The client automatically
|
|
532
|
+
distributes input batches round-robin across workers and collects results.
|
|
533
|
+
|
|
534
|
+
See [Function API Reference](https://github.com/Query-farm/vgi-python/blob/main/docs/generator-api.md) for advanced patterns like distributed aggregation.
|
|
535
|
+
|
|
536
|
+
---
|
|
537
|
+
|
|
538
|
+
## Error Handling
|
|
539
|
+
|
|
540
|
+
Errors in your functions propagate to DuckDB with clear messages:
|
|
541
|
+
|
|
542
|
+
```python test="skip"
|
|
543
|
+
@classmethod
|
|
544
|
+
def compute(cls, value: Annotated[pa.Int64Array, Param()]) -> Annotated[pa.Int64Array, Returns()]:
|
|
545
|
+
raise ValueError("Something went wrong")
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
```sql
|
|
549
|
+
SELECT my_func(col) FROM my_table;
|
|
550
|
+
-- Error: Something went wrong
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
Type bound violations are caught at bind time (before processing starts):
|
|
554
|
+
|
|
555
|
+
```sql
|
|
556
|
+
SELECT add_values(name, price) FROM orders;
|
|
557
|
+
-- Error: Argument 'left': Column 'name' has type string,
|
|
558
|
+
-- but type bound requires: is_integer
|
|
559
|
+
```
|
|
560
|
+
|
|
561
|
+
### Debugging Worker Failures
|
|
562
|
+
|
|
563
|
+
When a worker fails, the Python traceback is written to stderr. By default, the client captures this stderr and appends its last 50 lines to the error message, so you see the failure in context:
|
|
564
|
+
|
|
565
|
+
```
|
|
566
|
+
ClientError: Worker Exception: function 'my_func' raised ValueError
|
|
567
|
+
|
|
568
|
+
Worker stderr:
|
|
569
|
+
Traceback (most recent call last):
|
|
570
|
+
File "my_worker.py", line 42, in compute
|
|
571
|
+
...
|
|
572
|
+
ValueError: Something went wrong
|
|
573
|
+
```
|
|
574
|
+
|
|
575
|
+
For real-time debugging, set `VGI_WORKER_DEBUG=1` to stream worker logs directly to your terminal and enable DEBUG-level logging:
|
|
576
|
+
|
|
577
|
+
```bash
|
|
578
|
+
VGI_WORKER_DEBUG=1 python my_script.py
|
|
579
|
+
```
|
|
580
|
+
|
|
581
|
+
This is especially useful when integrating from C++ or other clients where stderr might otherwise be lost.
|
|
582
|
+
|
|
583
|
+
---
|
|
584
|
+
|
|
585
|
+
## Testing Your Functions
|
|
586
|
+
|
|
587
|
+
Use the VGI client for integration tests:
|
|
588
|
+
|
|
589
|
+
```python
|
|
590
|
+
from vgi.client import Client
|
|
591
|
+
from vgi import Arguments
|
|
592
|
+
import pyarrow as pa
|
|
593
|
+
|
|
594
|
+
batch = pa.RecordBatch.from_pydict({"name": ["alice", "bob"]})
|
|
595
|
+
|
|
596
|
+
with Client("./my_worker.py") as client:
|
|
597
|
+
results = list(client.scalar_function(
|
|
598
|
+
function_name="upper_case",
|
|
599
|
+
input=iter([batch]),
|
|
600
|
+
arguments=Arguments(positional=[pa.scalar("name")]),
|
|
601
|
+
))
|
|
602
|
+
|
|
603
|
+
assert results[0]["result"].to_pylist() == ["ALICE", "BOB"]
|
|
604
|
+
```
|
|
605
|
+
|
|
606
|
+
---
|
|
607
|
+
|
|
608
|
+
## Protocol Overview
|
|
609
|
+
|
|
610
|
+
VGI uses `vgi_rpc`, an Apache Arrow IPC-based RPC framework, for all
|
|
611
|
+
client-worker communication over stdin/stdout pipes:
|
|
612
|
+
|
|
613
|
+
```
|
|
614
|
+
Client Worker
|
|
615
|
+
│ │
|
|
616
|
+
│──── bind(request) ──────────────▶ │ Function name, args, input schema
|
|
617
|
+
│◀─── BindResponse ──────────────── │ Output schema, opaque data
|
|
618
|
+
│ │
|
|
619
|
+
│──── init(request) ──────────────▶ │ Start processing stream
|
|
620
|
+
│◀─── Stream header ─────────────── │ execution_id, max_workers
|
|
621
|
+
│ │
|
|
622
|
+
│──── exchange(batch1) ───────────▶ │
|
|
623
|
+
│◀─── output batch 1 ────────────── │ transform(batch)
|
|
624
|
+
│ ... │
|
|
625
|
+
│──── [stream close] ─────────────▶ │ Signal end of input
|
|
626
|
+
│ │
|
|
627
|
+
│──── init(phase=FINALIZE) ───────▶ │ Start finalize stream
|
|
628
|
+
│◀─── final output batches ──────── │ finish() results
|
|
629
|
+
└───────────────────────────────────┘
|
|
630
|
+
```
|
|
631
|
+
|
|
632
|
+
---
|
|
633
|
+
|
|
634
|
+
## External Batch Offloading (Demo Storage)
|
|
635
|
+
|
|
636
|
+
When record batches are too large for HTTP request/response bodies, VGI supports
|
|
637
|
+
externalizing them to blob storage. The server replaces oversized batches with
|
|
638
|
+
pointer batches containing a URL, and the client transparently fetches the data.
|
|
639
|
+
|
|
640
|
+
The example HTTP server includes a built-in demo blob store for testing this
|
|
641
|
+
without S3 or any cloud infrastructure:
|
|
642
|
+
|
|
643
|
+
```bash
|
|
644
|
+
# Start with demo storage (4 KiB threshold for testing)
|
|
645
|
+
vgi-fixture-http --demo-storage --externalize-threshold-bytes 4096
|
|
646
|
+
|
|
647
|
+
# With zstd compression
|
|
648
|
+
vgi-fixture-http --demo-storage --externalize-threshold-bytes 4096 --externalize-compression zstd
|
|
649
|
+
```
|
|
650
|
+
|
|
651
|
+
When `--demo-storage` is enabled:
|
|
652
|
+
- Batches exceeding `--externalize-threshold-bytes` are stored in-memory and
|
|
653
|
+
served from `/__blobs__/{id}` endpoints on the same server
|
|
654
|
+
- Clients can request upload URLs for large inputs via the `__upload_url__` endpoint
|
|
655
|
+
- The server advertises `VGI-Max-Request-Bytes` and rejects oversized requests with an HTTP 413 response
|
|
656
|
+
|
|
657
|
+
For production use, implement the `ExternalStorage` protocol from `vgi_rpc` against
|
|
658
|
+
your cloud storage (S3, GCS, etc.). The example server also supports S3 via `--s3-bucket`.
|
|
659
|
+
|
|
660
|
+
---
|
|
661
|
+
|
|
662
|
+
## Documentation
|
|
663
|
+
|
|
664
|
+
- [Function Lifecycle](docs/lifecycle.md) - Bind, init, process, finalize
|
|
665
|
+
- [Metadata API](docs/metadata.md) - Function introspection
|
|
666
|
+
- [Function API Reference](docs/generator-api.md) - Advanced function patterns
|
|
667
|
+
- [Catalog Interface](docs/catalog-interface.md) - DuckDB ATTACH integration
|
|
668
|
+
|
|
669
|
+
---
|
|
670
|
+
|
|
671
|
+
## Logging
|
|
672
|
+
|
|
673
|
+
Workers support `--debug`, `--log-level`, `--log-format`, and `--log-logger` options:
|
|
674
|
+
|
|
675
|
+
```bash
|
|
676
|
+
# Enable debug logging
|
|
677
|
+
vgi-fixture-worker --debug
|
|
678
|
+
|
|
679
|
+
# JSON-formatted logs for structured pipelines
|
|
680
|
+
vgi-fixture-worker --log-format json
|
|
681
|
+
|
|
682
|
+
# Target a specific logger
|
|
683
|
+
vgi-fixture-worker --log-level DEBUG --log-logger vgi.worker
|
|
684
|
+
```
|
|
685
|
+
|
|
686
|
+
You can also set `VGI_WORKER_DEBUG=1` in the environment, which enables `--debug` on the worker and stderr passthrough on the client without changing any code or CLI flags:
|
|
687
|
+
|
|
688
|
+
```bash
|
|
689
|
+
VGI_WORKER_DEBUG=1 python my_script.py
|
|
690
|
+
```
|
|
691
|
+
|
|
692
|
+
See the [CLI Reference](docs/cli.md#worker-logging) for the full list of loggers and options.
|
|
693
|
+
|
|
694
|
+
---
|
|
695
|
+
|
|
696
|
+
## Development
|
|
697
|
+
|
|
698
|
+
```bash
|
|
699
|
+
git clone https://github.com/query-farm/vgi-python
|
|
700
|
+
cd vgi-python
|
|
701
|
+
|
|
702
|
+
uv sync --all-extras # Install dependencies
|
|
703
|
+
uv run pytest -n auto # Run tests
|
|
704
|
+
uv run ruff check --fix . # Lint
|
|
705
|
+
uv run ruff format . # Format
|
|
706
|
+
uv run mypy vgi/ # Type check
|
|
707
|
+
```
|
|
708
|
+
|
|
709
|
+
## Requirements
|
|
710
|
+
|
|
711
|
+
- Python >= 3.12.4
|
|
712
|
+
- pyarrow
|
|
713
|
+
- DuckDB (for SQL integration)
|
|
714
|
+
|
|
715
|
+
---
|
|
716
|
+
|
|
717
|
+
## License
|
|
718
|
+
|
|
719
|
+
Copyright (c) 2025, 2026 Query Farm LLC.
|
|
720
|
+
|
|
721
|
+
Licensed under the **Query Farm Source-Available License, Version 1.0** — see
|
|
722
|
+
[LICENSE](LICENSE) for the binding terms. In summary (the LICENSE text governs):
|
|
723
|
+
|
|
724
|
+
- ✅ **Use, copy, modify, and redistribute** the code freely, **including in
|
|
725
|
+
production and for commercial purposes** — your own internal use, and building
|
|
726
|
+
products and services on top of VGI.
|
|
727
|
+
- 🚫 Not permitted **without a separate commercial license**: offering a
|
|
728
|
+
*competing* VGI-equivalent product or service to third parties (hosted,
|
|
729
|
+
embedded, or as-a-service), or operating a commercial marketplace for such
|
|
730
|
+
services.
|
|
731
|
+
- ⏳ Each released version converts to the **Apache License, Version 2.0**, ten
|
|
732
|
+
years after its public release.
|
|
733
|
+
|
|
734
|
+
For a commercial license or any licensing questions, contact
|
|
735
|
+
[hello@query.farm](mailto:hello@query.farm).
|