vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Scan functions backing the ``rff_*`` required-filter sqllogictest Tables.
|
|
4
|
+
|
|
5
|
+
Used by the ``vgi_required_filters_*.test`` matrix. These fixtures exercise
|
|
6
|
+
the ``Table.required_field_filter_paths`` field and
|
|
7
|
+
the C++ optimizer extension that enforces it. The five tables form a small
|
|
8
|
+
matrix:
|
|
9
|
+
|
|
10
|
+
* ``rff_simple`` — flat columns, single top-level required path.
|
|
11
|
+
* ``rff_struct`` — struct column with two required subfield paths.
|
|
12
|
+
* ``rff_nested`` — nested struct with a 3-deep required path.
|
|
13
|
+
* ``rff_multi`` — mixed top-level + struct subfield requirements.
|
|
14
|
+
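* ``rff_none`` — no requirement (control / regression for the fast path).

A sixth table, ``rff_rowid``, sits outside the matrix: it pairs a virtual
row-id column with a ``bbox`` struct that carries the required paths.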
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
import pyarrow as pa
|
|
22
|
+
from vgi_rpc.rpc import OutputCollector
|
|
23
|
+
|
|
24
|
+
from vgi._test_fixtures.table._common import _EmptyArgs, _OneShotState
|
|
25
|
+
from vgi._test_fixtures.table.catalog_scans import _static_scan_function
|
|
26
|
+
from vgi.invocation import BindResponse
|
|
27
|
+
from vgi.table_function import (
|
|
28
|
+
BindParams,
|
|
29
|
+
ProcessParams,
|
|
30
|
+
TableFunctionGenerator,
|
|
31
|
+
init_single_worker,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# The fixture schemas. These are referenced both by the scan functions below and
|
|
35
|
+
# by the Table descriptors registered on the worker.
|
|
36
|
+
|
|
37
|
+
# rff_simple: two flat int64 columns, ``a`` and ``b``.
RFF_SIMPLE_COLUMNS = pa.schema([pa.field(column, pa.int64()) for column in ("a", "b")])
|
|
43
|
+
|
|
44
|
+
# rff_struct: a struct column ``s`` with int64 members ``a`` and ``b``, followed
# by a flat int64 column ``other``.
RFF_STRUCT_COLUMNS = pa.schema(
    [
        pa.field("s", pa.struct([pa.field(member, pa.int64()) for member in ("a", "b")])),
        pa.field("other", pa.int64()),
    ]
)
|
|
58
|
+
|
|
59
|
+
# rff_nested: one column whose only leaf lives three levels down, at
# ``wrapper.mid.leaf`` (int64).
RFF_NESTED_COLUMNS = pa.schema(
    [
        pa.field(
            "wrapper",
            pa.struct([pa.field("mid", pa.struct([pa.field("leaf", pa.int64())]))]),
        ),
    ]
)
|
|
78
|
+
|
|
79
|
+
# rff_multi: a struct column ``s`` with int64 members ``a`` and ``b``, followed
# by a flat int64 column ``top``.
RFF_MULTI_COLUMNS = pa.schema(
    [
        pa.field("s", pa.struct([pa.field(member, pa.int64()) for member in ("a", "b")])),
        pa.field("top", pa.int64()),
    ]
)
|
|
93
|
+
|
|
94
|
+
# rff_none: two flat int64 columns, ``a`` and ``b``.
RFF_NONE_COLUMNS = pa.schema([pa.field(column, pa.int64()) for column in ("a", "b")])
|
|
100
|
+
|
|
101
|
+
# rff_rowid — a virtual row-id column (hidden from SELECT *) next to a bbox
# struct that carries required_field_filter_paths. A `WHERE rowid = N` predicate
# pushes a table_filter keyed by the COLUMN_IDENTIFIER_ROW_ID sentinel, whose
# value is far larger than the column count; the optimizer's required-filter
# check must skip that entry rather than index out of bounds.
# See required_field_filter_paths_native.test.
RFF_ROWID_COLUMNS = pa.schema(
    [
        # The ``is_row_id`` metadata key is what tags this field as the row-id column.
        pa.field("row_id", pa.int64(), metadata={b"is_row_id": b""}),
        pa.field(
            "bbox",
            pa.struct([pa.field(corner, pa.float32()) for corner in ("xmin", "ymin", "xmax", "ymax")]),
        ),
        pa.field("other", pa.int64()),
    ]
)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# Static scan behind ``rff_simple``: three rows, a = 1..3 and b = 10..30.
RffSimpleScanFunction = _static_scan_function(
    func_name="rff_simple_scan",
    func_description="rff_simple — flat columns (a, b) for required_field_filter_paths tests",
    output_schema=RFF_SIMPLE_COLUMNS,
    data={
        "a": list(range(1, 4)),
        "b": list(range(10, 40, 10)),
    },
)
|
|
134
|
+
|
|
135
|
+
# Static scan behind ``rff_struct``: three rows where s.b is ten times s.a and
# ``other`` is a hundred times s.a.
RffStructScanFunction = _static_scan_function(
    func_name="rff_struct_scan",
    func_description="rff_struct — STRUCT(s.a, s.b) + other for required_field_filter_paths tests",
    output_schema=RFF_STRUCT_COLUMNS,
    data={
        "s": [{"a": n, "b": 10 * n} for n in (1, 2, 3)],
        "other": [100 * n for n in (1, 2, 3)],
    },
)
|
|
148
|
+
|
|
149
|
+
# Static scan behind ``rff_nested``: three rows with wrapper.mid.leaf = 1, 2, 3.
RffNestedScanFunction = _static_scan_function(
    func_name="rff_nested_scan",
    func_description="rff_nested — nested STRUCT(wrapper.mid.leaf) for required_field_filter_paths tests",
    output_schema=RFF_NESTED_COLUMNS,
    data={
        "wrapper": [{"mid": {"leaf": n}} for n in (1, 2, 3)],
    },
)
|
|
161
|
+
|
|
162
|
+
# Static scan behind ``rff_multi``: two rows where s.b is ten times s.a and
# ``top`` is a hundred times s.a.
RffMultiScanFunction = _static_scan_function(
    func_name="rff_multi_scan",
    func_description="rff_multi — top-level + struct subfield required paths",
    output_schema=RFF_MULTI_COLUMNS,
    data={
        "s": [{"a": n, "b": 10 * n} for n in (1, 2)],
        "top": [100 * n for n in (1, 2)],
    },
)
|
|
174
|
+
|
|
175
|
+
# Static scan behind ``rff_none`` (the control table): three rows, a = 1..3 and
# b = 10..30.
RffNoneScanFunction = _static_scan_function(
    func_name="rff_none_scan",
    func_description="rff_none — control table with no required_field_filter_paths",
    output_schema=RFF_NONE_COLUMNS,
    data={
        "a": list(range(1, 4)),
        "b": list(range(10, 40, 10)),
    },
)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
# rff_rowid cannot reuse the one-shot static factory: virtual row-id columns
# require projection_pushdown, and under projection the emitted batch must match
# the *projected* output schema. This scan therefore builds only the columns
# that were requested.
@init_single_worker
class RffRowidScanFunction(TableFunctionGenerator[_EmptyArgs, _OneShotState]):
    """rff_rowid — row_id virtual column + bbox.* required filters."""

    class Meta:
        """Function metadata."""

        name = "rff_rowid_scan"
        description = "rff_rowid — row_id virtual column + bbox.* required filters"
        projection_pushdown = True
        # With filter_pushdown the WHERE predicates — including the rowid
        # filter keyed by the COLUMN_IDENTIFIER_ROW_ID sentinel — land in the
        # scan's table_filters. auto_apply_filters has the framework apply
        # them, so results stay correct without a hand-written filter loop.
        filter_pushdown = True
        auto_apply_filters = True

    @classmethod
    def on_bind(cls, params: BindParams[_EmptyArgs]) -> BindResponse:
        """Bind with the unprojected schema: row_id, bbox and other."""
        return BindResponse(output_schema=RFF_ROWID_COLUMNS)

    @classmethod
    def initial_state(cls, params: ProcessParams[_EmptyArgs]) -> _OneShotState:
        """Return a fresh one-shot state for a new scan."""
        return _OneShotState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_EmptyArgs],
        state: _OneShotState,
        out: OutputCollector,
    ) -> None:
        """Emit one 10-row batch on the first call, then finish.

        Only the columns named in ``params.output_schema`` are emitted, so the
        batch lines up with whatever projection the scan asked for.
        """
        if not state.done:
            # Flip the flag before emitting so the next call finishes the scan.
            state.done = True
            row_numbers = range(10)
            available: dict[str, Any] = {
                "row_id": list(row_numbers),
                "bbox": [{"xmin": float(i), "ymin": 2.0, "xmax": 3.0, "ymax": 4.0} for i in row_numbers],
                "other": [10 * i for i in row_numbers],
            }
            projected_schema = params.output_schema
            selected = {field.name: available[field.name] for field in projected_schema}
            out.emit(pa.RecordBatch.from_pydict(selected, schema=projected_schema))
            return
        out.finish()
|