vgi-python 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/_duckdb.py +3 -0
- vgi/_test_fixtures/aggregate/dynamic.py +7 -1
- vgi/_test_fixtures/bad_enum.py +72 -0
- vgi/_test_fixtures/narrow_bind/__init__.py +15 -0
- vgi/_test_fixtures/narrow_bind/worker.py +237 -0
- vgi/_test_fixtures/scalar/__init__.py +6 -0
- vgi/_test_fixtures/scalar/random_demo.py +44 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +73 -0
- vgi/_test_fixtures/table/__init__.py +6 -0
- vgi/_test_fixtures/table/filters.py +128 -0
- vgi/_test_fixtures/table/late_materialization.py +3 -0
- vgi/_test_fixtures/table/make_series.py +15 -0
- vgi/_test_fixtures/table/misc.py +4 -0
- vgi/_test_fixtures/table/pairs.py +12 -0
- vgi/_test_fixtures/table/partition_columns.py +101 -0
- vgi/_test_fixtures/table/sequence.py +28 -0
- vgi/_test_fixtures/table/settings.py +6 -0
- vgi/_test_fixtures/table/typed_probe.py +154 -0
- vgi/_test_fixtures/table_in_out.py +8 -26
- vgi/_test_fixtures/worker.py +21 -1
- vgi/aggregate_function.py +45 -11
- vgi/argument_spec.py +20 -20
- vgi/arguments.py +114 -153
- vgi/catalog/_descriptor_spec.py +246 -0
- vgi/catalog/attach_option.py +14 -171
- vgi/catalog/catalog_interface.py +390 -264
- vgi/catalog/descriptors.py +59 -26
- vgi/catalog/secret_type.py +1 -0
- vgi/catalog/setting.py +19 -214
- vgi/catalog/storage.py +8 -4
- vgi/client/catalog_mixin.py +37 -33
- vgi/client/cli_catalog.py +4 -16
- vgi/client/cli_schema.py +12 -73
- vgi/client/cli_table.py +30 -199
- vgi/client/cli_utils.py +74 -19
- vgi/client/cli_view.py +12 -74
- vgi/client/client.py +104 -79
- vgi/exceptions.py +4 -0
- vgi/function.py +9 -33
- vgi/function_storage.py +11 -46
- vgi/function_storage_azure_sql.py +6 -6
- vgi/function_storage_cf_do.py +23 -3
- vgi/http/worker_page.py +18 -8
- vgi/invocation.py +10 -10
- vgi/meta_worker.py +7 -7
- vgi/metadata.py +111 -46
- vgi/otel.py +3 -3
- vgi/protocol.py +504 -94
- vgi/scalar_function.py +93 -72
- vgi/schema_utils.py +2 -2
- vgi/secret_protocol.py +22 -3
- vgi/secret_service.py +9 -6
- vgi/serve.py +11 -11
- vgi/table_buffering_function.py +28 -22
- vgi/table_filter_pushdown.py +435 -61
- vgi/table_function.py +279 -82
- vgi/table_in_out_function.py +88 -28
- vgi/transactor/client.py +1 -1
- vgi/transactor/protocol.py +1 -1
- vgi/transactor/server.py +26 -64
- vgi/worker.py +125 -272
- {vgi_python-0.8.0.dist-info → vgi_python-0.8.2.dist-info}/METADATA +89 -176
- {vgi_python-0.8.0.dist-info → vgi_python-0.8.2.dist-info}/RECORD +66 -61
- {vgi_python-0.8.0.dist-info → vgi_python-0.8.2.dist-info}/WHEEL +0 -0
- {vgi_python-0.8.0.dist-info → vgi_python-0.8.2.dist-info}/entry_points.txt +0 -0
- {vgi_python-0.8.0.dist-info → vgi_python-0.8.2.dist-info}/licenses/LICENSE +0 -0
vgi/_duckdb.py
CHANGED
|
@@ -190,7 +190,7 @@ class _DynamicAggregateBase(AggregateFunction[DynamicState]):
|
|
|
190
190
|
# when DuckDB batches many partitions into shared buffers.
|
|
191
191
|
|
|
192
192
|
@staticmethod
|
|
193
|
-
def _slice_to_frame(
|
|
193
|
+
def _slice_to_frame(
|
|
194
194
|
partition: WindowPartition,
|
|
195
195
|
subframes: list[tuple[int, int]],
|
|
196
196
|
data_start: int,
|
|
@@ -198,11 +198,17 @@ class _DynamicAggregateBase(AggregateFunction[DynamicState]):
|
|
|
198
198
|
"""Slice all partition columns to the frame rows.
|
|
199
199
|
|
|
200
200
|
Args:
|
|
201
|
+
partition: The window partition whose columns are sliced.
|
|
202
|
+
subframes: List of ``(begin, end)`` index tuples describing the
|
|
203
|
+
row ranges to include in the frame.
|
|
201
204
|
data_start: Index where data columns begin (header columns are
|
|
202
205
|
``[0 .. data_start)``). NULL-drop is applied on data columns
|
|
203
206
|
only — matches the filtering ``_do_update`` performs in the
|
|
204
207
|
non-window path.
|
|
205
208
|
|
|
209
|
+
Returns:
|
|
210
|
+
A table containing only the frame rows across all partition columns.
|
|
211
|
+
|
|
206
212
|
"""
|
|
207
213
|
num_cols = partition.inputs.num_columns
|
|
208
214
|
cols = [partition.inputs.column(i) for i in range(num_cols)]
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Fixture worker that deliberately advertises an unrecognized enum value.
|
|
4
|
+
|
|
5
|
+
This fixture exercises the C++ extension's *wire-enum validation* end-to-end:
|
|
6
|
+
the catalog-metadata parser (``ParseFunctionInfo`` in
|
|
7
|
+
``vgi/src/vgi_catalog_api.cpp``) must reject an enum string it does not
|
|
8
|
+
recognize with a loud ``IOException`` rather than silently falling back to a
|
|
9
|
+
default. A silent fallback would run with behavior inconsistent with what the
|
|
10
|
+
worker declared (e.g. treating a ``SPECIAL`` null-handling function as
|
|
11
|
+
``DEFAULT``).
|
|
12
|
+
|
|
13
|
+
The trick is entirely Python-side and needs no extension rebuild. The normal
|
|
14
|
+
metadata path can only ever emit valid enum names because the values come from
|
|
15
|
+
typed Python ``Enum`` members. To get a bogus string onto the wire we override
|
|
16
|
+
:meth:`ExampleCatalog._function_to_info` for one scalar function (``double``)
|
|
17
|
+
and swap its ``null_handling`` for :class:`_BogusNullHandling.WEIRD` — a real
|
|
18
|
+
``Enum`` member whose ``.name`` is ``"WEIRD"``. The vgi-rpc serializer converts
|
|
19
|
+
any ``Enum`` field to ``value.name`` (see ``ArrowSerializableDataclass``), so
|
|
20
|
+
``"WEIRD"`` lands in the ``null_handling`` Arrow column and the C++ parser
|
|
21
|
+
trips on it the moment the ``double`` function's metadata is loaded.
|
|
22
|
+
|
|
23
|
+
Otherwise this is a drop-in replacement for ``vgi-fixture-worker``: every other
|
|
24
|
+
function and the catalog are inherited unchanged from :class:`ExampleWorker`,
|
|
25
|
+
so any function except ``double`` still resolves normally.
|
|
26
|
+
|
|
27
|
+
Registered as the ``vgi-fixture-bad-enum-worker`` entry point.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
from dataclasses import replace
|
|
33
|
+
from enum import Enum
|
|
34
|
+
|
|
35
|
+
from vgi._test_fixtures.worker import ExampleCatalog, ExampleWorker
|
|
36
|
+
from vgi.catalog.catalog_interface import FunctionInfo
|
|
37
|
+
|
|
38
|
+
# The scalar function whose null_handling we corrupt. Tests reference this name
|
|
39
|
+
# to force the broken metadata onto the parse path.
|
|
40
|
+
BAD_ENUM_FUNCTION = "double"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class _BogusNullHandling(Enum):
|
|
44
|
+
"""An enum member whose ``.name`` is a value the C++ parser cannot map."""
|
|
45
|
+
|
|
46
|
+
WEIRD = "WEIRD"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class BadEnumCatalog(ExampleCatalog):
|
|
50
|
+
"""ExampleCatalog that advertises a bogus null_handling for one function."""
|
|
51
|
+
|
|
52
|
+
def _function_to_info(self, func_cls: type, schema_name: str) -> FunctionInfo:
|
|
53
|
+
info = super()._function_to_info(func_cls, schema_name)
|
|
54
|
+
if info.name == BAD_ENUM_FUNCTION and info.null_handling is not None:
|
|
55
|
+
# FunctionInfo is frozen; replace() returns a corrupted copy.
|
|
56
|
+
return replace(info, null_handling=_BogusNullHandling.WEIRD) # type: ignore[arg-type]
|
|
57
|
+
return info
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class BadEnumWorker(ExampleWorker):
|
|
61
|
+
"""ExampleWorker that serves the example catalog with one bad enum value."""
|
|
62
|
+
|
|
63
|
+
catalog_interface = BadEnumCatalog
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def main() -> None:
|
|
67
|
+
"""Run the bad-enum fixture worker process."""
|
|
68
|
+
BadEnumWorker.main()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
if __name__ == "__main__":
|
|
72
|
+
main()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Narrow-bind reproducer fixture.
|
|
4
|
+
|
|
5
|
+
Exposes a catalog whose virtual table advertises *more* columns in its
|
|
6
|
+
listing (``catalog_schema_contents_tables`` / ``catalog_table_get``) than
|
|
7
|
+
its scan function returns from ``on_bind``. A client that trusts the bind
|
|
8
|
+
``output_schema`` without checking it against the planned catalog columns
|
|
9
|
+
indexes past the end of the worker's narrower batch in
|
|
10
|
+
``ArrowTableFunction::ArrowToDuckDB`` and SIGSEGVs. The fix makes the
|
|
11
|
+
client fail closed at bind with a clear ``BinderException``.
|
|
12
|
+
|
|
13
|
+
Driven by ``test/sql/integration/narrow_bind_mismatch.test`` in
|
|
14
|
+
``~/Development/vgi``.
|
|
15
|
+
"""
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Narrow-bind reproducer worker.
|
|
4
|
+
|
|
5
|
+
Two virtual tables, each backed by a table function:
|
|
6
|
+
|
|
7
|
+
* ``mismatch`` — advertises columns ``{id, val}`` in its catalog listing
|
|
8
|
+
but its scan function ``narrow_scan`` binds to ``{id}`` only. This is
|
|
9
|
+
the inconsistency that used to segfault the client at scan time
|
|
10
|
+
(``ArrowTableFunction::ArrowToDuckDB`` walking off the end of the
|
|
11
|
+
worker's 1-column batch). The client must now refuse it at bind with a
|
|
12
|
+
clear ``BinderException``.
|
|
13
|
+
|
|
14
|
+
* ``consistent`` — advertises ``{id, val}`` and its scan function
|
|
15
|
+
``wide_scan`` binds to ``{id, val}``. Positive control: this must keep
|
|
16
|
+
working unchanged.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from typing import Annotated, Any
|
|
23
|
+
|
|
24
|
+
import pyarrow as pa
|
|
25
|
+
from vgi_rpc import ArrowSerializableDataclass
|
|
26
|
+
from vgi_rpc.rpc import OutputCollector
|
|
27
|
+
|
|
28
|
+
from vgi import Worker
|
|
29
|
+
from vgi.arguments import Arg
|
|
30
|
+
from vgi.catalog import Catalog, Schema
|
|
31
|
+
from vgi.catalog.catalog_interface import (
|
|
32
|
+
AttachOpaqueData,
|
|
33
|
+
ReadOnlyCatalogInterface,
|
|
34
|
+
ScanFunctionResult,
|
|
35
|
+
SchemaInfo,
|
|
36
|
+
SchemaObjectType,
|
|
37
|
+
SerializedSchema,
|
|
38
|
+
TableInfo,
|
|
39
|
+
TransactionOpaqueData,
|
|
40
|
+
)
|
|
41
|
+
from vgi.function import Function
|
|
42
|
+
from vgi.invocation import BindResponse
|
|
43
|
+
from vgi.table_function import (
|
|
44
|
+
BindParams,
|
|
45
|
+
ProcessParams,
|
|
46
|
+
TableFunctionGenerator,
|
|
47
|
+
init_single_worker,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
CATALOG_NAME = "narrow_bind"
|
|
51
|
+
|
|
52
|
+
# What the catalog advertises for both tables: two columns.
|
|
53
|
+
_TABLE_SCHEMA: pa.Schema = pa.schema([pa.field("id", pa.int64()), pa.field("val", pa.int64())])
|
|
54
|
+
# What the narrow scan function actually binds to: one column.
|
|
55
|
+
_NARROW_BIND_SCHEMA: pa.Schema = pa.schema([pa.field("id", pa.int64())])
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass(kw_only=True)
|
|
59
|
+
class _State(ArrowSerializableDataclass):
|
|
60
|
+
done: bool = False
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass(frozen=True)
|
|
64
|
+
class _Args:
|
|
65
|
+
count: Annotated[int, Arg(0, doc="rows", ge=0)]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@init_single_worker
|
|
69
|
+
class NarrowScan(TableFunctionGenerator[_Args, _State]):
|
|
70
|
+
"""Binds to a NARROWER schema than the catalog advertises (the bug)."""
|
|
71
|
+
|
|
72
|
+
class Meta:
|
|
73
|
+
name = "narrow_scan"
|
|
74
|
+
description = "bind reports a narrower schema than the table advertises"
|
|
75
|
+
|
|
76
|
+
@classmethod
|
|
77
|
+
def on_bind(cls, params: BindParams[_Args]) -> BindResponse:
|
|
78
|
+
return BindResponse(output_schema=_NARROW_BIND_SCHEMA)
|
|
79
|
+
|
|
80
|
+
@classmethod
|
|
81
|
+
def initial_state(cls, params: ProcessParams[_Args]) -> _State:
|
|
82
|
+
return _State()
|
|
83
|
+
|
|
84
|
+
@classmethod
|
|
85
|
+
def process(cls, params: ProcessParams[_Args], state: _State, out: OutputCollector) -> None:
|
|
86
|
+
if state.done:
|
|
87
|
+
out.finish()
|
|
88
|
+
return
|
|
89
|
+
state.done = True
|
|
90
|
+
out.emit(pa.RecordBatch.from_pydict({"id": [0, 1, 2]}, schema=params.output_schema))
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@init_single_worker
|
|
94
|
+
class WideScan(TableFunctionGenerator[_Args, _State]):
|
|
95
|
+
"""Binds to the full advertised schema (positive control — must work)."""
|
|
96
|
+
|
|
97
|
+
class Meta:
|
|
98
|
+
name = "wide_scan"
|
|
99
|
+
description = "bind matches the table's advertised schema"
|
|
100
|
+
|
|
101
|
+
@classmethod
|
|
102
|
+
def on_bind(cls, params: BindParams[_Args]) -> BindResponse:
|
|
103
|
+
return BindResponse(output_schema=_TABLE_SCHEMA)
|
|
104
|
+
|
|
105
|
+
@classmethod
|
|
106
|
+
def initial_state(cls, params: ProcessParams[_Args]) -> _State:
|
|
107
|
+
return _State()
|
|
108
|
+
|
|
109
|
+
@classmethod
|
|
110
|
+
def process(cls, params: ProcessParams[_Args], state: _State, out: OutputCollector) -> None:
|
|
111
|
+
if state.done:
|
|
112
|
+
out.finish()
|
|
113
|
+
return
|
|
114
|
+
state.done = True
|
|
115
|
+
out.emit(pa.RecordBatch.from_pydict({"id": [0, 1, 2], "val": [10, 20, 30]}, schema=params.output_schema))
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
_FUNCTIONS: list[type[Function]] = [NarrowScan, WideScan]
|
|
119
|
+
|
|
120
|
+
_CATALOG = Catalog(
|
|
121
|
+
name=CATALOG_NAME,
|
|
122
|
+
default_schema="main",
|
|
123
|
+
schemas=[
|
|
124
|
+
Schema(
|
|
125
|
+
name="main",
|
|
126
|
+
comment="narrow-bind reproducer catalog",
|
|
127
|
+
functions=list(_FUNCTIONS),
|
|
128
|
+
tables=[],
|
|
129
|
+
),
|
|
130
|
+
],
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _serialize_schema(s: pa.Schema) -> bytes:
|
|
135
|
+
sink = pa.BufferOutputStream()
|
|
136
|
+
with pa.ipc.new_stream(sink, s):
|
|
137
|
+
pass
|
|
138
|
+
return sink.getvalue().to_pybytes()
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# table name -> scan function name. Both advertise _TABLE_SCHEMA (2 cols).
|
|
142
|
+
_TABLE_FUNCTIONS = {
|
|
143
|
+
"mismatch": "narrow_scan",
|
|
144
|
+
"consistent": "wide_scan",
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class NarrowBindCatalog(ReadOnlyCatalogInterface):
|
|
149
|
+
catalog = _CATALOG
|
|
150
|
+
catalog_name = CATALOG_NAME
|
|
151
|
+
|
|
152
|
+
def _info(self, table_name: str) -> TableInfo:
|
|
153
|
+
return TableInfo(
|
|
154
|
+
comment=f"narrow-bind reproducer table -> {_TABLE_FUNCTIONS[table_name]}",
|
|
155
|
+
tags={},
|
|
156
|
+
name=table_name,
|
|
157
|
+
schema_name="main",
|
|
158
|
+
columns=SerializedSchema(_serialize_schema(_TABLE_SCHEMA)),
|
|
159
|
+
not_null_constraints=[],
|
|
160
|
+
unique_constraints=[],
|
|
161
|
+
check_constraints=[],
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
def schemas(
|
|
165
|
+
self, *, attach_opaque_data: AttachOpaqueData, transaction_opaque_data: TransactionOpaqueData | None
|
|
166
|
+
) -> list[SchemaInfo]:
|
|
167
|
+
infos = super().schemas(attach_opaque_data=attach_opaque_data, transaction_opaque_data=transaction_opaque_data)
|
|
168
|
+
for i, info in enumerate(infos):
|
|
169
|
+
if info.name == "main":
|
|
170
|
+
infos[i] = SchemaInfo(
|
|
171
|
+
attach_opaque_data=info.attach_opaque_data,
|
|
172
|
+
name=info.name,
|
|
173
|
+
comment=info.comment,
|
|
174
|
+
tags=info.tags,
|
|
175
|
+
estimated_object_count={
|
|
176
|
+
**(info.estimated_object_count or {}),
|
|
177
|
+
"table": len(_TABLE_FUNCTIONS),
|
|
178
|
+
},
|
|
179
|
+
)
|
|
180
|
+
return infos
|
|
181
|
+
|
|
182
|
+
def schema_contents(
|
|
183
|
+
self,
|
|
184
|
+
*,
|
|
185
|
+
attach_opaque_data: AttachOpaqueData,
|
|
186
|
+
transaction_opaque_data: TransactionOpaqueData | None,
|
|
187
|
+
name: str,
|
|
188
|
+
type: Any,
|
|
189
|
+
) -> Any:
|
|
190
|
+
if name.lower() == "main" and type == SchemaObjectType.TABLE:
|
|
191
|
+
return [self._info(table_name) for table_name in _TABLE_FUNCTIONS]
|
|
192
|
+
return super().schema_contents(
|
|
193
|
+
attach_opaque_data=attach_opaque_data, transaction_opaque_data=transaction_opaque_data, name=name, type=type
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
def table_get(
|
|
197
|
+
self,
|
|
198
|
+
*,
|
|
199
|
+
attach_opaque_data: AttachOpaqueData,
|
|
200
|
+
transaction_opaque_data: TransactionOpaqueData | None,
|
|
201
|
+
schema_name: str,
|
|
202
|
+
name: str,
|
|
203
|
+
at_unit: str | None = None,
|
|
204
|
+
at_value: str | None = None,
|
|
205
|
+
) -> TableInfo | None:
|
|
206
|
+
if schema_name.lower() != "main":
|
|
207
|
+
return None
|
|
208
|
+
if name in _TABLE_FUNCTIONS:
|
|
209
|
+
return self._info(name)
|
|
210
|
+
return None
|
|
211
|
+
|
|
212
|
+
def table_scan_function_get(
|
|
213
|
+
self,
|
|
214
|
+
*,
|
|
215
|
+
attach_opaque_data: AttachOpaqueData,
|
|
216
|
+
transaction_opaque_data: TransactionOpaqueData | None,
|
|
217
|
+
schema_name: str,
|
|
218
|
+
name: str,
|
|
219
|
+
at_unit: str | None,
|
|
220
|
+
at_value: str | None,
|
|
221
|
+
) -> ScanFunctionResult:
|
|
222
|
+
fn = _TABLE_FUNCTIONS.get(name)
|
|
223
|
+
if fn is None:
|
|
224
|
+
raise ValueError(f"unknown narrow-bind reproducer table: {name}")
|
|
225
|
+
return ScanFunctionResult(
|
|
226
|
+
function_name=fn,
|
|
227
|
+
positional_arguments=[pa.scalar(3, type=pa.int64())],
|
|
228
|
+
named_arguments={},
|
|
229
|
+
required_extensions=[],
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class NarrowBindWorker(Worker):
|
|
234
|
+
catalog_interface = NarrowBindCatalog
|
|
235
|
+
catalog_name = CATALOG_NAME
|
|
236
|
+
catalog = _CATALOG
|
|
237
|
+
functions = list(_FUNCTIONS)
|
|
@@ -52,12 +52,15 @@ from vgi._test_fixtures.scalar.null_handling import (
|
|
|
52
52
|
from vgi._test_fixtures.scalar.random_demo import (
|
|
53
53
|
BernoulliFunction,
|
|
54
54
|
HashSeedFunction,
|
|
55
|
+
QuerySeedFunction,
|
|
55
56
|
RandomBytesFunction,
|
|
56
57
|
RandomIntFunction,
|
|
57
58
|
)
|
|
58
59
|
from vgi._test_fixtures.scalar.settings_secrets import (
|
|
59
60
|
MultiplyBySettingFunction,
|
|
60
61
|
ReturnSecretValueFunction,
|
|
62
|
+
ScaleBySettingFunction,
|
|
63
|
+
SecretFieldFunction,
|
|
61
64
|
WhoAmIFunction,
|
|
62
65
|
)
|
|
63
66
|
from vgi._test_fixtures.scalar.type_info import (
|
|
@@ -100,9 +103,12 @@ __all__ = [
|
|
|
100
103
|
"PairTypeIntIntFunction",
|
|
101
104
|
"PairTypeIntStrFunction",
|
|
102
105
|
"PairTypeStrStrFunction",
|
|
106
|
+
"QuerySeedFunction",
|
|
103
107
|
"RandomBytesFunction",
|
|
104
108
|
"RandomIntFunction",
|
|
105
109
|
"ReturnSecretValueFunction",
|
|
110
|
+
"ScaleBySettingFunction",
|
|
111
|
+
"SecretFieldFunction",
|
|
106
112
|
"SmartFormatPrefixFunction",
|
|
107
113
|
"SmartFormatWidthFunction",
|
|
108
114
|
"SumValuesFunction",
|
|
@@ -136,6 +136,50 @@ class HashSeedFunction(ScalarFunction):
|
|
|
136
136
|
return pa.array([seed + i for i in range(_length)], type=pa.int64())
|
|
137
137
|
|
|
138
138
|
|
|
139
|
+
class QuerySeedFunction(ScalarFunction):
|
|
140
|
+
"""Adds a per-query-stable seed to each input value.
|
|
141
|
+
|
|
142
|
+
Demonstrates ``FunctionStability.CONSISTENT_WITHIN_QUERY`` — the only
|
|
143
|
+
fixture that emits this stability variant. Semantically the value is fixed
|
|
144
|
+
for the duration of a single query but may differ across queries (like
|
|
145
|
+
``now()``). DuckDB has no behavioral consumer that this fixture asserts; it
|
|
146
|
+
exists so the wire path for the third stability value stays exercised and
|
|
147
|
+
so other-language workers must specify it.
|
|
148
|
+
|
|
149
|
+
Example:
|
|
150
|
+
SQL: SELECT query_seed(value) FROM data
|
|
151
|
+
|
|
152
|
+
"""
|
|
153
|
+
|
|
154
|
+
class Meta:
|
|
155
|
+
"""Function metadata."""
|
|
156
|
+
|
|
157
|
+
name = "query_seed"
|
|
158
|
+
description = "Add a per-query-stable seed to each value (demonstrates CONSISTENT_WITHIN_QUERY stability)"
|
|
159
|
+
stability = FunctionStability.CONSISTENT_WITHIN_QUERY
|
|
160
|
+
examples = [
|
|
161
|
+
FunctionExample(
|
|
162
|
+
sql="SELECT query_seed(value) FROM data",
|
|
163
|
+
description="Offset each value by a seed that is constant within a query",
|
|
164
|
+
),
|
|
165
|
+
]
|
|
166
|
+
|
|
167
|
+
@classmethod
|
|
168
|
+
def compute(
|
|
169
|
+
cls,
|
|
170
|
+
value: Annotated[pa.Int64Array, Param(doc="Value to offset")],
|
|
171
|
+
) -> Annotated[pa.Int64Array, Returns()]:
|
|
172
|
+
"""Add a fixed per-query offset to each value.
|
|
173
|
+
|
|
174
|
+
The offset is deterministic here (a constant) so SQL tests have a
|
|
175
|
+
stable expected output; the stability flag is what is under test, not
|
|
176
|
+
the numeric result.
|
|
177
|
+
"""
|
|
178
|
+
import pyarrow.compute as pc
|
|
179
|
+
|
|
180
|
+
return pc.add(value, 1000)
|
|
181
|
+
|
|
182
|
+
|
|
139
183
|
class RandomBytesFunction(ScalarFunction):
|
|
140
184
|
"""Generates deterministic pseudo-random binary blobs from a seed."""
|
|
141
185
|
|
|
@@ -42,6 +42,79 @@ class MultiplyBySettingFunction(ScalarFunction):
|
|
|
42
42
|
return pc.multiply(multiplier, value)
|
|
43
43
|
|
|
44
44
|
|
|
45
|
+
class ScaleBySettingFunction(ScalarFunction):
|
|
46
|
+
"""Scale the input value by the float (DOUBLE) setting ``scale_factor``.
|
|
47
|
+
|
|
48
|
+
Companion to :class:`MultiplyBySettingFunction`, but reads a floating-point
|
|
49
|
+
setting rather than an integer one.
|
|
50
|
+
|
|
51
|
+
Example:
|
|
52
|
+
SQL: SELECT scale_by_setting(4.0)
|
|
53
|
+
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
class Meta:
|
|
57
|
+
"""Function metadata."""
|
|
58
|
+
|
|
59
|
+
name = "scale_by_setting"
|
|
60
|
+
description = "Scale the input value by the float setting `scale_factor`"
|
|
61
|
+
examples = [
|
|
62
|
+
FunctionExample(
|
|
63
|
+
sql="SELECT scale_by_setting(4.0)",
|
|
64
|
+
description="Scale the input value by the float setting's value",
|
|
65
|
+
),
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
@classmethod
|
|
69
|
+
def compute(
|
|
70
|
+
cls,
|
|
71
|
+
value: Annotated[pa.DoubleArray, Param(doc="Value to scale")],
|
|
72
|
+
scale_factor: Annotated[pa.Scalar[Any] | None, Setting()],
|
|
73
|
+
) -> Annotated[pa.DoubleArray, Returns()]:
|
|
74
|
+
"""Generate the result for each row."""
|
|
75
|
+
factor = 1.0 if scale_factor is None or scale_factor.as_py() is None else scale_factor.as_py()
|
|
76
|
+
return pc.multiply(pa.scalar(factor, type=pa.float64()), value)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class SecretFieldFunction(ScalarFunction):
|
|
80
|
+
"""Look up individual secret fields by name.
|
|
81
|
+
|
|
82
|
+
``port`` is read by named lookup on the ``vgi_example`` secret and
|
|
83
|
+
``secret_string`` by field name; the result mirrors the wire behaviour of
|
|
84
|
+
the worker-side named/positional secret field accessors.
|
|
85
|
+
|
|
86
|
+
Example:
|
|
87
|
+
SQL: SELECT secret_field()
|
|
88
|
+
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
class Meta:
|
|
92
|
+
"""Function metadata."""
|
|
93
|
+
|
|
94
|
+
name = "secret_field"
|
|
95
|
+
description = "Look up secret fields by name"
|
|
96
|
+
examples = [
|
|
97
|
+
FunctionExample(
|
|
98
|
+
sql="SELECT secret_field()",
|
|
99
|
+
description="Look up secret fields by name",
|
|
100
|
+
),
|
|
101
|
+
]
|
|
102
|
+
|
|
103
|
+
@classmethod
|
|
104
|
+
def compute(
|
|
105
|
+
cls,
|
|
106
|
+
vgi_example: Annotated[dict[str, pa.Scalar[Any]], Secret("vgi_example")],
|
|
107
|
+
_length: Annotated[int, OutputLength()],
|
|
108
|
+
) -> Annotated[pa.StringArray, Returns()]:
|
|
109
|
+
"""Generate the result for each row."""
|
|
110
|
+
port = vgi_example.get("port")
|
|
111
|
+
name = vgi_example.get("secret_string")
|
|
112
|
+
port_s = "" if port is None else str(port.as_py())
|
|
113
|
+
name_s = "" if name is None else str(name.as_py())
|
|
114
|
+
result = f"port={port_s};name={name_s}"
|
|
115
|
+
return pa.array([result for _ in range(_length)], type=pa.string())
|
|
116
|
+
|
|
117
|
+
|
|
45
118
|
class ReturnSecretValueFunction(ScalarFunction):
|
|
46
119
|
"""Return the value of a secret.
|
|
47
120
|
|
|
@@ -45,6 +45,7 @@ from vgi._test_fixtures.table.filters import (
|
|
|
45
45
|
FilterEchoFunction,
|
|
46
46
|
FilterEchoPartitionedFunction,
|
|
47
47
|
FilterEchoTableScanFunction,
|
|
48
|
+
FilteredColumnsEchoFunction,
|
|
48
49
|
SpatialFilterExampleFunction,
|
|
49
50
|
ValuePruneFunction,
|
|
50
51
|
)
|
|
@@ -81,6 +82,7 @@ from vgi._test_fixtures.table.pairs import (
|
|
|
81
82
|
from vgi._test_fixtures.table.partition_columns import (
|
|
82
83
|
CountryPartitionedSalesFunction,
|
|
83
84
|
DisjointRangePartitionedFunction,
|
|
85
|
+
OverlappingRangePartitionedFunction,
|
|
84
86
|
PartitionedWithExplicitOverrideFunction,
|
|
85
87
|
RegionYearPartitionedFunction,
|
|
86
88
|
)
|
|
@@ -123,6 +125,7 @@ from vgi._test_fixtures.table.settings import (
|
|
|
123
125
|
StructSettingsFunction,
|
|
124
126
|
)
|
|
125
127
|
from vgi._test_fixtures.table.transaction_storage import TxCachedValueFunction
|
|
128
|
+
from vgi._test_fixtures.table.typed_probe import TypedProbeFunction
|
|
126
129
|
from vgi._test_fixtures.table.versioned import (
|
|
127
130
|
_CURRENT_VERSION,
|
|
128
131
|
_VERSIONED_CONSTRAINTS_CURRENT,
|
|
@@ -137,6 +140,7 @@ from vgi._test_fixtures.table.versioned import (
|
|
|
137
140
|
)
|
|
138
141
|
|
|
139
142
|
__all__ = [
|
|
143
|
+
"TypedProbeFunction",
|
|
140
144
|
"_CURRENT_VERSION",
|
|
141
145
|
"_VERSIONED_CONSTRAINTS_CURRENT",
|
|
142
146
|
"_VERSIONED_CONSTRAINTS_DATA",
|
|
@@ -161,6 +165,7 @@ __all__ = [
|
|
|
161
165
|
"FilterEchoFunction",
|
|
162
166
|
"FilterEchoPartitionedFunction",
|
|
163
167
|
"FilterEchoTableScanFunction",
|
|
168
|
+
"FilteredColumnsEchoFunction",
|
|
164
169
|
"GeneratorExceptionFunction",
|
|
165
170
|
"ValuePruneFunction",
|
|
166
171
|
"LateMaterializationFunction",
|
|
@@ -178,6 +183,7 @@ __all__ = [
|
|
|
178
183
|
"NestedSequenceFunction",
|
|
179
184
|
"NonMonotoneBatchIndexFunction",
|
|
180
185
|
"OrderEchoFunction",
|
|
186
|
+
"OverlappingRangePartitionedFunction",
|
|
181
187
|
"PartitionedBatchIndexFunction",
|
|
182
188
|
"PartitionedBatchIndexMarkedFunction",
|
|
183
189
|
"PartitionedFixedOrderFunction",
|