vgi-python 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/_duckdb.py +3 -0
- vgi/_test_fixtures/aggregate/dynamic.py +7 -1
- vgi/_test_fixtures/scalar/__init__.py +4 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +73 -0
- vgi/_test_fixtures/table/__init__.py +4 -0
- vgi/_test_fixtures/table/filters.py +128 -0
- vgi/_test_fixtures/table/late_materialization.py +3 -0
- vgi/_test_fixtures/table/make_series.py +15 -0
- vgi/_test_fixtures/table/misc.py +4 -0
- vgi/_test_fixtures/table/pairs.py +12 -0
- vgi/_test_fixtures/table/sequence.py +28 -0
- vgi/_test_fixtures/table/settings.py +6 -0
- vgi/_test_fixtures/table/typed_probe.py +154 -0
- vgi/_test_fixtures/table_in_out.py +8 -26
- vgi/_test_fixtures/worker.py +9 -0
- vgi/aggregate_function.py +29 -11
- vgi/argument_spec.py +20 -20
- vgi/arguments.py +114 -153
- vgi/catalog/_descriptor_spec.py +246 -0
- vgi/catalog/attach_option.py +14 -171
- vgi/catalog/catalog_interface.py +390 -264
- vgi/catalog/descriptors.py +59 -26
- vgi/catalog/secret_type.py +1 -0
- vgi/catalog/setting.py +19 -214
- vgi/catalog/storage.py +8 -4
- vgi/client/catalog_mixin.py +37 -33
- vgi/client/cli_catalog.py +4 -16
- vgi/client/cli_schema.py +12 -73
- vgi/client/cli_table.py +30 -199
- vgi/client/cli_utils.py +74 -19
- vgi/client/cli_view.py +12 -74
- vgi/client/client.py +104 -79
- vgi/exceptions.py +4 -0
- vgi/function.py +9 -33
- vgi/function_storage.py +11 -46
- vgi/function_storage_azure_sql.py +6 -6
- vgi/function_storage_cf_do.py +23 -3
- vgi/http/worker_page.py +18 -8
- vgi/invocation.py +10 -10
- vgi/meta_worker.py +7 -7
- vgi/metadata.py +111 -46
- vgi/otel.py +3 -3
- vgi/protocol.py +504 -94
- vgi/scalar_function.py +93 -72
- vgi/schema_utils.py +2 -2
- vgi/secret_protocol.py +22 -3
- vgi/secret_service.py +9 -6
- vgi/serve.py +11 -11
- vgi/table_buffering_function.py +28 -22
- vgi/table_filter_pushdown.py +435 -61
- vgi/table_function.py +279 -82
- vgi/table_in_out_function.py +88 -28
- vgi/transactor/client.py +1 -1
- vgi/transactor/protocol.py +1 -1
- vgi/transactor/server.py +26 -64
- vgi/worker.py +125 -272
- {vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/METADATA +89 -176
- {vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/RECORD +61 -59
- {vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/WHEEL +0 -0
- {vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/entry_points.txt +0 -0
- {vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/licenses/LICENSE +0 -0
vgi/_duckdb.py
CHANGED
|
@@ -190,7 +190,7 @@ class _DynamicAggregateBase(AggregateFunction[DynamicState]):
|
|
|
190
190
|
# when DuckDB batches many partitions into shared buffers.
|
|
191
191
|
|
|
192
192
|
@staticmethod
|
|
193
|
-
def _slice_to_frame(
|
|
193
|
+
def _slice_to_frame(
|
|
194
194
|
partition: WindowPartition,
|
|
195
195
|
subframes: list[tuple[int, int]],
|
|
196
196
|
data_start: int,
|
|
@@ -198,11 +198,17 @@ class _DynamicAggregateBase(AggregateFunction[DynamicState]):
|
|
|
198
198
|
"""Slice all partition columns to the frame rows.
|
|
199
199
|
|
|
200
200
|
Args:
|
|
201
|
+
partition: The window partition whose columns are sliced.
|
|
202
|
+
subframes: List of ``(begin, end)`` index tuples describing the
|
|
203
|
+
row ranges to include in the frame.
|
|
201
204
|
data_start: Index where data columns begin (header columns are
|
|
202
205
|
``[0 .. data_start)``). NULL-drop is applied on data columns
|
|
203
206
|
only — matches the filtering ``_do_update`` performs in the
|
|
204
207
|
non-window path.
|
|
205
208
|
|
|
209
|
+
Returns:
|
|
210
|
+
A table containing only the frame rows across all partition columns.
|
|
211
|
+
|
|
206
212
|
"""
|
|
207
213
|
num_cols = partition.inputs.num_columns
|
|
208
214
|
cols = [partition.inputs.column(i) for i in range(num_cols)]
|
|
@@ -58,6 +58,8 @@ from vgi._test_fixtures.scalar.random_demo import (
|
|
|
58
58
|
from vgi._test_fixtures.scalar.settings_secrets import (
|
|
59
59
|
MultiplyBySettingFunction,
|
|
60
60
|
ReturnSecretValueFunction,
|
|
61
|
+
ScaleBySettingFunction,
|
|
62
|
+
SecretFieldFunction,
|
|
61
63
|
WhoAmIFunction,
|
|
62
64
|
)
|
|
63
65
|
from vgi._test_fixtures.scalar.type_info import (
|
|
@@ -103,6 +105,8 @@ __all__ = [
|
|
|
103
105
|
"RandomBytesFunction",
|
|
104
106
|
"RandomIntFunction",
|
|
105
107
|
"ReturnSecretValueFunction",
|
|
108
|
+
"ScaleBySettingFunction",
|
|
109
|
+
"SecretFieldFunction",
|
|
106
110
|
"SmartFormatPrefixFunction",
|
|
107
111
|
"SmartFormatWidthFunction",
|
|
108
112
|
"SumValuesFunction",
|
|
@@ -42,6 +42,79 @@ class MultiplyBySettingFunction(ScalarFunction):
|
|
|
42
42
|
return pc.multiply(multiplier, value)
|
|
43
43
|
|
|
44
44
|
|
|
45
|
+
class ScaleBySettingFunction(ScalarFunction):
    """Multiply each input value by the float (DOUBLE) setting ``scale_factor``.

    Floating-point sibling of :class:`MultiplyBySettingFunction`: the factor is
    read from a float setting rather than an integer one. When the setting is
    absent or NULL the neutral factor ``1.0`` is used.

    Example:
        SQL: SELECT scale_by_setting(4.0)

    """

    class Meta:
        """Function metadata."""

        name = "scale_by_setting"
        description = "Scale the input value by the float setting `scale_factor`"
        examples = [
            FunctionExample(
                sql="SELECT scale_by_setting(4.0)",
                description="Scale the input value by the float setting's value",
            ),
        ]

    @classmethod
    def compute(
        cls,
        value: Annotated[pa.DoubleArray, Param(doc="Value to scale")],
        scale_factor: Annotated[pa.Scalar[Any] | None, Setting()],
    ) -> Annotated[pa.DoubleArray, Returns()]:
        """Return ``value`` multiplied element-wise by the resolved factor."""
        # Unwrap the setting a single time; "no setting" and "NULL setting"
        # both collapse to the neutral factor below.
        resolved = None if scale_factor is None else scale_factor.as_py()
        if resolved is None:
            resolved = 1.0
        multiplier = pa.scalar(resolved, type=pa.float64())
        return pc.multiply(multiplier, value)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class SecretFieldFunction(ScalarFunction):
    """Look up individual secret fields by name.

    Reads the ``port`` and ``secret_string`` fields of the ``vgi_example``
    secret by name and reports them as ``port=<port>;name=<secret_string>``,
    repeated once per output row. A field that is missing from the secret is
    rendered as an empty string. The result mirrors the wire behaviour of the
    worker-side named/positional secret field accessors.

    Example:
        SQL: SELECT secret_field()

    """

    class Meta:
        """Function metadata."""

        name = "secret_field"
        description = "Look up secret fields by name"
        examples = [
            FunctionExample(
                sql="SELECT secret_field()",
                description="Look up secret fields by name",
            ),
        ]

    @classmethod
    def compute(
        cls,
        vgi_example: Annotated[dict[str, pa.Scalar[Any]], Secret("vgi_example")],
        _length: Annotated[int, OutputLength()],
    ) -> Annotated[pa.StringArray, Returns()]:
        """Build the ``port=...;name=...`` string and repeat it for every row."""

        def _render(field_name: str) -> str:
            # Absent fields become "", present ones are stringified from
            # their Python value.
            scalar = vgi_example.get(field_name)
            return "" if scalar is None else str(scalar.as_py())

        rendered = f"port={_render('port')};name={_render('secret_string')}"
        return pa.array([rendered] * _length, type=pa.string())
|
|
116
|
+
|
|
117
|
+
|
|
45
118
|
class ReturnSecretValueFunction(ScalarFunction):
|
|
46
119
|
"""Return the value of a secret.
|
|
47
120
|
|
|
@@ -45,6 +45,7 @@ from vgi._test_fixtures.table.filters import (
|
|
|
45
45
|
FilterEchoFunction,
|
|
46
46
|
FilterEchoPartitionedFunction,
|
|
47
47
|
FilterEchoTableScanFunction,
|
|
48
|
+
FilteredColumnsEchoFunction,
|
|
48
49
|
SpatialFilterExampleFunction,
|
|
49
50
|
ValuePruneFunction,
|
|
50
51
|
)
|
|
@@ -123,6 +124,7 @@ from vgi._test_fixtures.table.settings import (
|
|
|
123
124
|
StructSettingsFunction,
|
|
124
125
|
)
|
|
125
126
|
from vgi._test_fixtures.table.transaction_storage import TxCachedValueFunction
|
|
127
|
+
from vgi._test_fixtures.table.typed_probe import TypedProbeFunction
|
|
126
128
|
from vgi._test_fixtures.table.versioned import (
|
|
127
129
|
_CURRENT_VERSION,
|
|
128
130
|
_VERSIONED_CONSTRAINTS_CURRENT,
|
|
@@ -137,6 +139,7 @@ from vgi._test_fixtures.table.versioned import (
|
|
|
137
139
|
)
|
|
138
140
|
|
|
139
141
|
__all__ = [
|
|
142
|
+
"TypedProbeFunction",
|
|
140
143
|
"_CURRENT_VERSION",
|
|
141
144
|
"_VERSIONED_CONSTRAINTS_CURRENT",
|
|
142
145
|
"_VERSIONED_CONSTRAINTS_DATA",
|
|
@@ -161,6 +164,7 @@ __all__ = [
|
|
|
161
164
|
"FilterEchoFunction",
|
|
162
165
|
"FilterEchoPartitionedFunction",
|
|
163
166
|
"FilterEchoTableScanFunction",
|
|
167
|
+
"FilteredColumnsEchoFunction",
|
|
164
168
|
"GeneratorExceptionFunction",
|
|
165
169
|
"ValuePruneFunction",
|
|
166
170
|
"LateMaterializationFunction",
|
|
@@ -116,6 +116,9 @@ class FilterEchoFunction(TableFunctionGenerator[FilterEchoFunctionArgs, FilterEc
|
|
|
116
116
|
SELECT * FROM filter_echo(10) WHERE n >= 8
|
|
117
117
|
Returns: rows 8-9 with pushed_filters showing "n >= 8"
|
|
118
118
|
|
|
119
|
+
Attributes:
|
|
120
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
121
|
+
|
|
119
122
|
"""
|
|
120
123
|
|
|
121
124
|
class Meta:
|
|
@@ -287,6 +290,125 @@ class ValuePruneFunction(TableFunctionGenerator[_ValuePruneArgs, _ValuePruneStat
|
|
|
287
290
|
state.cursor += size
|
|
288
291
|
|
|
289
292
|
|
|
293
|
+
# ============================================================================
|
|
294
|
+
# FilteredColumnsEchoFunction — echoes the column-introspection accessors on the
|
|
295
|
+
# pushed-down filter set: filtered_columns(), has_filter_for_column(), and the
|
|
296
|
+
# typed (string-capable) get_column_values(). A query's WHERE clause is reflected
|
|
297
|
+
# back as diagnostic columns so each accessor is observable end-to-end.
|
|
298
|
+
# ============================================================================
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@dataclass(slots=True, frozen=True)
class _FilteredColumnsEchoArgs:
    """Arguments for FilteredColumnsEchoFunction.

    Frozen, slotted record of the two call arguments: the number of rows to
    generate and the batch size used when emitting them.
    """

    # Declared with Arg(0, ...) — presumably the first positional argument
    # (TODO confirm against vgi.arguments.Arg); constrained with ge=0.
    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
    # Declared by name ("batch_size") with default 2048; constrained with ge=1.
    batch_size: Annotated[int, Arg("batch_size", default=2048, doc="Batch size for output", ge=1)]
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
@dataclass(kw_only=True)
class _FilteredColumnsEchoState(ArrowSerializableDataclass):
    """Resolved diagnostics (serialized so the HTTP rehydrate path preserves them).

    Populated once by ``FilteredColumnsEchoFunction.initial_state`` and then
    advanced by ``process`` via ``cursor``.
    """

    # Total number of rows to generate (copied from the ``count`` argument).
    count: int
    # Sorted, comma-joined names of the columns referenced by pushed-down filters.
    filtered_cols: str
    # Whether a pushed-down filter references column ``n``.
    has_n: bool
    # Whether a pushed-down filter references column ``tag``.
    has_tag: bool
    # Sorted, comma-joined values resolved for ``tag``, or "(none)".
    tag_values: str
    # Index of the next row to emit; starts at 0.
    cursor: int = 0
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
@init_single_worker
@bind_fixed_schema
@_cardinality_from_count
class FilteredColumnsEchoFunction(TableFunctionGenerator[_FilteredColumnsEchoArgs, _FilteredColumnsEchoState]):
    """Report the columns referenced by pushed-down filters and ``tag``'s values.

    Every generated row carries the same diagnostics, resolved once from the
    pushed-down filter set:

    * ``filtered_cols`` — the sorted, comma-joined ``filtered_columns`` set;
    * ``has_n`` / ``has_tag`` — ``has_filter_for_column()`` for each column;
    * ``tag_values`` — the sorted, comma-joined ``get_column_values('tag')``
      result, or ``"(none)"`` when the predicate is not an enumerable
      equality/IN on ``tag``.
    """

    class Meta:
        """Metadata for FilteredColumnsEchoFunction."""

        name = "filtered_columns_echo"
        description = "Echoes filtered_columns / has_filter_for_column / get_column_values_array"
        categories = ["generator", "diagnostic"]
        filter_pushdown = True
        auto_apply_filters = True
        projection_pushdown = True

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(
        {
            "n": pa.int64(),
            "tag": pa.utf8(),
            "filtered_cols": pa.utf8(),
            "has_n": pa.bool_(),
            "has_tag": pa.bool_(),
            "tag_values": pa.utf8(),
        }
    )

    @classmethod
    def initial_state(cls, params: ProcessParams[_FilteredColumnsEchoArgs]) -> _FilteredColumnsEchoState:
        """Resolve the filter-column diagnostics from the pushed-down filters."""
        assert params.init_call is not None
        raw_filters = params.init_call.pushdown_filters
        join_keys = params.init_call.join_keys
        resolved = None if raw_filters is None else cls.pushdown_filters(raw_filters, join_keys=join_keys)

        # Start from the "nothing was pushed down" answers and overwrite them
        # only when a filter set is actually available.
        filtered_cols, has_n, has_tag, tag_values = "", False, False, "(none)"
        if resolved is not None:
            filtered_cols = ",".join(sorted(resolved.filtered_columns))
            has_n = resolved.has_filter_for_column("n")
            has_tag = resolved.has_filter_for_column("tag")
            tag_array = resolved.get_column_values("tag")
            if tag_array is not None:
                # NULL entries are dropped before the values are stringified.
                tag_values = ",".join(sorted(str(item) for item in tag_array.to_pylist() if item is not None))

        return _FilteredColumnsEchoState(
            count=params.args.count,
            filtered_cols=filtered_cols,
            has_n=has_n,
            has_tag=has_tag,
            tag_values=tag_values,
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[_FilteredColumnsEchoArgs],
        state: _FilteredColumnsEchoState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch of rows, each carrying the resolved diagnostics."""
        remaining = state.count - state.cursor
        if remaining <= 0:
            out.finish()
            return

        size = min(remaining, params.args.batch_size)
        first = state.cursor
        ns = list(range(first, first + size))
        columns = {
            "n": ns,
            "tag": [f"t{i}" for i in ns],
            "filtered_cols": [state.filtered_cols] * size,
            "has_n": [state.has_n] * size,
            "has_tag": [state.has_tag] * size,
            "tag_values": [state.tag_values] * size,
        }
        out.emit(pa.RecordBatch.from_pydict(columns, schema=params.output_schema))
        state.cursor = first + size
|
|
410
|
+
|
|
411
|
+
|
|
290
412
|
# ============================================================================
|
|
291
413
|
# DictFilterEchoFunction — output column declared as a *dictionary* Arrow type
|
|
292
414
|
# (dictionary<int8, utf8>) with no ENUM metadata. DuckDB maps such a column to
|
|
@@ -354,6 +476,9 @@ class DictFilterEchoFunction(TableFunctionGenerator[_DictFilterEchoArgs, _DictFi
|
|
|
354
476
|
SELECT * FROM dict_filter_echo(6) WHERE s = 'green'
|
|
355
477
|
Returns: rows 1 and 4.
|
|
356
478
|
|
|
479
|
+
Attributes:
|
|
480
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
481
|
+
|
|
357
482
|
"""
|
|
358
483
|
|
|
359
484
|
class Meta:
|
|
@@ -483,6 +608,9 @@ class SpatialFilterExampleFunction(TableFunctionGenerator[_SpatialFilterArgs, _S
|
|
|
483
608
|
SELECT * FROM spatial_filter_example(100) WHERE geom && ST_MakeEnvelope(0, 0, 0.5, 0.5)
|
|
484
609
|
Returns: points in the lower-left quadrant of the unit square.
|
|
485
610
|
|
|
611
|
+
Attributes:
|
|
612
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
613
|
+
|
|
486
614
|
"""
|
|
487
615
|
|
|
488
616
|
class Meta:
|
|
@@ -148,6 +148,9 @@ class LateMaterializationFunction(TableFunctionGenerator[LateMaterializationFunc
|
|
|
148
148
|
-------
|
|
149
149
|
SELECT row_id, payload FROM late_materialization(100000) ORDER BY ord LIMIT 10
|
|
150
150
|
|
|
151
|
+
Attributes:
|
|
152
|
+
FunctionArguments: The argument dataclass type bound to this function.
|
|
153
|
+
|
|
151
154
|
"""
|
|
152
155
|
|
|
153
156
|
FunctionArguments = LateMaterializationFunctionArgs
|
|
@@ -77,6 +77,9 @@ class MakeSeriesCountFunction(TableFunctionGenerator[MakeSeriesCountArgs, MakeSe
|
|
|
77
77
|
SELECT * FROM make_series(5)
|
|
78
78
|
Returns: 0, 1, 2, 3, 4
|
|
79
79
|
|
|
80
|
+
Attributes:
|
|
81
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
82
|
+
|
|
80
83
|
"""
|
|
81
84
|
|
|
82
85
|
FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
|
|
@@ -113,6 +116,9 @@ class MakeSeriesRangeFunction(TableFunctionGenerator[MakeSeriesRangeArgs, MakeSe
|
|
|
113
116
|
SELECT * FROM make_series(3, 7)
|
|
114
117
|
Returns: 3, 4, 5, 6
|
|
115
118
|
|
|
119
|
+
Attributes:
|
|
120
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
121
|
+
|
|
116
122
|
"""
|
|
117
123
|
|
|
118
124
|
FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
|
|
@@ -149,6 +155,9 @@ class MakeSeriesStepFunction(TableFunctionGenerator[MakeSeriesStepArgs, MakeSeri
|
|
|
149
155
|
SELECT * FROM make_series(0, 10, 3)
|
|
150
156
|
Returns: 0, 3, 6, 9
|
|
151
157
|
|
|
158
|
+
Attributes:
|
|
159
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
160
|
+
|
|
152
161
|
"""
|
|
153
162
|
|
|
154
163
|
FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
|
|
@@ -195,6 +204,9 @@ class MakeSeriesCsvFunction(TableFunctionGenerator[MakeSeriesCsvArgs, MakeSeries
|
|
|
195
204
|
SELECT * FROM make_series('10,20,30')
|
|
196
205
|
Returns: 10, 20, 30
|
|
197
206
|
|
|
207
|
+
Attributes:
|
|
208
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
209
|
+
|
|
198
210
|
"""
|
|
199
211
|
|
|
200
212
|
FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
|
|
@@ -243,6 +255,9 @@ class MakeSeriesFloatFunction(TableFunctionGenerator[MakeSeriesFloatArgs, MakeSe
|
|
|
243
255
|
SELECT * FROM make_series(0.5)
|
|
244
256
|
Returns: 0.0, 0.5, 1.0, ..., 4.5
|
|
245
257
|
|
|
258
|
+
Attributes:
|
|
259
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
260
|
+
|
|
246
261
|
"""
|
|
247
262
|
|
|
248
263
|
FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_FLOAT_SCHEMA
|
vgi/_test_fixtures/table/misc.py
CHANGED
|
@@ -178,6 +178,10 @@ class ProjectedDataFunction(TableFunctionGenerator[ProjectedDataFunctionArgument
|
|
|
178
178
|
SELECT id, value FROM projected_data(10) -- Only computes id and value
|
|
179
179
|
Returns: 10 rows with id and value columns only
|
|
180
180
|
|
|
181
|
+
Attributes:
|
|
182
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
183
|
+
BATCH_SIZE: Number of rows emitted per output batch.
|
|
184
|
+
|
|
181
185
|
"""
|
|
182
186
|
|
|
183
187
|
class Meta:
|
|
@@ -83,6 +83,9 @@ class ConstantColumnsFunction(TableFunctionGenerator[ConstantColumnsFunctionArgu
|
|
|
83
83
|
Returns: [{"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"},
|
|
84
84
|
{"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"}]
|
|
85
85
|
|
|
86
|
+
Attributes:
|
|
87
|
+
BATCH_SIZE: Number of rows emitted per output batch.
|
|
88
|
+
|
|
86
89
|
"""
|
|
87
90
|
|
|
88
91
|
class Meta:
|
|
@@ -188,6 +191,9 @@ class MakePairsIntFunction(TableFunctionGenerator[MakePairsIntArgs, MakePairsInt
|
|
|
188
191
|
SELECT * FROM make_pairs(1, 4)
|
|
189
192
|
Returns: (1,2), (2,4), (3,6)
|
|
190
193
|
|
|
194
|
+
Attributes:
|
|
195
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
196
|
+
|
|
191
197
|
"""
|
|
192
198
|
|
|
193
199
|
FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_INT_SCHEMA
|
|
@@ -223,6 +229,9 @@ class MakePairsStrFunction(TableFunctionGenerator[MakePairsStrArgs, MakePairsStr
|
|
|
223
229
|
SELECT * FROM make_pairs('row_', '_end')
|
|
224
230
|
Returns: ('row_0','_end0'), ('row_1','_end1'), ...
|
|
225
231
|
|
|
232
|
+
Attributes:
|
|
233
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
234
|
+
|
|
226
235
|
"""
|
|
227
236
|
|
|
228
237
|
FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_STR_SCHEMA
|
|
@@ -284,6 +293,9 @@ class MakePairsIntStrFunction(TableFunctionGenerator[MakePairsIntStrArgs, MakePa
|
|
|
284
293
|
SELECT * FROM make_pairs(10, 'item_')
|
|
285
294
|
Returns: (10, 'item_0'), (11, 'item_1'), ..., (14, 'item_4')
|
|
286
295
|
|
|
296
|
+
Attributes:
|
|
297
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
298
|
+
|
|
287
299
|
"""
|
|
288
300
|
|
|
289
301
|
FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_MIXED_SCHEMA
|
|
@@ -69,6 +69,11 @@ class SequenceFunction(_BaseSequenceFunction):
|
|
|
69
69
|
SELECT * FROM sequence(1000, batch_size := 100)
|
|
70
70
|
Returns: integers 0-999 in batches of 100 rows each
|
|
71
71
|
|
|
72
|
+
Attributes:
|
|
73
|
+
FunctionArguments: The argument dataclass type bound to this function.
|
|
74
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
75
|
+
NUMPY_DTYPE: NumPy dtype used to build the output column(s).
|
|
76
|
+
|
|
72
77
|
"""
|
|
73
78
|
|
|
74
79
|
FunctionArguments = SequenceFunctionArgs
|
|
@@ -144,6 +149,10 @@ class NamedParamsEchoFunction(_BaseSequenceFunction):
|
|
|
144
149
|
SELECT * FROM named_params_echo(3, greeting := 'hi', multiplier := 10)
|
|
145
150
|
Returns: rows with id=0..2, greeting='hi', value=id*10, float_value=id*1.0, enabled=true
|
|
146
151
|
|
|
152
|
+
Attributes:
|
|
153
|
+
FunctionArguments: The argument dataclass type bound to this function.
|
|
154
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
155
|
+
|
|
147
156
|
"""
|
|
148
157
|
|
|
149
158
|
FunctionArguments = NamedParamsEchoFunctionArgs
|
|
@@ -242,6 +251,10 @@ class NestedSequenceFunction(_BaseSequenceFunction):
|
|
|
242
251
|
SELECT metadata.index FROM nested_sequence(10)
|
|
243
252
|
Test projection pushdown with struct field access
|
|
244
253
|
|
|
254
|
+
Attributes:
|
|
255
|
+
FunctionArguments: The argument dataclass type bound to this function.
|
|
256
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
257
|
+
|
|
245
258
|
"""
|
|
246
259
|
|
|
247
260
|
class Meta:
|
|
@@ -356,6 +369,12 @@ class DoubleSequenceFunction(_BaseSequenceFunction):
|
|
|
356
369
|
SELECT * FROM double_sequence(1000, batch_size := 100)
|
|
357
370
|
Returns: floats 0.0-999.0 in batches of 100 rows each
|
|
358
371
|
|
|
372
|
+
Attributes:
|
|
373
|
+
FunctionArguments: The argument dataclass type bound to this function.
|
|
374
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
375
|
+
NUMPY_DTYPE: NumPy dtype used to build the output column(s).
|
|
376
|
+
STATS_ARROW_TYPE: Arrow type used for the column statistics this function reports.
|
|
377
|
+
|
|
359
378
|
"""
|
|
360
379
|
|
|
361
380
|
FunctionArguments = DoubleSequenceFunctionArguments
|
|
@@ -441,6 +460,11 @@ class PartitionedSequenceFunction(
|
|
|
441
460
|
With count=5 and increment=10:
|
|
442
461
|
Combined output: [0, 10, 20, 30, 40]
|
|
443
462
|
|
|
463
|
+
Attributes:
|
|
464
|
+
MAX_PARTITIONS: Maximum number of partitions this function emits.
|
|
465
|
+
BATCH_SIZE: Number of rows emitted per output batch.
|
|
466
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
467
|
+
|
|
444
468
|
"""
|
|
445
469
|
|
|
446
470
|
class Meta:
|
|
@@ -556,6 +580,10 @@ class TenThousandFunction(TableFunctionGenerator[TenThousandFunctionArguments, T
|
|
|
556
580
|
SELECT * FROM ten_thousand()
|
|
557
581
|
Returns: [{"n": 0}, {"n": 1}, ..., {"n": 9999}]
|
|
558
582
|
|
|
583
|
+
Attributes:
|
|
584
|
+
BATCH_SIZE: Number of rows emitted per output batch.
|
|
585
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
586
|
+
|
|
559
587
|
"""
|
|
560
588
|
|
|
561
589
|
class Meta:
|
|
@@ -75,6 +75,9 @@ class SettingsAwareFunction(TableFunctionGenerator[SettingsAwareFunctionArgument
|
|
|
75
75
|
With settings={vgi_verbose_mode: true, greeting: "Hi", multiplier: 2}:
|
|
76
76
|
Returns: [{"id": 0, "greeting": "Hi", "value": 0.0, "details": "row_0"}, ...]
|
|
77
77
|
|
|
78
|
+
Attributes:
|
|
79
|
+
BATCH_SIZE: Number of rows emitted per output batch.
|
|
80
|
+
|
|
78
81
|
"""
|
|
79
82
|
|
|
80
83
|
class Meta:
|
|
@@ -207,6 +210,9 @@ class StructSettingsFunction(TableFunctionGenerator[StructSettingsFunctionArgume
|
|
|
207
210
|
With config={'start': 10, 'step': 5, 'label': 'item'} and count=3:
|
|
208
211
|
Returns: [{"n": 10, "label": "item_0"}, {"n": 15, "label": "item_1"}, {"n": 20, "label": "item_2"}]
|
|
209
212
|
|
|
213
|
+
Attributes:
|
|
214
|
+
FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
|
|
215
|
+
|
|
210
216
|
"""
|
|
211
217
|
|
|
212
218
|
class Meta:
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""typed_probe — exercises typed const-argument binding and typed column emit.
|
|
4
|
+
|
|
5
|
+
Const args cover the less-common Arrow scalar types — TIMESTAMP, INTERVAL
|
|
6
|
+
(duration), BLOB and UBIGINT — each with a default so calling ``typed_probe(n)``
|
|
7
|
+
drives the default path and passing named args drives the scalar-extraction
|
|
8
|
+
path. The output echoes the bound values into uint64 / int64 / blob / double
|
|
9
|
+
columns. Values are echoed in normalized integer/byte form so this fixture and
|
|
10
|
+
its vgi-go counterpart produce byte-identical results for the shared test.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import datetime
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from typing import Annotated, ClassVar
|
|
18
|
+
|
|
19
|
+
import pyarrow as pa
|
|
20
|
+
from vgi_rpc import ArrowSerializableDataclass
|
|
21
|
+
from vgi_rpc.rpc import OutputCollector
|
|
22
|
+
|
|
23
|
+
from vgi.arguments import Arg
|
|
24
|
+
from vgi.schema_utils import schema
|
|
25
|
+
from vgi.table_function import (
|
|
26
|
+
ProcessParams,
|
|
27
|
+
TableFunctionGenerator,
|
|
28
|
+
bind_fixed_schema,
|
|
29
|
+
init_single_worker,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# Timezone-aware (UTC) Unix epoch; TypedProbeFunction.initial_state subtracts
# it from the ``ts`` argument to obtain whole microseconds since the epoch.
_EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.UTC)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _iv_to_ms(iv: object) -> int:
    """Collapse a duration/interval const to whole milliseconds.

    A declared default arrives as a ``datetime.timedelta``; a SQL ``INTERVAL``
    literal arrives as a pyarrow ``MonthDayNano`` (DuckDB intervals are
    month-day-nano). Mirrors vgi-go's GetScalarDuration collapse — a month
    counts as 30 days and a day as 24 hours — so both implementations agree.

    Args:
        iv: The interval value. Anything that is not a ``timedelta`` is read
            through its ``months`` / ``days`` / ``nanoseconds`` attributes,
            each defaulting to 0 when missing.

    Returns:
        The interval in milliseconds, floor-divided to a whole number.
    """
    if isinstance(iv, datetime.timedelta):
        return iv // datetime.timedelta(milliseconds=1)

    ms_per_day = 24 * 3600 * 1000
    # Fold months into days first (1 month == 30 days), then scale once.
    total_days = getattr(iv, "months", 0) * 30 + getattr(iv, "days", 0)
    sub_day_ms = getattr(iv, "nanoseconds", 0) // 1_000_000
    return total_days * ms_per_day + sub_day_ms
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Output schema of typed_probe: one column per echoed value, in the normalized
# integer/byte form described in the module docstring.
TYPED_PROBE_SCHEMA = schema(
    idx=pa.uint64(),  # row index
    ts_us=pa.int64(),  # ``ts`` as microseconds since the Unix epoch
    iv_ms=pa.int64(),  # ``iv`` collapsed to whole milliseconds
    payload=pa.binary(),  # ``blob`` echoed as-is
    ub=pa.uint64(),  # ``ub`` echoed as-is
    f=pa.float64(),  # ``f`` plus the row index
)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass(kw_only=True)
class TypedProbeArgs:
    """Arguments for TypedProbeFunction — one named const per scalar type.

    Every argument except ``n`` declares a default, so ``typed_probe(n)``
    exercises the default path while passing named arguments exercises
    scalar extraction.
    """

    # Declared with Arg(0, ...) — presumably the first positional argument
    # (TODO confirm against vgi.arguments.Arg); constrained with ge=0.
    n: Annotated[int, Arg(0, doc="Number of rows to emit", ge=0)]
    # Timestamp const, declared as microsecond-resolution UTC timestamp.
    ts: Annotated[
        datetime.datetime,
        Arg(
            "ts",
            default=datetime.datetime(2026, 1, 2, 3, 4, 5, tzinfo=datetime.UTC),
            arrow_type=pa.timestamp("us", tz="UTC"),
            doc="Timestamp const (TIMESTAMPTZ)",
        ),
    ]
    # Interval const; annotated as timedelta, but _iv_to_ms also accepts a
    # month/day/nanosecond object for values supplied from SQL.
    iv: Annotated[
        datetime.timedelta,
        Arg(
            "iv",
            default=datetime.timedelta(milliseconds=1500),
            arrow_type=pa.duration("ns"),
            doc="Interval const (INTERVAL)",
        ),
    ]
    # Binary const, echoed unchanged into the ``payload`` column.
    blob: Annotated[
        bytes,
        Arg("blob", default=b"vgi", arrow_type=pa.binary(), doc="Blob const (BLOB)"),
    ]
    # Unsigned 64-bit const, echoed unchanged into the ``ub`` column.
    ub: Annotated[
        int,
        Arg("ub", default=9, arrow_type=pa.uint64(), doc="Unsigned const (UBIGINT)"),
    ]
    # Float const; each output row adds its row index to this base value.
    f: Annotated[float, Arg("f", default=2.5, doc="Float const (DOUBLE)")]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass(kw_only=True)
class TypedProbeState(ArrowSerializableDataclass):
    """Mutable state — the resolved const values plus emit cursor.

    Built by ``TypedProbeFunction.initial_state``; ``process`` advances
    ``offset`` once the rows have been emitted.
    """

    # Total number of rows to emit.
    n: int
    # ``ts`` argument as whole microseconds since the Unix epoch.
    ts_us: int
    # ``iv`` argument collapsed to whole milliseconds by _iv_to_ms.
    iv_ms: int
    # ``blob`` argument, unchanged.
    payload: bytes
    # ``ub`` argument, unchanged.
    ub: int
    # ``f`` argument, unchanged (row index is added at emit time).
    f: float
    # Index of the next row to emit; starts at 0.
    offset: int = 0
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@init_single_worker
@bind_fixed_schema
class TypedProbeFunction(TableFunctionGenerator[TypedProbeArgs, TypedProbeState]):
    """Echo typed const args (timestamp/interval/blob/ubigint) into typed columns.

    The const arguments are normalized once in :meth:`initial_state` and then
    repeated on every emitted row by :meth:`process`; only ``idx`` and ``f``
    vary per row.
    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = TYPED_PROBE_SCHEMA

    class Meta:
        """Function metadata."""

        name = "typed_probe"
        description = "Echoes typed const args (timestamp/interval/blob/ubigint) into typed columns"

    @classmethod
    def initial_state(cls, params: ProcessParams[TypedProbeArgs]) -> TypedProbeState:
        """Resolve const args into normalized integer/byte form."""
        args = params.args
        # NOTE(review): subtracting the aware _EPOCH assumes ``ts`` arrives
        # timezone-aware (the arg is declared with tz="UTC") — confirm.
        since_epoch = args.ts - _EPOCH
        one_microsecond = datetime.timedelta(microseconds=1)
        return TypedProbeState(
            n=args.n,
            ts_us=since_epoch // one_microsecond,
            iv_ms=_iv_to_ms(args.iv),
            payload=args.blob,
            ub=args.ub,
            f=args.f,
        )

    @classmethod
    def process(cls, params: ProcessParams[TypedProbeArgs], state: TypedProbeState, out: OutputCollector) -> None:
        """Emit all remaining rows in a single batch, then finish on the next call."""
        if state.offset >= state.n:
            out.finish()
            return

        indices = list(range(state.offset, state.n))
        width = len(indices)
        # Mark everything as consumed up front so the next call finishes.
        state.offset = state.n

        columns = {
            "idx": pa.array(indices, type=pa.uint64()),
            "ts_us": pa.array([state.ts_us] * width, type=pa.int64()),
            "iv_ms": pa.array([state.iv_ms] * width, type=pa.int64()),
            "payload": pa.array([state.payload] * width, type=pa.binary()),
            "ub": pa.array([state.ub] * width, type=pa.uint64()),
            "f": pa.array([state.f + i for i in indices], type=pa.float64()),
        }
        out.emit(pa.RecordBatch.from_pydict(columns, schema=TYPED_PROBE_SCHEMA))
|