vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Geospatial scalar fixtures (geo_distance_*, geo_centroid_*)."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Annotated, Any
|
|
8
|
+
|
|
9
|
+
import pyarrow as pa
|
|
10
|
+
import pyarrow.compute as pc
|
|
11
|
+
|
|
12
|
+
from vgi.arguments import Param, Returns
|
|
13
|
+
from vgi.metadata import FunctionExample
|
|
14
|
+
from vgi.scalar_function import ScalarFunction
|
|
15
|
+
|
|
16
|
+
# Arrow struct type describing one point: two float64 fields named "lat" and "lon".
_POINT_STRUCT_TYPE = pa.struct([("lat", pa.float64()), ("lon", pa.float64())])
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _euclidean_distance(
    lat1: pa.Array[Any], lon1: pa.Array[Any], lat2: pa.Array[Any], lon2: pa.Array[Any]
) -> pa.DoubleArray:
    """Return the planar distance between (lat1, lon1) and (lat2, lon2).

    Evaluates sqrt((lat2-lat1)^2 + (lon2-lon1)^2) with pyarrow compute
    kernels; the coordinates are treated as plain Cartesian values.
    """
    lat_delta = pc.subtract(lat2, lat1)
    lon_delta = pc.subtract(lon2, lon1)
    lat_squared = pc.multiply(lat_delta, lat_delta)
    lon_squared = pc.multiply(lon_delta, lon_delta)
    squared_distance = pc.add(lat_squared, lon_squared)
    return pc.sqrt(squared_distance)  # type: ignore[return-value]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _compute_centroid(lat_arrays: list[pa.Array[Any]], lon_arrays: list[pa.Array[Any]]) -> pa.StructArray:
|
|
29
|
+
"""Compute centroid (average lat, average lon) from parallel lat/lon arrays."""
|
|
30
|
+
n = len(lat_arrays)
|
|
31
|
+
lat_sum: pa.Array[Any] = lat_arrays[0]
|
|
32
|
+
lon_sum: pa.Array[Any] = lon_arrays[0]
|
|
33
|
+
for i in range(1, n):
|
|
34
|
+
lat_sum = pc.add(lat_sum, lat_arrays[i])
|
|
35
|
+
lon_sum = pc.add(lon_sum, lon_arrays[i])
|
|
36
|
+
divisor = pa.scalar(n, type=pa.float64())
|
|
37
|
+
avg_lat = pc.divide(lat_sum, divisor)
|
|
38
|
+
avg_lon = pc.divide(lon_sum, divisor)
|
|
39
|
+
return pa.StructArray.from_arrays([avg_lat, avg_lon], names=["lat", "lon"])
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class GeoDistanceStructFunction(ScalarFunction):
    """Euclidean distance between two struct points.

    Both arguments are struct columns carrying ``lat`` and ``lon`` fields;
    the distance is derived from those four field arrays.

    Example:
        SQL: SELECT geo_distance_struct(p1, p2) FROM points
        Input: p1={lat: 0.0, lon: 0.0}, p2={lat: 3.0, lon: 4.0}
        Output: result=5.0

    """

    class Meta:
        """Function metadata."""

        name = "geo_distance_struct"
        description = "Euclidean distance between two struct points"
        examples = [
            FunctionExample(
                sql="SELECT geo_distance_struct({lat: 0, lon: 0}, {lat: 3, lon: 4})",
                description="Distance between origin and (3, 4)",
            ),
        ]

    @classmethod
    def compute(
        cls,
        p1: Annotated[
            pa.StructArray,
            Param(doc="First point {lat, lon}", arrow_type=_POINT_STRUCT_TYPE),
        ],
        p2: Annotated[
            pa.StructArray,
            Param(doc="Second point {lat, lon}", arrow_type=_POINT_STRUCT_TYPE),
        ],
    ) -> Annotated[pa.DoubleArray, Returns()]:
        """Return the distance from ``p1`` to ``p2`` using their lat/lon fields."""
        # Pull the child arrays out of each struct before handing them to the helper.
        start_lat, start_lon = p1.field("lat"), p1.field("lon")
        end_lat, end_lon = p2.field("lat"), p2.field("lon")
        return _euclidean_distance(start_lat, start_lon, end_lat, end_lon)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class GeoDistanceListFunction(ScalarFunction):
    """Euclidean distance between two list points.

    Each argument is a list column of float64 values; element 0 is read as
    lat and element 1 as lon.

    Example:
        SQL: SELECT geo_distance_list(p1, p2) FROM points
        Input: p1=[0.0, 0.0], p2=[3.0, 4.0]
        Output: result=5.0

    """

    class Meta:
        """Function metadata."""

        name = "geo_distance_list"
        description = "Euclidean distance between two list points"
        examples = [
            FunctionExample(
                sql="SELECT geo_distance_list([0, 0], [3, 4])",
                description="Distance between origin and (3, 4)",
            ),
        ]

    @classmethod
    def compute(
        cls,
        p1: Annotated[  # type: ignore[type-arg]
            pa.ListArray,
            Param(doc="First point [lat, lon]", arrow_type=pa.list_(pa.float64())),
        ],
        p2: Annotated[  # type: ignore[type-arg]
            pa.ListArray,
            Param(doc="Second point [lat, lon]", arrow_type=pa.list_(pa.float64())),
        ],
    ) -> Annotated[pa.DoubleArray, Returns()]:
        """Return the distance between ``p1`` and ``p2`` from list elements 0 and 1."""
        first_lat = pc.list_element(p1, 0)
        first_lon = pc.list_element(p1, 1)
        second_lat = pc.list_element(p2, 0)
        second_lon = pc.list_element(p2, 1)
        return _euclidean_distance(first_lat, first_lon, second_lat, second_lon)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class GeoDistanceFixedFunction(ScalarFunction):
    """Euclidean distance between two fixed-size list points.

    Each argument is a fixed-size list (length 2) of float64 values laid
    out as [lat, lon].

    Example:
        SQL: SELECT geo_distance_fixed(p1, p2) FROM points
        Input: p1=[0.0, 0.0], p2=[3.0, 4.0]
        Output: result=5.0

    """

    class Meta:
        """Function metadata."""

        name = "geo_distance_fixed"
        description = "Euclidean distance between two fixed-size list points"
        examples = [
            FunctionExample(
                sql="SELECT geo_distance_fixed([0, 0], [3, 4])",
                description="Distance between origin and (3, 4)",
            ),
        ]

    @classmethod
    def compute(
        cls,
        p1: Annotated[  # type: ignore[type-arg]
            pa.FixedSizeListArray,
            Param(doc="First point [lat, lon]", arrow_type=pa.list_(pa.float64(), 2)),
        ],
        p2: Annotated[  # type: ignore[type-arg]
            pa.FixedSizeListArray,
            Param(doc="Second point [lat, lon]", arrow_type=pa.list_(pa.float64(), 2)),
        ],
    ) -> Annotated[pa.DoubleArray, Returns()]:
        """Return the distance between ``p1`` and ``p2`` from list elements 0 and 1."""
        # Produces, in order: p1 lat, p1 lon, p2 lat, p2 lon.
        coordinates = [pc.list_element(point, index) for point in (p1, p2) for index in (0, 1)]
        return _euclidean_distance(*coordinates)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class GeoCentroidStructFunction(ScalarFunction):
    """Centroid of N struct points (varargs).

    Averages the ``lat`` fields and the ``lon`` fields of every point
    column passed in and returns them as a single {lat, lon} struct.

    Example:
        SQL: SELECT geo_centroid_struct(p1, p2) FROM points
        Input: p1={lat: 0.0, lon: 0.0}, p2={lat: 4.0, lon: 6.0}
        Output: result={lat: 2.0, lon: 3.0}

    """

    class Meta:
        """Function metadata."""

        name = "geo_centroid_struct"
        description = "Centroid of N struct points"
        examples = [
            FunctionExample(
                sql="SELECT geo_centroid_struct(p1, p2) FROM points",
                description="Compute centroid of two struct points",
            ),
        ]

    @classmethod
    def compute(
        cls,
        points: Annotated[
            list[pa.StructArray],
            Param(
                doc="Point columns {lat, lon}",
                arrow_type=_POINT_STRUCT_TYPE,
                varargs=True,
            ),
        ],
    ) -> Annotated[pa.StructArray, Returns(arrow_type=_POINT_STRUCT_TYPE)]:
        """Return the centroid of all point columns."""
        latitudes = [point.field("lat") for point in points]
        longitudes = [point.field("lon") for point in points]
        return _compute_centroid(latitudes, longitudes)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class GeoCentroidListFunction(ScalarFunction):
    """Centroid of N list points (varargs).

    Every point column is a list laid out as [lat, lon]; element 0 of each
    column feeds the lat average and element 1 feeds the lon average.

    Example:
        SQL: SELECT geo_centroid_list(p1, p2) FROM points
        Input: p1=[0.0, 0.0], p2=[4.0, 6.0]
        Output: result={lat: 2.0, lon: 3.0}

    """

    class Meta:
        """Function metadata."""

        name = "geo_centroid_list"
        description = "Centroid of N list points"
        examples = [
            FunctionExample(
                sql="SELECT geo_centroid_list(p1, p2) FROM points",
                description="Compute centroid of two list points",
            ),
        ]

    @classmethod
    def compute(
        cls,
        points: Annotated[  # type: ignore[type-arg]
            list[pa.ListArray],
            Param(
                doc="Point columns [lat, lon]",
                arrow_type=pa.list_(pa.float64()),
                varargs=True,
            ),
        ],
    ) -> Annotated[pa.StructArray, Returns(arrow_type=_POINT_STRUCT_TYPE)]:
        """Return the centroid of all point columns."""
        lat_columns = [pc.list_element(column, 0) for column in points]
        lon_columns = [pc.list_element(column, 1) for column in points]
        return _compute_centroid(lat_columns, lon_columns)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class GeoCentroidFixedFunction(ScalarFunction):
    """Centroid of N fixed-size list points (varargs).

    Every point column is a fixed-size list (length 2) laid out as
    [lat, lon]; the result is a {lat, lon} struct of the averages.

    Example:
        SQL: SELECT geo_centroid_fixed(p1, p2) FROM points
        Input: p1=[0.0, 0.0], p2=[4.0, 6.0]
        Output: result={lat: 2.0, lon: 3.0}

    """

    class Meta:
        """Function metadata."""

        name = "geo_centroid_fixed"
        description = "Centroid of N fixed-size list points"
        examples = [
            FunctionExample(
                sql="SELECT geo_centroid_fixed(p1, p2) FROM points",
                description="Compute centroid of two fixed-size list points",
            ),
        ]

    @classmethod
    def compute(
        cls,
        points: Annotated[  # type: ignore[type-arg]
            list[pa.FixedSizeListArray],
            Param(
                doc="Point columns [lat, lon]",
                arrow_type=pa.list_(pa.float64(), 2),
                varargs=True,
            ),
        ],
    ) -> Annotated[pa.StructArray, Returns(arrow_type=_POINT_STRUCT_TYPE)]:
        """Return the centroid of all point columns."""

        def elements_at(index: int) -> list[Any]:
            # One array per point column, holding that column's element at `index`.
            return [pc.list_element(point, index) for point in points]

        return _compute_centroid(elements_at(0), elements_at(1))
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Null-handling and conditional-message scalar fixtures."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
import pyarrow as pa
|
|
10
|
+
import pyarrow.compute as pc
|
|
11
|
+
|
|
12
|
+
from vgi.arguments import ConstParam, Param, Returns
|
|
13
|
+
from vgi.metadata import FunctionExample, NullHandling
|
|
14
|
+
from vgi.scalar_function import ScalarFunction
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ConditionalMessageFunction(ScalarFunction):
    """Returns a repeated message when condition is true, empty string otherwise.

    This example combines two ConstParam arguments with one column argument:
    - repeat_count (int constant): How many times to repeat the message
    - message (string constant): The message to repeat
    - condition (boolean column): Whether to apply the message

    The constant parameters come first, followed by the column parameter.

    Example:
        SQL: SELECT conditional_message(3, 'Hi! ', is_active) FROM users
        Input: is_active=[true, false, true]
        Args: repeat_count=3, message='Hi! '
        Output: result=['Hi! Hi! Hi! ', '', 'Hi! Hi! Hi! ']

    """

    class Meta:
        """Function metadata."""

        name = "conditional_message"
        description = "Returns repeated message when condition is true"
        examples = [
            FunctionExample(
                sql="SELECT conditional_message(3, 'Alert! ', flag) FROM items",
                description="Show alert message for flagged items",
            ),
            FunctionExample(
                sql="SELECT conditional_message(2, '⭐', is_featured) FROM products",
                description="Add stars to featured products",
            ),
        ]

    @classmethod
    def compute(
        cls,
        repeat_count: Annotated[int, ConstParam("Number of times to repeat")],
        message: Annotated[str, ConstParam("Message to repeat")],
        condition: Annotated[pa.BooleanArray, Param(doc="Apply message condition")],
    ) -> Annotated[pa.StringArray, Returns()]:
        """Pick the repeated message where ``condition`` is true, ``""`` where it is false."""
        # The repeated text is the same for every row, so build it once up front.
        text_when_true = message * repeat_count
        return pc.if_else(condition, text_when_true, "")  # type: ignore[return-value]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Type for config struct: {label: string, version: int64}
|
|
65
|
+
# NOTE(review): no fixture visible in this module references this constant — confirm it is still needed.
_CONFIG_STRUCT_TYPE = pa.struct([("label", pa.string()), ("version", pa.int64())])
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class NullHandlingFunction(ScalarFunction):
    """Demonstrates special null handling in a scalar function.

    Non-null inputs are passed through unchanged; null inputs are replaced
    with -5000. Meta.null_handling is set to NullHandling.SPECIAL so the
    function receives null values instead of having them automatically
    converted to null output.

    This example uses type inference with pa.Int64Array and Meta.null_handling.

    Example:
        SQL: SELECT null_handling(value) FROM data
        Input: value=[1, None, 3]
        Output: result=[1, -5000, 3]

    """

    class Meta:
        """Function metadata."""

        name = "null_handling"
        description = "Returns value or -5000 if null"
        null_handling = NullHandling.SPECIAL
        examples = [
            FunctionExample(
                sql="SELECT null_handling(value) FROM data",
                description="Replace null values with -5000",
            ),
        ]

    @classmethod
    def compute(
        cls,
        value: Annotated[pa.Int64Array, Param(doc="Integer value to process")],
    ) -> Annotated[pa.Int64Array, Returns()]:
        """Substitute -5000 for every null entry of ``value``."""
        missing = pc.is_null(value)
        fallback = pa.scalar(-5000, type=pa.int64())
        # Where the mask is true take the fallback, otherwise keep the input value.
        return pc.if_else(missing, fallback, value)  # type: ignore[return-value]
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Random/seeded scalar fixtures (random_int, random_bytes, bernoulli, hash_seed)."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
import pyarrow as pa
|
|
10
|
+
|
|
11
|
+
from vgi.arguments import ConstParam, OutputLength, Param, Returns
|
|
12
|
+
from vgi.metadata import FunctionExample, FunctionStability
|
|
13
|
+
from vgi.scalar_function import ScalarFunction
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RandomIntFunction(ScalarFunction):
    """Generates random integers for each row (demonstrates VOLATILE stability).

    Meta.stability is FunctionStability.VOLATILE: two calls with the same
    input produce different results, so the database optimizer cannot cache
    or reuse results from this function.

    This example uses type inference with pa.Int64Array and Meta.stability.

    Other stability options:
    - CONSISTENT: Same input always produces same output (deterministic)
    - CONSISTENT_WITHIN_QUERY: Same within a query, may vary across queries

    Example:
        SQL: SELECT random_int(min_col, max_col) FROM data
        Input: min_col=[1, 10, 100], max_col=[10, 100, 1000]
        Output: result=[7, 55, 823] (random values per row, different each time)

    """

    class Meta:
        """Function metadata."""

        name = "random_int"
        description = "Generate random integers (demonstrates VOLATILE stability)"
        stability = FunctionStability.VOLATILE
        examples = [
            FunctionExample(
                sql="SELECT random_int(min_col, max_col) FROM data",
                description="Generate random integers between min and max values",
            ),
        ]

    @classmethod
    def compute(
        cls,
        min_val: Annotated[pa.Int64Array, Param(doc="Minimum value (inclusive)")],
        max_val: Annotated[pa.Int64Array, Param(doc="Maximum value (inclusive)")],
    ) -> Annotated[pa.Int64Array, Returns()]:
        """Draw one random integer per row from the inclusive range [min_val, max_val]."""
        import numpy as np

        generator = np.random.default_rng()
        lower_bounds = min_val.to_numpy()
        upper_bounds = max_val.to_numpy()
        # endpoint=True makes the upper bound inclusive without computing
        # max_val + 1, which would overflow int64 when max_val is INT64_MAX
        # (wrapping to a negative value and triggering "high <= 0").
        samples = generator.integers(lower_bounds, upper_bounds, endpoint=True)
        return pa.array(samples, type=pa.int64())
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class BernoulliFunction(ScalarFunction):
    """Generates random booleans for each row (demonstrates VOLATILE stability).

    This function takes no input parameters; the only argument it receives
    is the requested output length, and it emits one random boolean per row.

    Example:
        SQL: SELECT bernoulli() FROM data

    """

    class Meta:
        """Function metadata."""

        name = "bernoulli"
        description = "Generate random booleans (demonstrates VOLATILE stability)"
        stability = FunctionStability.VOLATILE
        examples = [
            FunctionExample(
                sql="SELECT bernoulli() FROM data",
                description="Generate samples from the bernoulli distribution",
            ),
        ]

    @classmethod
    def compute(
        cls,
        _length: Annotated[int, OutputLength()],
    ) -> Annotated[pa.BooleanArray, Returns()]:
        """Return ``_length`` booleans, each drawn from ``random.randint(0, 1)``."""
        import random

        # Lazily draw a 0/1 per row, then map 1 -> True and 0 -> False.
        draws = (random.randint(0, 1) for _ in range(_length))
        flags = [draw == 1 for draw in draws]
        return pa.array(flags, type=pa.bool_())
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class HashSeedFunction(ScalarFunction):
    """Generates deterministic integers from a constant seed.

    Demonstrates the single-ConstParam pattern: one constant argument
    folded at plan time, no column parameters. The output length is
    supplied separately through the OutputLength annotation.

    Example:
        SQL: SELECT hash_seed(42) FROM data
        Input: (no column input)
        Args: seed=42
        Output: result=[42, 43, 44, ...] (seed + row_index)

    """

    class Meta:
        """Function metadata."""

        name = "hash_seed"
        description = "Generate deterministic integers from a constant seed"
        stability = FunctionStability.CONSISTENT
        examples = [
            FunctionExample(
                sql="SELECT hash_seed(42) FROM data",
                description="Generate deterministic integers seeded at 42",
            ),
        ]

    @classmethod
    def compute(
        cls,
        seed: Annotated[int, ConstParam("Seed value")],
        _length: Annotated[int, OutputLength()],
    ) -> Annotated[pa.Int64Array, Returns()]:
        """Return the consecutive integers seed, seed + 1, ... (``_length`` values)."""
        # range(seed, seed + _length) yields seed + row_index for each row.
        values = list(range(seed, seed + _length))
        return pa.array(values, type=pa.int64())
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class RandomBytesFunction(ScalarFunction):
    """Generates deterministic pseudo-random binary blobs from a seed.

    A ``random.Random`` generator is created from ``seed`` on every call to
    ``compute`` and all blobs of that call are drawn from it, one byte at a
    time.
    """

    class Meta:
        """Function metadata."""

        name = "random_bytes"
        description = "Generate pseudo-random binary blobs from seed and length"
        stability = FunctionStability.CONSISTENT
        examples = [
            FunctionExample(
                sql="SELECT random_bytes(42, 16) FROM data",
                description="Generate a deterministic 16-byte blob per input row",
            ),
        ]

    @classmethod
    def compute(
        cls,
        seed: Annotated[int, ConstParam("Seed for pseudo-random byte generation")],
        byte_length: Annotated[int, ConstParam("Output blob length in bytes")],
        _length: Annotated[int, OutputLength()],
    ) -> Annotated[pa.BinaryArray, Returns()]:
        """Return ``_length`` blobs of ``byte_length`` pseudo-random bytes each.

        Raises:
            ValueError: If ``byte_length`` is negative.
        """
        import random

        if byte_length < 0:
            raise ValueError("byte_length must be >= 0")

        generator = random.Random(seed)

        def next_blob() -> bytes:
            # One getrandbits(8) draw per output byte, in order.
            return bytes(generator.getrandbits(8) for _ in range(byte_length))

        blobs = [next_blob() for _ in range(_length)]
        return pa.array(blobs, type=pa.binary())
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Setting/secret/auth-aware scalar fixtures (multiply_by_setting, return_secret_value, whoami)."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from typing import Annotated, Any
|
|
9
|
+
|
|
10
|
+
import pyarrow as pa
|
|
11
|
+
import pyarrow.compute as pc
|
|
12
|
+
|
|
13
|
+
from vgi.arguments import Auth, OutputLength, Param, Returns, Secret, Setting
|
|
14
|
+
from vgi.auth import AuthContext
|
|
15
|
+
from vgi.metadata import FunctionExample
|
|
16
|
+
from vgi.scalar_function import ScalarFunction
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MultiplyBySettingFunction(ScalarFunction):
    """Generates the input value multiplied by a setting.

    The ``multiplier`` argument is injected through the Setting annotation
    rather than passed in the SQL call.
    """

    class Meta:
        """Function metadata."""

        name = "multiply_by_setting"
        description = "Multiply the input value by a setting value"
        examples = [
            FunctionExample(
                sql="SELECT multiply_by_setting(5)",
                description="Multiply the input value by a setting's value",
            ),
        ]

    @classmethod
    def compute(
        cls,
        value: Annotated[pa.Int64Array, Param(doc="Integer value to multiply")],
        multiplier: Annotated[pa.Scalar[Any] | None, Setting()],
    ) -> Annotated[pa.Int64Array, Returns()]:
        """Multiply every element of ``value`` by the ``multiplier`` setting.

        Raises:
            ValueError: If the ``multiplier`` setting value is ``None``.
        """
        # Explicit check instead of `assert`: assertions are stripped under
        # `python -O`, which would let a missing setting reach pc.multiply.
        if multiplier is None:
            raise ValueError("multiply_by_setting: the 'multiplier' setting value is missing (None)")
        return pc.multiply(multiplier, value)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ReturnSecretValueFunction(ScalarFunction):
    """Return the value of a secret.

    The ``vgi_example`` secret is injected through the Secret annotation and
    returned as a JSON object string, repeated once per output row.

    Example:
        SQL: SELECT return_secret_value()

    """

    class Meta:
        """Function metadata."""

        name = "return_secret_value"
        description = "Return a secret's value"
        examples = [
            FunctionExample(
                sql="SELECT return_secret_value()",
                description="Return a secret's value",
            ),
        ]

    @classmethod
    def compute(
        cls,
        vgi_example_secret: Annotated[dict[str, pa.Scalar[Any]], Secret("vgi_example")],
        _length: Annotated[int, OutputLength()],
    ) -> Annotated[pa.StringArray, Returns()]:
        """Return the secret serialised as JSON, once for each of ``_length`` rows."""
        # Convert pa.Scalar values to Python for JSON serialization
        plain_values = {key: scalar.as_py() for key, scalar in vgi_example_secret.items()}
        # The payload is identical for every row, so serialise it once
        # instead of calling json.dumps per row.
        payload = json.dumps(plain_values)
        return pa.array([payload] * _length, type=pa.string())
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class WhoAmIFunction(ScalarFunction):
    """Return the authenticated principal name.

    Demonstrates the Auth annotation for accessing auth context in compute().
    Over stdio transport (or when no auth is configured), returns "anonymous".

    SQL: ``SELECT whoami(1)``
    """

    class Meta:
        """Metadata for the whoami function."""

        name = "whoami"

    @classmethod
    def compute(
        cls,
        x: Annotated[pa.Int64Array, Param(doc="dummy input")],
        auth: Annotated[AuthContext, Auth()],
    ) -> Annotated[pa.StringArray, Returns()]:
        """Return the principal name repeated once per row of ``x``.

        ``x`` is only used for its length. A falsy ``auth.principal`` is
        reported as "anonymous".
        """
        name = auth.principal or "anonymous"
        # Pass the type explicitly: for an empty batch pa.array([]) would
        # otherwise infer a null-typed array instead of a string array.
        return pa.array([name] * len(x), type=pa.string())
|