vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,300 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Geospatial scalar fixtures (geo_distance_*, geo_centroid_*)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Annotated, Any
8
+
9
+ import pyarrow as pa
10
+ import pyarrow.compute as pc
11
+
12
+ from vgi.arguments import Param, Returns
13
+ from vgi.metadata import FunctionExample
14
+ from vgi.scalar_function import ScalarFunction
15
+
16
# Arrow struct layout used for every struct-typed point in this module:
# two float64 fields named "lat" and "lon".
_POINT_STRUCT_TYPE = pa.struct(
    [
        pa.field("lat", pa.float64()),
        pa.field("lon", pa.float64()),
    ]
)
17
+
18
+
19
def _euclidean_distance(
    lat1: pa.Array[Any], lon1: pa.Array[Any], lat2: pa.Array[Any], lon2: pa.Array[Any]
) -> pa.DoubleArray:
    """Return the element-wise distance sqrt((lat2-lat1)^2 + (lon2-lon1)^2).

    The four arguments are parallel coordinate arrays: (lat1, lon1) describes
    the first point of each pair and (lat2, lon2) the second.
    """
    delta_lat = pc.subtract(lat2, lat1)
    delta_lon = pc.subtract(lon2, lon1)
    lat_squared = pc.multiply(delta_lat, delta_lat)
    lon_squared = pc.multiply(delta_lon, delta_lon)
    squared_distance = pc.add(lat_squared, lon_squared)
    return pc.sqrt(squared_distance)  # type: ignore[return-value]
26
+
27
+
28
def _compute_centroid(lat_arrays: list[pa.Array[Any]], lon_arrays: list[pa.Array[Any]]) -> pa.StructArray:
    """Compute the centroid (average lat, average lon) from parallel lat/lon arrays.

    Args:
        lat_arrays: One latitude array per input point column.
        lon_arrays: One longitude array per input point column, in the same
            order as ``lat_arrays``.

    Returns:
        A struct array with fields "lat" and "lon" holding the element-wise
        sums of the inputs divided by the number of point columns.

    Raises:
        ValueError: If no point columns are supplied, or if the two lists do
            not contain the same number of arrays.
    """
    n = len(lat_arrays)
    # Fail with a clear message instead of an IndexError from lat_arrays[0].
    if n == 0:
        raise ValueError("centroid requires at least one point column")
    if len(lon_arrays) != n:
        raise ValueError("lat_arrays and lon_arrays must contain the same number of arrays")

    lat_sum: pa.Array[Any] = lat_arrays[0]
    lon_sum: pa.Array[Any] = lon_arrays[0]
    for lat, lon in zip(lat_arrays[1:], lon_arrays[1:]):
        lat_sum = pc.add(lat_sum, lat)
        lon_sum = pc.add(lon_sum, lon)

    # Divide by a float64 scalar so the averages are computed in floating point.
    divisor = pa.scalar(n, type=pa.float64())
    avg_lat = pc.divide(lat_sum, divisor)
    avg_lon = pc.divide(lon_sum, divisor)
    return pa.StructArray.from_arrays([avg_lat, avg_lon], names=["lat", "lon"])
40
+
41
+
42
class GeoDistanceStructFunction(ScalarFunction):
    """Scalar fixture: Euclidean distance between two struct-typed points.

    Both arguments are structs carrying a lat field and a lon field.

    Example:
        SQL: SELECT geo_distance_struct(p1, p2) FROM points
        Input: p1={lat: 0.0, lon: 0.0}, p2={lat: 3.0, lon: 4.0}
        Output: result=5.0

    """

    class Meta:
        """Name, description and usage examples for this function."""

        name = "geo_distance_struct"
        description = "Euclidean distance between two struct points"
        examples = [
            FunctionExample(
                sql="SELECT geo_distance_struct({lat: 0, lon: 0}, {lat: 3, lon: 4})",
                description="Distance between origin and (3, 4)",
            ),
        ]

    @classmethod
    def compute(
        cls,
        p1: Annotated[
            pa.StructArray,
            Param(doc="First point {lat, lon}", arrow_type=_POINT_STRUCT_TYPE),
        ],
        p2: Annotated[
            pa.StructArray,
            Param(doc="Second point {lat, lon}", arrow_type=_POINT_STRUCT_TYPE),
        ],
    ) -> Annotated[pa.DoubleArray, Returns()]:
        """Return the per-row distance between ``p1`` and ``p2``."""
        # Pull the coordinate columns out of each struct before delegating.
        first_lat = p1.field("lat")
        first_lon = p1.field("lon")
        second_lat = p2.field("lat")
        second_lon = p2.field("lon")
        return _euclidean_distance(first_lat, first_lon, second_lat, second_lon)
80
+
81
+
82
class GeoDistanceListFunction(ScalarFunction):
    """Scalar fixture: Euclidean distance between two list-typed points.

    Both arguments are lists of float64 values laid out as [lat, lon].

    Example:
        SQL: SELECT geo_distance_list(p1, p2) FROM points
        Input: p1=[0.0, 0.0], p2=[3.0, 4.0]
        Output: result=5.0

    """

    class Meta:
        """Name, description and usage examples for this function."""

        name = "geo_distance_list"
        description = "Euclidean distance between two list points"
        examples = [
            FunctionExample(
                sql="SELECT geo_distance_list([0, 0], [3, 4])",
                description="Distance between origin and (3, 4)",
            ),
        ]

    @classmethod
    def compute(
        cls,
        p1: Annotated[  # type: ignore[type-arg]
            pa.ListArray,
            Param(doc="First point [lat, lon]", arrow_type=pa.list_(pa.float64())),
        ],
        p2: Annotated[  # type: ignore[type-arg]
            pa.ListArray,
            Param(doc="Second point [lat, lon]", arrow_type=pa.list_(pa.float64())),
        ],
    ) -> Annotated[pa.DoubleArray, Returns()]:
        """Return the per-row distance between ``p1`` and ``p2``."""
        # Element 0 of each list is the latitude, element 1 the longitude.
        first_lat = pc.list_element(p1, 0)
        first_lon = pc.list_element(p1, 1)
        second_lat = pc.list_element(p2, 0)
        second_lon = pc.list_element(p2, 1)
        return _euclidean_distance(first_lat, first_lon, second_lat, second_lon)
125
+
126
+
127
class GeoDistanceFixedFunction(ScalarFunction):
    """Scalar fixture: Euclidean distance between two fixed-size list points.

    Both arguments are fixed-size lists of two float64 values, [lat, lon].

    Example:
        SQL: SELECT geo_distance_fixed(p1, p2) FROM points
        Input: p1=[0.0, 0.0], p2=[3.0, 4.0]
        Output: result=5.0

    """

    class Meta:
        """Name, description and usage examples for this function."""

        name = "geo_distance_fixed"
        description = "Euclidean distance between two fixed-size list points"
        examples = [
            FunctionExample(
                sql="SELECT geo_distance_fixed([0, 0], [3, 4])",
                description="Distance between origin and (3, 4)",
            ),
        ]

    @classmethod
    def compute(
        cls,
        p1: Annotated[  # type: ignore[type-arg]
            pa.FixedSizeListArray,
            Param(doc="First point [lat, lon]", arrow_type=pa.list_(pa.float64(), 2)),
        ],
        p2: Annotated[  # type: ignore[type-arg]
            pa.FixedSizeListArray,
            Param(doc="Second point [lat, lon]", arrow_type=pa.list_(pa.float64(), 2)),
        ],
    ) -> Annotated[pa.DoubleArray, Returns()]:
        """Return the per-row distance between ``p1`` and ``p2``."""
        # Element 0 of each list is the latitude, element 1 the longitude.
        first_lat = pc.list_element(p1, 0)
        first_lon = pc.list_element(p1, 1)
        second_lat = pc.list_element(p2, 0)
        second_lon = pc.list_element(p2, 1)
        return _euclidean_distance(first_lat, first_lon, second_lat, second_lon)
170
+
171
+
172
class GeoCentroidStructFunction(ScalarFunction):
    """Scalar fixture: centroid of N struct-typed points (varargs).

    The result holds the average lat and the average lon taken across all of
    the input point columns.

    Example:
        SQL: SELECT geo_centroid_struct(p1, p2) FROM points
        Input: p1={lat: 0.0, lon: 0.0}, p2={lat: 4.0, lon: 6.0}
        Output: result={lat: 2.0, lon: 3.0}

    """

    class Meta:
        """Name, description and usage examples for this function."""

        name = "geo_centroid_struct"
        description = "Centroid of N struct points"
        examples = [
            FunctionExample(
                sql="SELECT geo_centroid_struct(p1, p2) FROM points",
                description="Compute centroid of two struct points",
            ),
        ]

    @classmethod
    def compute(
        cls,
        points: Annotated[
            list[pa.StructArray],
            Param(
                doc="Point columns {lat, lon}",
                arrow_type=_POINT_STRUCT_TYPE,
                varargs=True,
            ),
        ],
    ) -> Annotated[pa.StructArray, Returns(arrow_type=_POINT_STRUCT_TYPE)]:
        """Return the per-row centroid of every column in ``points``."""
        # Split each struct column into its latitude and longitude parts.
        latitudes = [column.field("lat") for column in points]
        longitudes = [column.field("lon") for column in points]
        return _compute_centroid(latitudes, longitudes)
213
+
214
+
215
class GeoCentroidListFunction(ScalarFunction):
    """Scalar fixture: centroid of N list-typed points (varargs).

    Every input column holds points as [lat, lon] lists; the result holds the
    average lat and the average lon taken across all of those columns.

    Example:
        SQL: SELECT geo_centroid_list(p1, p2) FROM points
        Input: p1=[0.0, 0.0], p2=[4.0, 6.0]
        Output: result={lat: 2.0, lon: 3.0}

    """

    class Meta:
        """Name, description and usage examples for this function."""

        name = "geo_centroid_list"
        description = "Centroid of N list points"
        examples = [
            FunctionExample(
                sql="SELECT geo_centroid_list(p1, p2) FROM points",
                description="Compute centroid of two list points",
            ),
        ]

    @classmethod
    def compute(
        cls,
        points: Annotated[  # type: ignore[type-arg]
            list[pa.ListArray],
            Param(
                doc="Point columns [lat, lon]",
                arrow_type=pa.list_(pa.float64()),
                varargs=True,
            ),
        ],
    ) -> Annotated[pa.StructArray, Returns(arrow_type=_POINT_STRUCT_TYPE)]:
        """Return the per-row centroid of every column in ``points``."""
        # Element 0 of each list is the latitude, element 1 the longitude.
        latitudes = [pc.list_element(column, 0) for column in points]
        longitudes = [pc.list_element(column, 1) for column in points]
        return _compute_centroid(latitudes, longitudes)
257
+
258
+
259
class GeoCentroidFixedFunction(ScalarFunction):
    """Scalar fixture: centroid of N fixed-size list points (varargs).

    Every input column holds points as fixed-size [lat, lon] lists; the result
    holds the average lat and the average lon taken across all of those columns.

    Example:
        SQL: SELECT geo_centroid_fixed(p1, p2) FROM points
        Input: p1=[0.0, 0.0], p2=[4.0, 6.0]
        Output: result={lat: 2.0, lon: 3.0}

    """

    class Meta:
        """Name, description and usage examples for this function."""

        name = "geo_centroid_fixed"
        description = "Centroid of N fixed-size list points"
        examples = [
            FunctionExample(
                sql="SELECT geo_centroid_fixed(p1, p2) FROM points",
                description="Compute centroid of two fixed-size list points",
            ),
        ]

    @classmethod
    def compute(
        cls,
        points: Annotated[  # type: ignore[type-arg]
            list[pa.FixedSizeListArray],
            Param(
                doc="Point columns [lat, lon]",
                arrow_type=pa.list_(pa.float64(), 2),
                varargs=True,
            ),
        ],
    ) -> Annotated[pa.StructArray, Returns(arrow_type=_POINT_STRUCT_TYPE)]:
        """Return the per-row centroid of every column in ``points``."""
        # Element 0 of each list is the latitude, element 1 the longitude.
        latitudes = [pc.list_element(column, 0) for column in points]
        longitudes = [pc.list_element(column, 1) for column in points]
        return _compute_centroid(latitudes, longitudes)
@@ -0,0 +1,107 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Null-handling and conditional-message scalar fixtures."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Annotated
8
+
9
+ import pyarrow as pa
10
+ import pyarrow.compute as pc
11
+
12
+ from vgi.arguments import ConstParam, Param, Returns
13
+ from vgi.metadata import FunctionExample, NullHandling
14
+ from vgi.scalar_function import ScalarFunction
15
+
16
+
17
class ConditionalMessageFunction(ScalarFunction):
    """Emit a repeated message where the condition is true, else an empty string.

    The fixture shows a function taking several ConstParam arguments:
    - repeat_count (int): number of repetitions of the message
    - message (string): the text being repeated
    - condition (boolean column): selects the rows that receive the message

    Constant parameters are declared first; the column parameter follows them.

    Example:
        SQL: SELECT conditional_message(3, 'Hi! ', is_active) FROM users
        Input: is_active=[true, false, true]
        Args: repeat_count=3, message='Hi! '
        Output: result=['Hi! Hi! Hi! ', '', 'Hi! Hi! Hi! ']

    """

    class Meta:
        """Name, description and usage examples for this function."""

        name = "conditional_message"
        description = "Returns repeated message when condition is true"
        examples = [
            FunctionExample(
                sql="SELECT conditional_message(3, 'Alert! ', flag) FROM items",
                description="Show alert message for flagged items",
            ),
            FunctionExample(
                sql="SELECT conditional_message(2, '⭐', is_featured) FROM products",
                description="Add stars to featured products",
            ),
        ]

    @classmethod
    def compute(
        cls,
        repeat_count: Annotated[int, ConstParam("Number of times to repeat")],
        message: Annotated[str, ConstParam("Message to repeat")],
        condition: Annotated[pa.BooleanArray, Param(doc="Apply message condition")],
    ) -> Annotated[pa.StringArray, Returns()]:
        """Pick the repeated message for true rows and "" for false rows."""
        # Both constants are fixed for the call, so build the text once.
        text_when_true = message * repeat_count
        text_when_false = ""
        selected: pa.StringArray = pc.if_else(  # type: ignore[assignment]
            condition, text_when_true, text_when_false
        )
        return selected
62
+
63
+
64
# Arrow struct layout for a config value: {label: string, version: int64}.
_CONFIG_STRUCT_TYPE = pa.struct(
    [
        pa.field("label", pa.string()),
        pa.field("version", pa.int64()),
    ]
)
66
+
67
+
68
class NullHandlingFunction(ScalarFunction):
    """Scalar fixture showing special null handling.

    Non-null inputs are passed through unchanged; null inputs become -5000.
    Meta.null_handling is set to NullHandling.SPECIAL so that the function
    receives null values rather than having them turned into null output
    automatically.

    The parameter and return types are inferred from the pa.Int64Array
    annotations.

    Example:
        SQL: SELECT null_handling(value) FROM data
        Input: value=[1, None, 3]
        Output: result=[1, -5000, 3]

    """

    class Meta:
        """Name, description, null-handling mode and usage examples."""

        name = "null_handling"
        description = "Returns value or -5000 if null"
        null_handling = NullHandling.SPECIAL
        examples = [
            FunctionExample(
                sql="SELECT null_handling(value) FROM data",
                description="Replace null values with -5000",
            ),
        ]

    @classmethod
    def compute(
        cls,
        value: Annotated[pa.Int64Array, Param(doc="Integer value to process")],
    ) -> Annotated[pa.Int64Array, Returns()]:
        """Substitute -5000 for every null entry of ``value``."""
        is_missing = pc.is_null(value)
        placeholder = pa.scalar(-5000, type=pa.int64())
        # Take the placeholder where the input is null, the input elsewhere.
        filled: pa.Int64Array = pc.if_else(is_missing, placeholder, value)  # type: ignore[assignment]
        return filled
@@ -0,0 +1,171 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Random/seeded scalar fixtures (random_int, random_bytes, bernoulli, hash_seed)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Annotated
8
+
9
+ import pyarrow as pa
10
+
11
+ from vgi.arguments import ConstParam, OutputLength, Param, Returns
12
+ from vgi.metadata import FunctionExample, FunctionStability
13
+ from vgi.scalar_function import ScalarFunction
14
+
15
+
16
class RandomIntFunction(ScalarFunction):
    """Scalar fixture producing one random integer per row (VOLATILE stability).

    The function is declared with FunctionStability.VOLATILE: two calls with
    the same input give different results, so the database optimizer cannot
    cache or reuse them.

    The parameter and return types are inferred from the pa.Int64Array
    annotations; the stability comes from Meta.stability.

    Other stability options:
    - CONSISTENT: Same input always produces same output (deterministic)
    - CONSISTENT_WITHIN_QUERY: Same within a query, may vary across queries

    Example:
        SQL: SELECT random_int(min_col, max_col) FROM data
        Input: min_col=[1, 10, 100], max_col=[10, 100, 1000]
        Output: result=[7, 55, 823] (random values per row, different each time)

    """

    class Meta:
        """Name, description, stability and usage examples."""

        name = "random_int"
        description = "Generate random integers (demonstrates VOLATILE stability)"
        stability = FunctionStability.VOLATILE
        examples = [
            FunctionExample(
                sql="SELECT random_int(min_col, max_col) FROM data",
                description="Generate random integers between min and max values",
            ),
        ]

    @classmethod
    def compute(
        cls,
        min_val: Annotated[pa.Int64Array, Param(doc="Minimum value (inclusive)")],
        max_val: Annotated[pa.Int64Array, Param(doc="Maximum value (inclusive)")],
    ) -> Annotated[pa.Int64Array, Returns()]:
        """Draw one random integer per row within [min_val, max_val]."""
        import numpy as np

        generator = np.random.default_rng()
        lower_bounds = min_val.to_numpy()
        upper_bounds = max_val.to_numpy()
        # endpoint=True makes the upper bound inclusive without adding 1 to
        # max_val; that addition overflows int64 when max_val is INT64_MAX,
        # wrapping to a negative value and triggering "high <= 0".
        samples = generator.integers(lower_bounds, upper_bounds, endpoint=True)
        return pa.array(samples, type=pa.int64())
64
+
65
+
66
class BernoulliFunction(ScalarFunction):
    """Scalar fixture producing one random boolean per row (VOLATILE stability).

    The function takes no input columns; it only receives the number of rows
    to produce and emits a random true/false value for each of them.

    Example:
        SQL: SELECT bernoulli() FROM data

    """

    class Meta:
        """Name, description, stability and usage examples."""

        name = "bernoulli"
        description = "Generate random booleans (demonstrates VOLATILE stability)"
        stability = FunctionStability.VOLATILE
        examples = [
            FunctionExample(
                sql="SELECT bernoulli() FROM data",
                description="Generate samples from the bernoulli distribution",
            ),
        ]

    @classmethod
    def compute(
        cls,
        _length: Annotated[int, OutputLength()],
    ) -> Annotated[pa.BooleanArray, Returns()]:
        """Return ``_length`` random booleans."""
        import random

        # Each row is a 0-or-1 draw, reported as True when the draw is 1.
        flips = [random.randint(0, 1) == 1 for _ in range(_length)]
        return pa.array(flips, type=pa.bool_())
100
+
101
+
102
class HashSeedFunction(ScalarFunction):
    """Scalar fixture producing deterministic integers from a constant seed.

    Shows the single-ConstParam pattern: one constant argument
    folded at plan time, no column parameters.

    Example:
        SQL: SELECT hash_seed(42) FROM data
        Input: (no column input)
        Args: seed=42
        Output: result=[42, 43, 44, ...] (seed + row_index)

    """

    class Meta:
        """Name, description, stability and usage examples."""

        name = "hash_seed"
        description = "Generate deterministic integers from a constant seed"
        stability = FunctionStability.CONSISTENT
        examples = [
            FunctionExample(
                sql="SELECT hash_seed(42) FROM data",
                description="Generate deterministic integers seeded at 42",
            ),
        ]

    @classmethod
    def compute(
        cls,
        seed: Annotated[int, ConstParam("Seed value")],
        _length: Annotated[int, OutputLength()],
    ) -> Annotated[pa.Int64Array, Returns()]:
        """Return seed + row_index for each of the ``_length`` rows."""
        values = [seed + row_index for row_index in range(_length)]
        return pa.array(values, type=pa.int64())
137
+
138
+
139
class RandomBytesFunction(ScalarFunction):
    """Scalar fixture producing deterministic pseudo-random binary blobs from a seed."""

    class Meta:
        """Name, description, stability and usage examples."""

        name = "random_bytes"
        description = "Generate pseudo-random binary blobs from seed and length"
        stability = FunctionStability.CONSISTENT
        examples = [
            FunctionExample(
                sql="SELECT random_bytes(42, 16) FROM data",
                description="Generate a deterministic 16-byte blob per input row",
            ),
        ]

    @classmethod
    def compute(
        cls,
        seed: Annotated[int, ConstParam("Seed for pseudo-random byte generation")],
        byte_length: Annotated[int, ConstParam("Output blob length in bytes")],
        _length: Annotated[int, OutputLength()],
    ) -> Annotated[pa.BinaryArray, Returns()]:
        """Return ``_length`` blobs of ``byte_length`` bytes each.

        Raises:
            ValueError: If ``byte_length`` is negative.
        """
        import random

        if byte_length < 0:
            raise ValueError("byte_length must be >= 0")

        # A private generator seeded per call, so the same seed repeats the same bytes.
        generator = random.Random(seed)
        blobs = []
        for _ in range(_length):
            # One 8-bit draw per output byte.
            blob = bytes(generator.getrandbits(8) for _ in range(byte_length))
            blobs.append(blob)
        return pa.array(blobs, type=pa.binary())
@@ -0,0 +1,102 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Setting/secret/auth-aware scalar fixtures (multiply_by_setting, return_secret_value, who_am_i)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ from typing import Annotated, Any
9
+
10
+ import pyarrow as pa
11
+ import pyarrow.compute as pc
12
+
13
+ from vgi.arguments import Auth, OutputLength, Param, Returns, Secret, Setting
14
+ from vgi.auth import AuthContext
15
+ from vgi.metadata import FunctionExample
16
+ from vgi.scalar_function import ScalarFunction
17
+
18
+
19
class MultiplyBySettingFunction(ScalarFunction):
    """Generates the input value multiplied by a setting."""

    class Meta:
        """Function metadata."""

        name = "multiply_by_setting"
        description = "Multiply the input value by a setting value"
        examples = [
            FunctionExample(
                sql="SELECT multiply_by_setting(5)",
                description="Multiply the input value by a setting's value",
            ),
        ]

    @classmethod
    def compute(
        cls,
        value: Annotated[pa.Int64Array, Param(doc="Integer value to multiply")],
        multiplier: Annotated[pa.Scalar[Any] | None, Setting()],
    ) -> Annotated[pa.Int64Array, Returns()]:
        """Multiply every element of ``value`` by the ``multiplier`` setting.

        Args:
            value: Integer column to scale.
            multiplier: Setting value supplied through the Setting annotation,
                or None when no value is available.

        Returns:
            The element-wise product of ``multiplier`` and ``value``.

        Raises:
            ValueError: If ``multiplier`` is None.
        """
        # Raise explicitly rather than assert: asserts are stripped under
        # ``python -O``, which would let None reach pc.multiply.
        if multiplier is None:
            raise ValueError("multiply_by_setting requires a value for the 'multiplier' setting")
        return pc.multiply(multiplier, value)
43
+
44
+
45
class ReturnSecretValueFunction(ScalarFunction):
    """Return the value of a secret.

    Example:
        SQL: SELECT return_secret_value()

    """

    class Meta:
        """Function metadata."""

        name = "return_secret_value"
        description = "Return a secret's value"
        examples = [
            FunctionExample(
                sql="SELECT return_secret_value()",
                description="Return a secret's value",
            ),
        ]

    @classmethod
    def compute(
        cls,
        vgi_example_secret: Annotated[dict[str, pa.Scalar[Any]], Secret("vgi_example")],
        _length: Annotated[int, OutputLength()],
    ) -> Annotated[pa.StringArray, Returns()]:
        """Return the secret, JSON-encoded, once per output row.

        Args:
            vgi_example_secret: Mapping of secret field names to Arrow scalars,
                supplied through the Secret("vgi_example") annotation.
            _length: Number of rows to produce.

        Returns:
            A string array of ``_length`` identical JSON documents.
        """
        # Convert pa.Scalar values to Python for JSON serialization
        secret_dict = {k: v.as_py() for k, v in vgi_example_secret.items()}
        # The payload is the same for every row, so serialize it once.
        payload = json.dumps(secret_dict)
        return pa.array([payload] * _length, type=pa.string())
78
+
79
+
80
class WhoAmIFunction(ScalarFunction):
    """Return the authenticated principal name.

    Demonstrates the Auth annotation for accessing auth context in compute().
    Over stdio transport (or when no auth is configured), returns "anonymous".

    SQL: ``SELECT whoami(1)``
    """

    class Meta:
        """Metadata for the whoami function."""

        name = "whoami"

    @classmethod
    def compute(
        cls,
        x: Annotated[pa.Int64Array, Param(doc="dummy input")],
        auth: Annotated[AuthContext, Auth()],
    ) -> Annotated[pa.StringArray, Returns()]:
        """Return the authenticated principal name, once per input row.

        Args:
            x: Dummy column; only its length is used, to size the output.
            auth: Auth context supplied through the Auth annotation.

        Returns:
            A string array as long as ``x`` in which every entry is
            ``auth.principal``, or "anonymous" when that value is falsy.
        """
        name = auth.principal or "anonymous"
        # Pin the type: with no elements to inspect, pa.array() would infer a
        # null-typed array for an empty batch instead of a string array.
        return pa.array([name] * len(x), type=pa.string())