vgi-python 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. vgi/_duckdb.py +3 -0
  2. vgi/_test_fixtures/aggregate/dynamic.py +7 -1
  3. vgi/_test_fixtures/scalar/__init__.py +4 -0
  4. vgi/_test_fixtures/scalar/settings_secrets.py +73 -0
  5. vgi/_test_fixtures/table/__init__.py +4 -0
  6. vgi/_test_fixtures/table/filters.py +128 -0
  7. vgi/_test_fixtures/table/late_materialization.py +3 -0
  8. vgi/_test_fixtures/table/make_series.py +15 -0
  9. vgi/_test_fixtures/table/misc.py +4 -0
  10. vgi/_test_fixtures/table/pairs.py +12 -0
  11. vgi/_test_fixtures/table/sequence.py +28 -0
  12. vgi/_test_fixtures/table/settings.py +6 -0
  13. vgi/_test_fixtures/table/typed_probe.py +154 -0
  14. vgi/_test_fixtures/table_in_out.py +8 -26
  15. vgi/_test_fixtures/worker.py +9 -0
  16. vgi/aggregate_function.py +29 -11
  17. vgi/argument_spec.py +20 -20
  18. vgi/arguments.py +114 -153
  19. vgi/catalog/_descriptor_spec.py +246 -0
  20. vgi/catalog/attach_option.py +14 -171
  21. vgi/catalog/catalog_interface.py +390 -264
  22. vgi/catalog/descriptors.py +59 -26
  23. vgi/catalog/secret_type.py +1 -0
  24. vgi/catalog/setting.py +19 -214
  25. vgi/catalog/storage.py +8 -4
  26. vgi/client/catalog_mixin.py +37 -33
  27. vgi/client/cli_catalog.py +4 -16
  28. vgi/client/cli_schema.py +12 -73
  29. vgi/client/cli_table.py +30 -199
  30. vgi/client/cli_utils.py +74 -19
  31. vgi/client/cli_view.py +12 -74
  32. vgi/client/client.py +104 -79
  33. vgi/exceptions.py +4 -0
  34. vgi/function.py +9 -33
  35. vgi/function_storage.py +11 -46
  36. vgi/function_storage_azure_sql.py +6 -6
  37. vgi/function_storage_cf_do.py +23 -3
  38. vgi/http/worker_page.py +18 -8
  39. vgi/invocation.py +10 -10
  40. vgi/meta_worker.py +7 -7
  41. vgi/metadata.py +111 -46
  42. vgi/otel.py +3 -3
  43. vgi/protocol.py +504 -94
  44. vgi/scalar_function.py +93 -72
  45. vgi/schema_utils.py +2 -2
  46. vgi/secret_protocol.py +22 -3
  47. vgi/secret_service.py +9 -6
  48. vgi/serve.py +11 -11
  49. vgi/table_buffering_function.py +28 -22
  50. vgi/table_filter_pushdown.py +435 -61
  51. vgi/table_function.py +279 -82
  52. vgi/table_in_out_function.py +88 -28
  53. vgi/transactor/client.py +1 -1
  54. vgi/transactor/protocol.py +1 -1
  55. vgi/transactor/server.py +26 -64
  56. vgi/worker.py +125 -272
  57. {vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/METADATA +89 -176
  58. {vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/RECORD +61 -59
  59. {vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/WHEEL +0 -0
  60. {vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/entry_points.txt +0 -0
  61. {vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/licenses/LICENSE +0 -0
vgi/_duckdb.py CHANGED
@@ -32,6 +32,9 @@ def engine_module() -> ModuleType:
32
32
 
33
33
  The result is cached for the life of the process.
34
34
 
35
+ Returns:
36
+ The resolved engine module (``haybarn`` if available, else ``duckdb``).
37
+
35
38
  Raises:
36
39
  ImportError: if neither engine is installed.
37
40
 
@@ -190,7 +190,7 @@ class _DynamicAggregateBase(AggregateFunction[DynamicState]):
190
190
  # when DuckDB batches many partitions into shared buffers.
191
191
 
192
192
  @staticmethod
193
- def _slice_to_frame( # noqa: D417
193
+ def _slice_to_frame(
194
194
  partition: WindowPartition,
195
195
  subframes: list[tuple[int, int]],
196
196
  data_start: int,
@@ -198,11 +198,17 @@ class _DynamicAggregateBase(AggregateFunction[DynamicState]):
198
198
  """Slice all partition columns to the frame rows.
199
199
 
200
200
  Args:
201
+ partition: The window partition whose columns are sliced.
202
+ subframes: List of ``(begin, end)`` index tuples describing the
203
+ row ranges to include in the frame.
201
204
  data_start: Index where data columns begin (header columns are
202
205
  ``[0 .. data_start)``). NULL-drop is applied on data columns
203
206
  only — matches the filtering ``_do_update`` performs in the
204
207
  non-window path.
205
208
 
209
+ Returns:
210
+ A table containing only the frame rows across all partition columns.
211
+
206
212
  """
207
213
  num_cols = partition.inputs.num_columns
208
214
  cols = [partition.inputs.column(i) for i in range(num_cols)]
@@ -58,6 +58,8 @@ from vgi._test_fixtures.scalar.random_demo import (
58
58
  from vgi._test_fixtures.scalar.settings_secrets import (
59
59
  MultiplyBySettingFunction,
60
60
  ReturnSecretValueFunction,
61
+ ScaleBySettingFunction,
62
+ SecretFieldFunction,
61
63
  WhoAmIFunction,
62
64
  )
63
65
  from vgi._test_fixtures.scalar.type_info import (
@@ -103,6 +105,8 @@ __all__ = [
103
105
  "RandomBytesFunction",
104
106
  "RandomIntFunction",
105
107
  "ReturnSecretValueFunction",
108
+ "ScaleBySettingFunction",
109
+ "SecretFieldFunction",
106
110
  "SmartFormatPrefixFunction",
107
111
  "SmartFormatWidthFunction",
108
112
  "SumValuesFunction",
@@ -42,6 +42,79 @@ class MultiplyBySettingFunction(ScalarFunction):
42
42
  return pc.multiply(multiplier, value)
43
43
 
44
44
 
45
class ScaleBySettingFunction(ScalarFunction):
    """Multiply each input value by the DOUBLE setting ``scale_factor``.

    Floating-point sibling of :class:`MultiplyBySettingFunction`: the factor
    comes from a float setting instead of an integer one.

    Example:
        SQL: SELECT scale_by_setting(4.0)

    """

    class Meta:
        """Function metadata."""

        name = "scale_by_setting"
        description = "Scale the input value by the float setting `scale_factor`"
        examples = [
            FunctionExample(
                sql="SELECT scale_by_setting(4.0)",
                description="Scale the input value by the float setting's value",
            ),
        ]

    @classmethod
    def compute(
        cls,
        value: Annotated[pa.DoubleArray, Param(doc="Value to scale")],
        scale_factor: Annotated[pa.Scalar[Any] | None, Setting()],
    ) -> Annotated[pa.DoubleArray, Returns()]:
        """Return ``value`` scaled element-wise by the ``scale_factor`` setting.

        Args:
            value: Input column to scale.
            scale_factor: The setting's scalar, or ``None`` when it is absent.

        Returns:
            ``value`` multiplied by the setting; the factor falls back to
            ``1.0`` when the setting is missing or holds a null.

        """
        # Unwrap the Arrow scalar once; a missing setting and a null-valued
        # setting both collapse to None here.
        resolved = None if scale_factor is None else scale_factor.as_py()
        factor = 1.0 if resolved is None else resolved
        multiplier = pa.scalar(factor, type=pa.float64())
        return pc.multiply(multiplier, value)
77
+
78
+
79
class SecretFieldFunction(ScalarFunction):
    """Report the ``port`` and ``secret_string`` fields of the ``vgi_example`` secret.

    Both fields are fetched by name from the secret mapping and rendered into
    a single ``port=...;name=...`` string that is repeated for every row.

    Example:
        SQL: SELECT secret_field()

    """

    class Meta:
        """Function metadata."""

        name = "secret_field"
        description = "Look up secret fields by name"
        examples = [
            FunctionExample(
                sql="SELECT secret_field()",
                description="Look up secret fields by name",
            ),
        ]

    @classmethod
    def compute(
        cls,
        vgi_example: Annotated[dict[str, pa.Scalar[Any]], Secret("vgi_example")],
        _length: Annotated[int, OutputLength()],
    ) -> Annotated[pa.StringArray, Returns()]:
        """Build the per-row ``port=...;name=...`` string from the secret.

        Args:
            vgi_example: Mapping of secret field name to Arrow scalar.
            _length: Number of rows to produce.

        Returns:
            A string array of ``_length`` identical entries. A field that is
            absent from the secret renders as an empty string.

        """

        def _as_text(scalar: pa.Scalar[Any] | None) -> str:
            # Absent field -> empty string; otherwise stringify the Python value.
            return "" if scalar is None else str(scalar.as_py())

        port_text = _as_text(vgi_example.get("port"))
        name_text = _as_text(vgi_example.get("secret_string"))
        row = f"port={port_text};name={name_text}"
        return pa.array([row] * _length, type=pa.string())
116
+
117
+
45
118
  class ReturnSecretValueFunction(ScalarFunction):
46
119
  """Return the value of a secret.
47
120
 
@@ -45,6 +45,7 @@ from vgi._test_fixtures.table.filters import (
45
45
  FilterEchoFunction,
46
46
  FilterEchoPartitionedFunction,
47
47
  FilterEchoTableScanFunction,
48
+ FilteredColumnsEchoFunction,
48
49
  SpatialFilterExampleFunction,
49
50
  ValuePruneFunction,
50
51
  )
@@ -123,6 +124,7 @@ from vgi._test_fixtures.table.settings import (
123
124
  StructSettingsFunction,
124
125
  )
125
126
  from vgi._test_fixtures.table.transaction_storage import TxCachedValueFunction
127
+ from vgi._test_fixtures.table.typed_probe import TypedProbeFunction
126
128
  from vgi._test_fixtures.table.versioned import (
127
129
  _CURRENT_VERSION,
128
130
  _VERSIONED_CONSTRAINTS_CURRENT,
@@ -137,6 +139,7 @@ from vgi._test_fixtures.table.versioned import (
137
139
  )
138
140
 
139
141
  __all__ = [
142
+ "TypedProbeFunction",
140
143
  "_CURRENT_VERSION",
141
144
  "_VERSIONED_CONSTRAINTS_CURRENT",
142
145
  "_VERSIONED_CONSTRAINTS_DATA",
@@ -161,6 +164,7 @@ __all__ = [
161
164
  "FilterEchoFunction",
162
165
  "FilterEchoPartitionedFunction",
163
166
  "FilterEchoTableScanFunction",
167
+ "FilteredColumnsEchoFunction",
164
168
  "GeneratorExceptionFunction",
165
169
  "ValuePruneFunction",
166
170
  "LateMaterializationFunction",
@@ -116,6 +116,9 @@ class FilterEchoFunction(TableFunctionGenerator[FilterEchoFunctionArgs, FilterEc
116
116
  SELECT * FROM filter_echo(10) WHERE n >= 8
117
117
  Returns: rows 8-9 with pushed_filters showing "n >= 8"
118
118
 
119
+ Attributes:
120
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
121
+
119
122
  """
120
123
 
121
124
  class Meta:
@@ -287,6 +290,125 @@ class ValuePruneFunction(TableFunctionGenerator[_ValuePruneArgs, _ValuePruneStat
287
290
  state.cursor += size
288
291
 
289
292
 
293
+ # ============================================================================
294
+ # FilteredColumnsEchoFunction — echoes the column-introspection accessors on the
295
+ # pushed-down filter set: filtered_columns(), has_filter_for_column(), and the
296
+ # typed (string-capable) get_column_values(). A query's WHERE clause is reflected
297
+ # back as diagnostic columns so each accessor is observable end-to-end.
298
+ # ============================================================================
299
+
300
+
301
@dataclass(slots=True, frozen=True)
class _FilteredColumnsEchoArgs:
    """Call arguments accepted by FilteredColumnsEchoFunction.

    Immutable (``frozen=True``) and slot-based.
    """

    # Positional argument 0: how many rows to generate (must be >= 0).
    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
    # Named argument: maximum rows per emitted batch (must be >= 1).
    batch_size: Annotated[int, Arg("batch_size", default=2048, doc="Batch size for output", ge=1)]
307
+
308
+
309
@dataclass(kw_only=True)
class _FilteredColumnsEchoState(ArrowSerializableDataclass):
    """Filter diagnostics resolved at init time, plus the emit cursor.

    The state is serializable so the HTTP rehydrate path preserves the
    resolved diagnostics.
    """

    # Total number of rows to generate.
    count: int
    # Sorted, comma-joined names of the columns referenced by pushed filters.
    filtered_cols: str
    # Whether any pushed filter references column "n".
    has_n: bool
    # Whether any pushed filter references column "tag".
    has_tag: bool
    # Sorted, comma-joined values resolved for "tag", or "(none)".
    tag_values: str
    # Index of the next row to emit.
    cursor: int = 0
319
+
320
+
321
@init_single_worker
@bind_fixed_schema
@_cardinality_from_count
class FilteredColumnsEchoFunction(TableFunctionGenerator[_FilteredColumnsEchoArgs, _FilteredColumnsEchoState]):
    """Echo the column-introspection accessors of the pushed-down filter set.

    Every generated row carries the same diagnostics, resolved once in
    :meth:`initial_state`:

    * ``filtered_cols`` - sorted, comma-joined ``filtered_columns``.
    * ``has_n`` / ``has_tag`` - ``has_filter_for_column()`` for each column.
    * ``tag_values`` - sorted, comma-joined ``get_column_values('tag')``
      result, or ``"(none)"`` when that accessor returns ``None``.

    NOTE(review): ``Meta.description`` mentions ``get_column_values_array``
    while this class calls ``get_column_values`` - confirm which name is
    current.
    """

    class Meta:
        """Metadata for FilteredColumnsEchoFunction."""

        name = "filtered_columns_echo"
        description = "Echoes filtered_columns / has_filter_for_column / get_column_values_array"
        categories = ["generator", "diagnostic"]
        filter_pushdown = True
        auto_apply_filters = True
        projection_pushdown = True

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(
        {
            "n": pa.int64(),
            "tag": pa.utf8(),
            "filtered_cols": pa.utf8(),
            "has_n": pa.bool_(),
            "has_tag": pa.bool_(),
            "tag_values": pa.utf8(),
        }
    )

    @classmethod
    def initial_state(cls, params: ProcessParams[_FilteredColumnsEchoArgs]) -> _FilteredColumnsEchoState:
        """Compute the diagnostics once from the init call's pushed-down filters.

        Args:
            params: Process parameters; ``params.init_call`` must be set.

        Returns:
            State holding the row count and the resolved diagnostics. When no
            filters were pushed down the diagnostics are ``""``, ``False``,
            ``False`` and ``"(none)"``.

        """
        init_call = params.init_call
        assert init_call is not None
        raw_filters = init_call.pushdown_filters
        join_keys = init_call.join_keys
        resolved = None if raw_filters is None else cls.pushdown_filters(raw_filters, join_keys=join_keys)

        # Nothing pushed down: report the neutral defaults.
        if resolved is None:
            return _FilteredColumnsEchoState(
                count=params.args.count,
                filtered_cols="",
                has_n=False,
                has_tag=False,
                tag_values="(none)",
            )

        column_names = ",".join(sorted(resolved.filtered_columns))
        n_is_filtered = resolved.has_filter_for_column("n")
        tag_is_filtered = resolved.has_filter_for_column("tag")
        tag_array = resolved.get_column_values("tag")
        if tag_array is None:
            tag_text = "(none)"
        else:
            # Nulls are dropped before sorting and joining.
            non_null = (str(item) for item in tag_array.to_pylist() if item is not None)
            tag_text = ",".join(sorted(non_null))

        return _FilteredColumnsEchoState(
            count=params.args.count,
            filtered_cols=column_names,
            has_n=n_is_filtered,
            has_tag=tag_is_filtered,
            tag_values=tag_text,
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[_FilteredColumnsEchoArgs],
        state: _FilteredColumnsEchoState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch of rows, or finish once ``count`` rows are out.

        Args:
            params: Process parameters (supplies ``batch_size`` and the output schema).
            state: Mutable state; ``cursor`` is advanced by the batch size.
            out: Collector that receives the batch or the finish signal.

        """
        remaining = state.count - state.cursor
        if remaining <= 0:
            out.finish()
            return

        size = min(remaining, params.args.batch_size)
        row_ids = list(range(state.cursor, state.cursor + size))
        columns = {
            "n": row_ids,
            "tag": [f"t{i}" for i in row_ids],
            # The diagnostics are constant across the batch.
            "filtered_cols": [state.filtered_cols] * size,
            "has_n": [state.has_n] * size,
            "has_tag": [state.has_tag] * size,
            "tag_values": [state.tag_values] * size,
        }
        batch = pa.RecordBatch.from_pydict(columns, schema=params.output_schema)
        out.emit(batch)
        state.cursor += size
410
+
411
+
290
412
  # ============================================================================
291
413
  # DictFilterEchoFunction — output column declared as a *dictionary* Arrow type
292
414
  # (dictionary<int8, utf8>) with no ENUM metadata. DuckDB maps such a column to
@@ -354,6 +476,9 @@ class DictFilterEchoFunction(TableFunctionGenerator[_DictFilterEchoArgs, _DictFi
354
476
  SELECT * FROM dict_filter_echo(6) WHERE s = 'green'
355
477
  Returns: rows 1 and 4.
356
478
 
479
+ Attributes:
480
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
481
+
357
482
  """
358
483
 
359
484
  class Meta:
@@ -483,6 +608,9 @@ class SpatialFilterExampleFunction(TableFunctionGenerator[_SpatialFilterArgs, _S
483
608
  SELECT * FROM spatial_filter_example(100) WHERE geom && ST_MakeEnvelope(0, 0, 0.5, 0.5)
484
609
  Returns: points in the lower-left quadrant of the unit square.
485
610
 
611
+ Attributes:
612
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
613
+
486
614
  """
487
615
 
488
616
  class Meta:
@@ -148,6 +148,9 @@ class LateMaterializationFunction(TableFunctionGenerator[LateMaterializationFunc
148
148
  -------
149
149
  SELECT row_id, payload FROM late_materialization(100000) ORDER BY ord LIMIT 10
150
150
 
151
+ Attributes:
152
+ FunctionArguments: The argument dataclass type bound to this function.
153
+
151
154
  """
152
155
 
153
156
  FunctionArguments = LateMaterializationFunctionArgs
@@ -77,6 +77,9 @@ class MakeSeriesCountFunction(TableFunctionGenerator[MakeSeriesCountArgs, MakeSe
77
77
  SELECT * FROM make_series(5)
78
78
  Returns: 0, 1, 2, 3, 4
79
79
 
80
+ Attributes:
81
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
82
+
80
83
  """
81
84
 
82
85
  FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
@@ -113,6 +116,9 @@ class MakeSeriesRangeFunction(TableFunctionGenerator[MakeSeriesRangeArgs, MakeSe
113
116
  SELECT * FROM make_series(3, 7)
114
117
  Returns: 3, 4, 5, 6
115
118
 
119
+ Attributes:
120
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
121
+
116
122
  """
117
123
 
118
124
  FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
@@ -149,6 +155,9 @@ class MakeSeriesStepFunction(TableFunctionGenerator[MakeSeriesStepArgs, MakeSeri
149
155
  SELECT * FROM make_series(0, 10, 3)
150
156
  Returns: 0, 3, 6, 9
151
157
 
158
+ Attributes:
159
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
160
+
152
161
  """
153
162
 
154
163
  FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
@@ -195,6 +204,9 @@ class MakeSeriesCsvFunction(TableFunctionGenerator[MakeSeriesCsvArgs, MakeSeries
195
204
  SELECT * FROM make_series('10,20,30')
196
205
  Returns: 10, 20, 30
197
206
 
207
+ Attributes:
208
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
209
+
198
210
  """
199
211
 
200
212
  FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
@@ -243,6 +255,9 @@ class MakeSeriesFloatFunction(TableFunctionGenerator[MakeSeriesFloatArgs, MakeSe
243
255
  SELECT * FROM make_series(0.5)
244
256
  Returns: 0.0, 0.5, 1.0, ..., 4.5
245
257
 
258
+ Attributes:
259
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
260
+
246
261
  """
247
262
 
248
263
  FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_FLOAT_SCHEMA
@@ -178,6 +178,10 @@ class ProjectedDataFunction(TableFunctionGenerator[ProjectedDataFunctionArgument
178
178
  SELECT id, value FROM projected_data(10) -- Only computes id and value
179
179
  Returns: 10 rows with id and value columns only
180
180
 
181
+ Attributes:
182
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
183
+ BATCH_SIZE: Number of rows emitted per output batch.
184
+
181
185
  """
182
186
 
183
187
  class Meta:
@@ -83,6 +83,9 @@ class ConstantColumnsFunction(TableFunctionGenerator[ConstantColumnsFunctionArgu
83
83
  Returns: [{"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"},
84
84
  {"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"}]
85
85
 
86
+ Attributes:
87
+ BATCH_SIZE: Number of rows emitted per output batch.
88
+
86
89
  """
87
90
 
88
91
  class Meta:
@@ -188,6 +191,9 @@ class MakePairsIntFunction(TableFunctionGenerator[MakePairsIntArgs, MakePairsInt
188
191
  SELECT * FROM make_pairs(1, 4)
189
192
  Returns: (1,2), (2,4), (3,6)
190
193
 
194
+ Attributes:
195
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
196
+
191
197
  """
192
198
 
193
199
  FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_INT_SCHEMA
@@ -223,6 +229,9 @@ class MakePairsStrFunction(TableFunctionGenerator[MakePairsStrArgs, MakePairsStr
223
229
  SELECT * FROM make_pairs('row_', '_end')
224
230
  Returns: ('row_0','_end0'), ('row_1','_end1'), ...
225
231
 
232
+ Attributes:
233
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
234
+
226
235
  """
227
236
 
228
237
  FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_STR_SCHEMA
@@ -284,6 +293,9 @@ class MakePairsIntStrFunction(TableFunctionGenerator[MakePairsIntStrArgs, MakePa
284
293
  SELECT * FROM make_pairs(10, 'item_')
285
294
  Returns: (10, 'item_0'), (11, 'item_1'), ..., (14, 'item_4')
286
295
 
296
+ Attributes:
297
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
298
+
287
299
  """
288
300
 
289
301
  FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_MIXED_SCHEMA
@@ -69,6 +69,11 @@ class SequenceFunction(_BaseSequenceFunction):
69
69
  SELECT * FROM sequence(1000, batch_size := 100)
70
70
  Returns: integers 0-999 in batches of 100 rows each
71
71
 
72
+ Attributes:
73
+ FunctionArguments: The argument dataclass type bound to this function.
74
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
75
+ NUMPY_DTYPE: NumPy dtype used to build the output column(s).
76
+
72
77
  """
73
78
 
74
79
  FunctionArguments = SequenceFunctionArgs
@@ -144,6 +149,10 @@ class NamedParamsEchoFunction(_BaseSequenceFunction):
144
149
  SELECT * FROM named_params_echo(3, greeting := 'hi', multiplier := 10)
145
150
  Returns: rows with id=0..2, greeting='hi', value=id*10, float_value=id*1.0, enabled=true
146
151
 
152
+ Attributes:
153
+ FunctionArguments: The argument dataclass type bound to this function.
154
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
155
+
147
156
  """
148
157
 
149
158
  FunctionArguments = NamedParamsEchoFunctionArgs
@@ -242,6 +251,10 @@ class NestedSequenceFunction(_BaseSequenceFunction):
242
251
  SELECT metadata.index FROM nested_sequence(10)
243
252
  Test projection pushdown with struct field access
244
253
 
254
+ Attributes:
255
+ FunctionArguments: The argument dataclass type bound to this function.
256
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
257
+
245
258
  """
246
259
 
247
260
  class Meta:
@@ -356,6 +369,12 @@ class DoubleSequenceFunction(_BaseSequenceFunction):
356
369
  SELECT * FROM double_sequence(1000, batch_size := 100)
357
370
  Returns: floats 0.0-999.0 in batches of 100 rows each
358
371
 
372
+ Attributes:
373
+ FunctionArguments: The argument dataclass type bound to this function.
374
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
375
+ NUMPY_DTYPE: NumPy dtype used to build the output column(s).
376
+ STATS_ARROW_TYPE: Arrow type used for the column statistics this function reports.
377
+
359
378
  """
360
379
 
361
380
  FunctionArguments = DoubleSequenceFunctionArguments
@@ -441,6 +460,11 @@ class PartitionedSequenceFunction(
441
460
  With count=5 and increment=10:
442
461
  Combined output: [0, 10, 20, 30, 40]
443
462
 
463
+ Attributes:
464
+ MAX_PARTITIONS: Maximum number of partitions this function emits.
465
+ BATCH_SIZE: Number of rows emitted per output batch.
466
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
467
+
444
468
  """
445
469
 
446
470
  class Meta:
@@ -556,6 +580,10 @@ class TenThousandFunction(TableFunctionGenerator[TenThousandFunctionArguments, T
556
580
  SELECT * FROM ten_thousand()
557
581
  Returns: [{"n": 0}, {"n": 1}, ..., {"n": 9999}]
558
582
 
583
+ Attributes:
584
+ BATCH_SIZE: Number of rows emitted per output batch.
585
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
586
+
559
587
  """
560
588
 
561
589
  class Meta:
@@ -75,6 +75,9 @@ class SettingsAwareFunction(TableFunctionGenerator[SettingsAwareFunctionArgument
75
75
  With settings={vgi_verbose_mode: true, greeting: "Hi", multiplier: 2}:
76
76
  Returns: [{"id": 0, "greeting": "Hi", "value": 0.0, "details": "row_0"}, ...]
77
77
 
78
+ Attributes:
79
+ BATCH_SIZE: Number of rows emitted per output batch.
80
+
78
81
  """
79
82
 
80
83
  class Meta:
@@ -207,6 +210,9 @@ class StructSettingsFunction(TableFunctionGenerator[StructSettingsFunctionArgume
207
210
  With config={'start': 10, 'step': 5, 'label': 'item'} and count=3:
208
211
  Returns: [{"n": 10, "label": "item_0"}, {"n": 15, "label": "item_1"}, {"n": 20, "label": "item_2"}]
209
212
 
213
+ Attributes:
214
+ FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
215
+
210
216
  """
211
217
 
212
218
  class Meta:
@@ -0,0 +1,154 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """typed_probe — exercises typed const-argument binding and typed column emit.
4
+
5
+ Const args cover the less-common Arrow scalar types — TIMESTAMP, INTERVAL
6
+ (duration), BLOB and UBIGINT — each with a default so calling ``typed_probe(n)``
7
+ drives the default path and passing named args drives the scalar-extraction
8
+ path. The output echoes the bound values into uint64 / int64 / blob / double
9
+ columns. Values are echoed in normalized integer/byte form so this fixture and
10
+ its vgi-go counterpart produce byte-identical results for the shared test.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import datetime
16
+ from dataclasses import dataclass
17
+ from typing import Annotated, ClassVar
18
+
19
+ import pyarrow as pa
20
+ from vgi_rpc import ArrowSerializableDataclass
21
+ from vgi_rpc.rpc import OutputCollector
22
+
23
+ from vgi.arguments import Arg
24
+ from vgi.schema_utils import schema
25
+ from vgi.table_function import (
26
+ ProcessParams,
27
+ TableFunctionGenerator,
28
+ bind_fixed_schema,
29
+ init_single_worker,
30
+ )
31
+
32
+ _EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.UTC)
33
+
34
+
35
def _iv_to_ms(iv: object) -> int:
    """Reduce a duration/interval constant to a whole number of milliseconds.

    Two shapes are handled. A ``datetime.timedelta`` (how a declared default
    arrives) is floor-divided by one millisecond. Anything else is read
    through its ``months`` / ``days`` / ``nanoseconds`` attributes, which is
    the shape of pyarrow's ``MonthDayNano`` used for SQL ``INTERVAL``
    literals. Months count as 30 days and days as 24 hours, following
    vgi-go's GetScalarDuration collapse so both implementations agree.

    Args:
        iv: The interval value; any attribute that is missing counts as 0.

    Returns:
        The interval length in milliseconds.

    NOTE(review): floor division rounds negative sub-millisecond values
    toward negative infinity - confirm vgi-go rounds the same way.
    """
    if isinstance(iv, datetime.timedelta):
        one_ms = datetime.timedelta(milliseconds=1)
        return iv // one_ms

    ms_per_day = 24 * 3600 * 1000
    month_part = getattr(iv, "months", 0) * 30 * ms_per_day
    day_part = getattr(iv, "days", 0) * ms_per_day
    nano_part = getattr(iv, "nanoseconds", 0) // 1_000_000
    return month_part + day_part + nano_part
49
+
50
+
51
# Output schema of typed_probe: row index, timestamp (us), interval (ms),
# blob payload, unsigned constant and float column.
TYPED_PROBE_SCHEMA = schema(
    idx=pa.uint64(),
    ts_us=pa.int64(),
    iv_ms=pa.int64(),
    payload=pa.binary(),
    ub=pa.uint64(),
    f=pa.float64(),
)
59
+
60
+
61
@dataclass(kw_only=True)
class TypedProbeArgs:
    """Call arguments for TypedProbeFunction.

    ``n`` is positional; every other argument is a named constant with a
    default, one per scalar type being probed.
    """

    # Positional argument 0: number of rows to emit (must be >= 0).
    n: Annotated[int, Arg(0, doc="Number of rows to emit", ge=0)]
    # Timezone-aware timestamp constant, declared as microsecond UTC.
    ts: Annotated[
        datetime.datetime,
        Arg(
            "ts",
            default=datetime.datetime(2026, 1, 2, 3, 4, 5, tzinfo=datetime.UTC),
            arrow_type=pa.timestamp("us", tz="UTC"),
            doc="Timestamp const (TIMESTAMPTZ)",
        ),
    ]
    # Interval constant, declared as a nanosecond duration; defaults to 1.5 s.
    iv: Annotated[
        datetime.timedelta,
        Arg(
            "iv",
            default=datetime.timedelta(milliseconds=1500),
            arrow_type=pa.duration("ns"),
            doc="Interval const (INTERVAL)",
        ),
    ]
    # Binary constant.
    blob: Annotated[
        bytes,
        Arg("blob", default=b"vgi", arrow_type=pa.binary(), doc="Blob const (BLOB)"),
    ]
    # Unsigned 64-bit constant.
    ub: Annotated[
        int,
        Arg("ub", default=9, arrow_type=pa.uint64(), doc="Unsigned const (UBIGINT)"),
    ]
    # Float constant; no explicit arrow_type is given for this one.
    f: Annotated[float, Arg("f", default=2.5, doc="Float const (DOUBLE)")]
93
+
94
+
95
@dataclass(kw_only=True)
class TypedProbeState(ArrowSerializableDataclass):
    """Normalized constant values plus the emit position for typed_probe."""

    # Total number of rows to emit.
    n: int
    # Timestamp constant as microseconds since the Unix epoch.
    ts_us: int
    # Interval constant collapsed to whole milliseconds.
    iv_ms: int
    # Blob constant, passed through unchanged.
    payload: bytes
    # Unsigned constant, passed through unchanged.
    ub: int
    # Float constant; each emitted row adds its row index to it.
    f: float
    # Index of the first row not yet emitted.
    offset: int = 0
106
+
107
+
108
@init_single_worker
@bind_fixed_schema
class TypedProbeFunction(TableFunctionGenerator[TypedProbeArgs, TypedProbeState]):
    """Reflect typed constant arguments back as typed output columns.

    The timestamp, interval, blob and unsigned constants are normalized once
    in :meth:`initial_state` and then repeated on every emitted row; the
    float column is the float constant plus the row index.
    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = TYPED_PROBE_SCHEMA

    class Meta:
        """Function metadata."""

        name = "typed_probe"
        description = "Echoes typed const args (timestamp/interval/blob/ubigint) into typed columns"

    @classmethod
    def initial_state(cls, params: ProcessParams[TypedProbeArgs]) -> TypedProbeState:
        """Normalize the bound constants into integer/byte form.

        Args:
            params: Process parameters carrying the bound ``TypedProbeArgs``.

        Returns:
            State with the timestamp as microseconds since the epoch, the
            interval as whole milliseconds, and the remaining constants
            copied through.

        """
        args = params.args
        one_us = datetime.timedelta(microseconds=1)
        since_epoch = args.ts - _EPOCH
        return TypedProbeState(
            n=args.n,
            ts_us=since_epoch // one_us,
            iv_ms=_iv_to_ms(args.iv),
            payload=args.blob,
            ub=args.ub,
            f=args.f,
        )

    @classmethod
    def process(cls, params: ProcessParams[TypedProbeArgs], state: TypedProbeState, out: OutputCollector) -> None:
        """Emit every remaining row as one batch, then finish on the next call.

        Args:
            params: Process parameters (not read here).
            state: Mutable state; ``offset`` is moved to ``n`` before emitting.
            out: Collector that receives the batch or the finish signal.

        """
        first, last = state.offset, state.n
        if first >= last:
            out.finish()
            return

        indices = list(range(first, last))
        row_count = len(indices)
        # Mark everything as emitted before the arrays are built.
        state.offset = last

        columns = {
            "idx": pa.array(indices, type=pa.uint64()),
            "ts_us": pa.array([state.ts_us] * row_count, type=pa.int64()),
            "iv_ms": pa.array([state.iv_ms] * row_count, type=pa.int64()),
            "payload": pa.array([state.payload] * row_count, type=pa.binary()),
            "ub": pa.array([state.ub] * row_count, type=pa.uint64()),
            # The float column varies per row: constant plus row index.
            "f": pa.array([state.f + i for i in indices], type=pa.float64()),
        }
        out.emit(pa.RecordBatch.from_pydict(columns, schema=TYPED_PROBE_SCHEMA))