PyPI - vgi-python - Versions diffs - 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl - Mend

vgi-python 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

vgi/_duckdb.py +3 -0
vgi/_test_fixtures/aggregate/dynamic.py +7 -1
vgi/_test_fixtures/scalar/__init__.py +4 -0
vgi/_test_fixtures/scalar/settings_secrets.py +73 -0
vgi/_test_fixtures/table/__init__.py +4 -0
vgi/_test_fixtures/table/filters.py +128 -0
vgi/_test_fixtures/table/late_materialization.py +3 -0
vgi/_test_fixtures/table/make_series.py +15 -0
vgi/_test_fixtures/table/misc.py +4 -0
vgi/_test_fixtures/table/pairs.py +12 -0
vgi/_test_fixtures/table/sequence.py +28 -0
vgi/_test_fixtures/table/settings.py +6 -0
vgi/_test_fixtures/table/typed_probe.py +154 -0
vgi/_test_fixtures/table_in_out.py +8 -26
vgi/_test_fixtures/worker.py +9 -0
vgi/aggregate_function.py +29 -11
vgi/argument_spec.py +20 -20
vgi/arguments.py +114 -153
vgi/catalog/_descriptor_spec.py +246 -0
vgi/catalog/attach_option.py +14 -171
vgi/catalog/catalog_interface.py +390 -264
vgi/catalog/descriptors.py +59 -26
vgi/catalog/secret_type.py +1 -0
vgi/catalog/setting.py +19 -214
vgi/catalog/storage.py +8 -4
vgi/client/catalog_mixin.py +37 -33
vgi/client/cli_catalog.py +4 -16
vgi/client/cli_schema.py +12 -73
vgi/client/cli_table.py +30 -199
vgi/client/cli_utils.py +74 -19
vgi/client/cli_view.py +12 -74
vgi/client/client.py +104 -79
vgi/exceptions.py +4 -0
vgi/function.py +9 -33
vgi/function_storage.py +11 -46
vgi/function_storage_azure_sql.py +6 -6
vgi/function_storage_cf_do.py +23 -3
vgi/http/worker_page.py +18 -8
vgi/invocation.py +10 -10
vgi/meta_worker.py +7 -7
vgi/metadata.py +111 -46
vgi/otel.py +3 -3
vgi/protocol.py +504 -94
vgi/scalar_function.py +93 -72
vgi/schema_utils.py +2 -2
vgi/secret_protocol.py +22 -3
vgi/secret_service.py +9 -6
vgi/serve.py +11 -11
vgi/table_buffering_function.py +28 -22
vgi/table_filter_pushdown.py +435 -61
vgi/table_function.py +279 -82
vgi/table_in_out_function.py +88 -28
vgi/transactor/client.py +1 -1
vgi/transactor/protocol.py +1 -1
vgi/transactor/server.py +26 -64
vgi/worker.py +125 -272
{vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/METADATA +89 -176
{vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/RECORD +61 -59
{vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/WHEEL +0 -0
{vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/entry_points.txt +0 -0
{vgi_python-0.8.0.dist-info → vgi_python-0.8.1.dist-info}/licenses/LICENSE +0 -0

vgi/_duckdb.py CHANGED Viewed

@@ -32,6 +32,9 @@ def engine_module() -> ModuleType:
     The result is cached for the life of the process.
+    Returns:
+        The resolved engine module (``haybarn`` if available, else ``duckdb``).
     Raises:
         ImportError: if neither engine is installed.

vgi/_test_fixtures/aggregate/dynamic.py CHANGED Viewed

@@ -190,7 +190,7 @@ class _DynamicAggregateBase(AggregateFunction[DynamicState]):
     # when DuckDB batches many partitions into shared buffers.
     @staticmethod
-    def _slice_to_frame(  # noqa: D417
+    def _slice_to_frame(
         partition: WindowPartition,
         subframes: list[tuple[int, int]],
         data_start: int,
@@ -198,11 +198,17 @@ class _DynamicAggregateBase(AggregateFunction[DynamicState]):
         """Slice all partition columns to the frame rows.
         Args:
+            partition: The window partition whose columns are sliced.
+            subframes: List of ``(begin, end)`` index tuples describing the
+                row ranges to include in the frame.
             data_start: Index where data columns begin (header columns are
                 ``[0 .. data_start)``). NULL-drop is applied on data columns
                 only — matches the filtering ``_do_update`` performs in the
                 non-window path.
+        Returns:
+            A table containing only the frame rows across all partition columns.
         """
         num_cols = partition.inputs.num_columns
         cols = [partition.inputs.column(i) for i in range(num_cols)]

vgi/_test_fixtures/scalar/__init__.py CHANGED Viewed

@@ -58,6 +58,8 @@ from vgi._test_fixtures.scalar.random_demo import (
 from vgi._test_fixtures.scalar.settings_secrets import (
     MultiplyBySettingFunction,
     ReturnSecretValueFunction,
+    ScaleBySettingFunction,
+    SecretFieldFunction,
     WhoAmIFunction,
 )
 from vgi._test_fixtures.scalar.type_info import (
@@ -103,6 +105,8 @@ __all__ = [
     "RandomBytesFunction",
     "RandomIntFunction",
     "ReturnSecretValueFunction",
+    "ScaleBySettingFunction",
+    "SecretFieldFunction",
     "SmartFormatPrefixFunction",
     "SmartFormatWidthFunction",
     "SumValuesFunction",

vgi/_test_fixtures/scalar/settings_secrets.py CHANGED Viewed

@@ -42,6 +42,79 @@ class MultiplyBySettingFunction(ScalarFunction):
         return pc.multiply(multiplier, value)
+class ScaleBySettingFunction(ScalarFunction):
+    """Scale the input value by the float (DOUBLE) setting ``scale_factor``.
+    Companion to :class:`MultiplyBySettingFunction`, but reads a floating-point
+    setting rather than an integer one.
+    Example:
+        SQL:    SELECT scale_by_setting(4.0)
+    """
+    class Meta:
+        """Function metadata."""
+        name = "scale_by_setting"
+        description = "Scale the input value by the float setting `scale_factor`"
+        examples = [
+            FunctionExample(
+                sql="SELECT scale_by_setting(4.0)",
+                description="Scale the input value by the float setting's value",
+            ),
+        ]
+    @classmethod
+    def compute(
+        cls,
+        value: Annotated[pa.DoubleArray, Param(doc="Value to scale")],
+        scale_factor: Annotated[pa.Scalar[Any] | None, Setting()],
+    ) -> Annotated[pa.DoubleArray, Returns()]:
+        """Generate the result for each row."""
+        factor = 1.0 if scale_factor is None or scale_factor.as_py() is None else scale_factor.as_py()
+        return pc.multiply(pa.scalar(factor, type=pa.float64()), value)
+class SecretFieldFunction(ScalarFunction):
+    """Look up individual secret fields by name.
+    ``port`` is read by named lookup on the ``vgi_example`` secret and
+    ``secret_string`` by field name; the result mirrors the wire behaviour of
+    the worker-side named/positional secret field accessors.
+    Example:
+        SQL:    SELECT secret_field()
+    """
+    class Meta:
+        """Function metadata."""
+        name = "secret_field"
+        description = "Look up secret fields by name"
+        examples = [
+            FunctionExample(
+                sql="SELECT secret_field()",
+                description="Look up secret fields by name",
+            ),
+        ]
+    @classmethod
+    def compute(
+        cls,
+        vgi_example: Annotated[dict[str, pa.Scalar[Any]], Secret("vgi_example")],
+        _length: Annotated[int, OutputLength()],
+    ) -> Annotated[pa.StringArray, Returns()]:
+        """Generate the result for each row."""
+        port = vgi_example.get("port")
+        name = vgi_example.get("secret_string")
+        port_s = "" if port is None else str(port.as_py())
+        name_s = "" if name is None else str(name.as_py())
+        result = f"port={port_s};name={name_s}"
+        return pa.array([result for _ in range(_length)], type=pa.string())
 class ReturnSecretValueFunction(ScalarFunction):
     """Return the value of a secret.

vgi/_test_fixtures/table/__init__.py CHANGED Viewed

@@ -45,6 +45,7 @@ from vgi._test_fixtures.table.filters import (
     FilterEchoFunction,
     FilterEchoPartitionedFunction,
     FilterEchoTableScanFunction,
+    FilteredColumnsEchoFunction,
     SpatialFilterExampleFunction,
     ValuePruneFunction,
 )
@@ -123,6 +124,7 @@ from vgi._test_fixtures.table.settings import (
     StructSettingsFunction,
 )
 from vgi._test_fixtures.table.transaction_storage import TxCachedValueFunction
+from vgi._test_fixtures.table.typed_probe import TypedProbeFunction
 from vgi._test_fixtures.table.versioned import (
     _CURRENT_VERSION,
     _VERSIONED_CONSTRAINTS_CURRENT,
@@ -137,6 +139,7 @@ from vgi._test_fixtures.table.versioned import (
 )
 __all__ = [
+    "TypedProbeFunction",
     "_CURRENT_VERSION",
     "_VERSIONED_CONSTRAINTS_CURRENT",
     "_VERSIONED_CONSTRAINTS_DATA",
@@ -161,6 +164,7 @@ __all__ = [
     "FilterEchoFunction",
     "FilterEchoPartitionedFunction",
     "FilterEchoTableScanFunction",
+    "FilteredColumnsEchoFunction",
     "GeneratorExceptionFunction",
     "ValuePruneFunction",
     "LateMaterializationFunction",

vgi/_test_fixtures/table/filters.py CHANGED Viewed

@@ -116,6 +116,9 @@ class FilterEchoFunction(TableFunctionGenerator[FilterEchoFunctionArgs, FilterEc
     SELECT * FROM filter_echo(10) WHERE n >= 8
     Returns: rows 8-9 with pushed_filters showing "n >= 8"
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     class Meta:
@@ -287,6 +290,125 @@ class ValuePruneFunction(TableFunctionGenerator[_ValuePruneArgs, _ValuePruneStat
         state.cursor += size
+# ============================================================================
+# FilteredColumnsEchoFunction — echoes the column-introspection accessors on the
+# pushed-down filter set: filtered_columns(), has_filter_for_column(), and the
+# typed (string-capable) get_column_values(). A query's WHERE clause is reflected
+# back as diagnostic columns so each accessor is observable end-to-end.
+# ============================================================================
+@dataclass(slots=True, frozen=True)
+class _FilteredColumnsEchoArgs:
+    """Arguments for FilteredColumnsEchoFunction."""
+    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
+    batch_size: Annotated[int, Arg("batch_size", default=2048, doc="Batch size for output", ge=1)]
+@dataclass(kw_only=True)
+class _FilteredColumnsEchoState(ArrowSerializableDataclass):
+    """Resolved diagnostics (serialized so the HTTP rehydrate path preserves them)."""
+    count: int
+    filtered_cols: str
+    has_n: bool
+    has_tag: bool
+    tag_values: str
+    cursor: int = 0
+@init_single_worker
+@bind_fixed_schema
+@_cardinality_from_count
+class FilteredColumnsEchoFunction(TableFunctionGenerator[_FilteredColumnsEchoArgs, _FilteredColumnsEchoState]):
+    """Report the columns referenced by pushed-down filters and ``tag``'s values.
+    Surfaces which columns the pushed-down filters reference and the discrete
+    value set resolved for the string column ``tag``.
+    ``filtered_cols`` is the sorted, comma-joined ``filtered_columns()`` set;
+    ``has_n`` / ``has_tag`` are ``has_filter_for_column()``; ``tag_values`` is
+    the sorted, comma-joined ``get_column_values('tag')`` result (``"(none)"``
+    when the predicate is not an enumerable equality/IN on ``tag``).
+    """
+    class Meta:
+        """Metadata for FilteredColumnsEchoFunction."""
+        name = "filtered_columns_echo"
+        description = "Echoes filtered_columns / has_filter_for_column / get_column_values_array"
+        categories = ["generator", "diagnostic"]
+        filter_pushdown = True
+        auto_apply_filters = True
+        projection_pushdown = True
+    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(
+        {
+            "n": pa.int64(),
+            "tag": pa.utf8(),
+            "filtered_cols": pa.utf8(),
+            "has_n": pa.bool_(),
+            "has_tag": pa.bool_(),
+            "tag_values": pa.utf8(),
+        }
+    )
+    @classmethod
+    def initial_state(cls, params: ProcessParams[_FilteredColumnsEchoArgs]) -> _FilteredColumnsEchoState:
+        """Resolve the filter-column diagnostics from the pushed-down filters."""
+        assert params.init_call is not None
+        pf = params.init_call.pushdown_filters
+        jk = params.init_call.join_keys
+        filters = cls.pushdown_filters(pf, join_keys=jk) if pf is not None else None
+        if filters is not None:
+            filtered_cols = ",".join(sorted(filters.filtered_columns))
+            has_n = filters.has_filter_for_column("n")
+            has_tag = filters.has_filter_for_column("tag")
+            tag_arr = filters.get_column_values("tag")
+            if tag_arr is not None:
+                tag_values = ",".join(sorted(str(v) for v in tag_arr.to_pylist() if v is not None))
+            else:
+                tag_values = "(none)"
+        else:
+            filtered_cols, has_n, has_tag, tag_values = "", False, False, "(none)"
+        return _FilteredColumnsEchoState(
+            count=params.args.count,
+            filtered_cols=filtered_cols,
+            has_n=has_n,
+            has_tag=has_tag,
+            tag_values=tag_values,
+        )
+    @classmethod
+    def process(
+        cls,
+        params: ProcessParams[_FilteredColumnsEchoArgs],
+        state: _FilteredColumnsEchoState,
+        out: OutputCollector,
+    ) -> None:
+        """Emit the generated rows, each carrying the resolved diagnostics."""
+        if state.cursor >= state.count:
+            out.finish()
+            return
+        size = min(state.count - state.cursor, params.args.batch_size)
+        ns = list(range(state.cursor, state.cursor + size))
+        out.emit(
+            pa.RecordBatch.from_pydict(
+                {
+                    "n": ns,
+                    "tag": [f"t{i}" for i in ns],
+                    "filtered_cols": [state.filtered_cols] * size,
+                    "has_n": [state.has_n] * size,
+                    "has_tag": [state.has_tag] * size,
+                    "tag_values": [state.tag_values] * size,
+                },
+                schema=params.output_schema,
+            )
+        )
+        state.cursor += size
 # ============================================================================
 # DictFilterEchoFunction — output column declared as a *dictionary* Arrow type
 # (dictionary<int8, utf8>) with no ENUM metadata. DuckDB maps such a column to
@@ -354,6 +476,9 @@ class DictFilterEchoFunction(TableFunctionGenerator[_DictFilterEchoArgs, _DictFi
     SELECT * FROM dict_filter_echo(6) WHERE s = 'green'
     Returns: rows 1 and 4.
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     class Meta:
@@ -483,6 +608,9 @@ class SpatialFilterExampleFunction(TableFunctionGenerator[_SpatialFilterArgs, _S
     SELECT * FROM spatial_filter_example(100) WHERE geom && ST_MakeEnvelope(0, 0, 0.5, 0.5)
     Returns: points in the lower-left quadrant of the unit square.
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     class Meta:

vgi/_test_fixtures/table/late_materialization.py CHANGED Viewed

@@ -148,6 +148,9 @@ class LateMaterializationFunction(TableFunctionGenerator[LateMaterializationFunc
     -------
     SELECT row_id, payload FROM late_materialization(100000) ORDER BY ord LIMIT 10
+    Attributes:
+        FunctionArguments: The argument dataclass type bound to this function.
     """
     FunctionArguments = LateMaterializationFunctionArgs

vgi/_test_fixtures/table/make_series.py CHANGED Viewed

@@ -77,6 +77,9 @@ class MakeSeriesCountFunction(TableFunctionGenerator[MakeSeriesCountArgs, MakeSe
         SELECT * FROM make_series(5)
         Returns: 0, 1, 2, 3, 4
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
@@ -113,6 +116,9 @@ class MakeSeriesRangeFunction(TableFunctionGenerator[MakeSeriesRangeArgs, MakeSe
         SELECT * FROM make_series(3, 7)
         Returns: 3, 4, 5, 6
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
@@ -149,6 +155,9 @@ class MakeSeriesStepFunction(TableFunctionGenerator[MakeSeriesStepArgs, MakeSeri
         SELECT * FROM make_series(0, 10, 3)
         Returns: 0, 3, 6, 9
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
@@ -195,6 +204,9 @@ class MakeSeriesCsvFunction(TableFunctionGenerator[MakeSeriesCsvArgs, MakeSeries
         SELECT * FROM make_series('10,20,30')
         Returns: 10, 20, 30
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA
@@ -243,6 +255,9 @@ class MakeSeriesFloatFunction(TableFunctionGenerator[MakeSeriesFloatArgs, MakeSe
         SELECT * FROM make_series(0.5)
         Returns: 0.0, 0.5, 1.0, ..., 4.5
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_FLOAT_SCHEMA

vgi/_test_fixtures/table/misc.py CHANGED Viewed

@@ -178,6 +178,10 @@ class ProjectedDataFunction(TableFunctionGenerator[ProjectedDataFunctionArgument
     SELECT id, value FROM projected_data(10)  -- Only computes id and value
     Returns: 10 rows with id and value columns only
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
+        BATCH_SIZE: Number of rows emitted per output batch.
     """
     class Meta:

vgi/_test_fixtures/table/pairs.py CHANGED Viewed

@@ -83,6 +83,9 @@ class ConstantColumnsFunction(TableFunctionGenerator[ConstantColumnsFunctionArgu
     Returns: [{"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"},
               {"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"}]
+    Attributes:
+        BATCH_SIZE: Number of rows emitted per output batch.
     """
     class Meta:
@@ -188,6 +191,9 @@ class MakePairsIntFunction(TableFunctionGenerator[MakePairsIntArgs, MakePairsInt
         SELECT * FROM make_pairs(1, 4)
         Returns: (1,2), (2,4), (3,6)
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_INT_SCHEMA
@@ -223,6 +229,9 @@ class MakePairsStrFunction(TableFunctionGenerator[MakePairsStrArgs, MakePairsStr
         SELECT * FROM make_pairs('row_', '_end')
         Returns: ('row_0','_end0'), ('row_1','_end1'), ...
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_STR_SCHEMA
@@ -284,6 +293,9 @@ class MakePairsIntStrFunction(TableFunctionGenerator[MakePairsIntStrArgs, MakePa
         SELECT * FROM make_pairs(10, 'item_')
         Returns: (10, 'item_0'), (11, 'item_1'), ..., (14, 'item_4')
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_MIXED_SCHEMA

vgi/_test_fixtures/table/sequence.py CHANGED Viewed

@@ -69,6 +69,11 @@ class SequenceFunction(_BaseSequenceFunction):
     SELECT * FROM sequence(1000, batch_size := 100)
     Returns: integers 0-999 in batches of 100 rows each
+    Attributes:
+        FunctionArguments: The argument dataclass type bound to this function.
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
+        NUMPY_DTYPE: NumPy dtype used to build the output column(s).
     """
     FunctionArguments = SequenceFunctionArgs
@@ -144,6 +149,10 @@ class NamedParamsEchoFunction(_BaseSequenceFunction):
     SELECT * FROM named_params_echo(3, greeting := 'hi', multiplier := 10)
     Returns: rows with id=0..2, greeting='hi', value=id*10, float_value=id*1.0, enabled=true
+    Attributes:
+        FunctionArguments: The argument dataclass type bound to this function.
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     FunctionArguments = NamedParamsEchoFunctionArgs
@@ -242,6 +251,10 @@ class NestedSequenceFunction(_BaseSequenceFunction):
     SELECT metadata.index FROM nested_sequence(10)
     Test projection pushdown with struct field access
+    Attributes:
+        FunctionArguments: The argument dataclass type bound to this function.
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     class Meta:
@@ -356,6 +369,12 @@ class DoubleSequenceFunction(_BaseSequenceFunction):
     SELECT * FROM double_sequence(1000, batch_size := 100)
     Returns: floats 0.0-999.0 in batches of 100 rows each
+    Attributes:
+        FunctionArguments: The argument dataclass type bound to this function.
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
+        NUMPY_DTYPE: NumPy dtype used to build the output column(s).
+        STATS_ARROW_TYPE: Arrow type used for the column statistics this function reports.
     """
     FunctionArguments = DoubleSequenceFunctionArguments
@@ -441,6 +460,11 @@ class PartitionedSequenceFunction(
     With count=5 and increment=10:
         Combined output: [0, 10, 20, 30, 40]
+    Attributes:
+        MAX_PARTITIONS: Maximum number of partitions this function emits.
+        BATCH_SIZE: Number of rows emitted per output batch.
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     class Meta:
@@ -556,6 +580,10 @@ class TenThousandFunction(TableFunctionGenerator[TenThousandFunctionArguments, T
     SELECT * FROM ten_thousand()
     Returns: [{"n": 0}, {"n": 1}, ..., {"n": 9999}]
+    Attributes:
+        BATCH_SIZE: Number of rows emitted per output batch.
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     class Meta:

vgi/_test_fixtures/table/settings.py CHANGED Viewed

@@ -75,6 +75,9 @@ class SettingsAwareFunction(TableFunctionGenerator[SettingsAwareFunctionArgument
     With settings={vgi_verbose_mode: true, greeting: "Hi", multiplier: 2}:
     Returns: [{"id": 0, "greeting": "Hi", "value": 0.0, "details": "row_0"}, ...]
+    Attributes:
+        BATCH_SIZE: Number of rows emitted per output batch.
     """
     class Meta:
@@ -207,6 +210,9 @@ class StructSettingsFunction(TableFunctionGenerator[StructSettingsFunctionArgume
     With config={'start': 10, 'step': 5, 'label': 'item'} and count=3:
     Returns: [{"n": 10, "label": "item_0"}, {"n": 15, "label": "item_1"}, {"n": 20, "label": "item_2"}]
+    Attributes:
+        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.
     """
     class Meta:

vgi/_test_fixtures/table/typed_probe.py ADDED Viewed

@@ -0,0 +1,154 @@
+# Copyright 2025, 2026 Query Farm LLC - https://query.farm
+"""typed_probe — exercises typed const-argument binding and typed column emit.
+Const args cover the less-common Arrow scalar types — TIMESTAMP, INTERVAL
+(duration), BLOB and UBIGINT — each with a default so calling ``typed_probe(n)``
+drives the default path and passing named args drives the scalar-extraction
+path. The output echoes the bound values into uint64 / int64 / blob / double
+columns. Values are echoed in normalized integer/byte form so this fixture and
+its vgi-go counterpart produce byte-identical results for the shared test.
+"""
+from __future__ import annotations
+import datetime
+from dataclasses import dataclass
+from typing import Annotated, ClassVar
+import pyarrow as pa
+from vgi_rpc import ArrowSerializableDataclass
+from vgi_rpc.rpc import OutputCollector
+from vgi.arguments import Arg
+from vgi.schema_utils import schema
+from vgi.table_function import (
+    ProcessParams,
+    TableFunctionGenerator,
+    bind_fixed_schema,
+    init_single_worker,
+)
+_EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.UTC)
+def _iv_to_ms(iv: object) -> int:
+    """Collapse a duration/interval const to whole milliseconds.
+    A declared default arrives as a ``datetime.timedelta``; a SQL ``INTERVAL``
+    literal arrives as a pyarrow ``MonthDayNano`` (DuckDB intervals are
+    month-day-nano). Mirror vgi-go's GetScalarDuration collapse — months→30d,
+    days→24h — so both implementations agree.
+    """
+    if isinstance(iv, datetime.timedelta):
+        return iv // datetime.timedelta(milliseconds=1)
+    months = getattr(iv, "months", 0)
+    days = getattr(iv, "days", 0)
+    nanos = getattr(iv, "nanoseconds", 0)
+    return months * 30 * 24 * 3600 * 1000 + days * 24 * 3600 * 1000 + nanos // 1_000_000
+TYPED_PROBE_SCHEMA = schema(
+    idx=pa.uint64(),
+    ts_us=pa.int64(),
+    iv_ms=pa.int64(),
+    payload=pa.binary(),
+    ub=pa.uint64(),
+    f=pa.float64(),
+)
+@dataclass(kw_only=True)
+class TypedProbeArgs:
+    """Arguments for TypedProbeFunction — one named const per scalar type."""
+    n: Annotated[int, Arg(0, doc="Number of rows to emit", ge=0)]
+    ts: Annotated[
+        datetime.datetime,
+        Arg(
+            "ts",
+            default=datetime.datetime(2026, 1, 2, 3, 4, 5, tzinfo=datetime.UTC),
+            arrow_type=pa.timestamp("us", tz="UTC"),
+            doc="Timestamp const (TIMESTAMPTZ)",
+        ),
+    ]
+    iv: Annotated[
+        datetime.timedelta,
+        Arg(
+            "iv",
+            default=datetime.timedelta(milliseconds=1500),
+            arrow_type=pa.duration("ns"),
+            doc="Interval const (INTERVAL)",
+        ),
+    ]
+    blob: Annotated[
+        bytes,
+        Arg("blob", default=b"vgi", arrow_type=pa.binary(), doc="Blob const (BLOB)"),
+    ]
+    ub: Annotated[
+        int,
+        Arg("ub", default=9, arrow_type=pa.uint64(), doc="Unsigned const (UBIGINT)"),
+    ]
+    f: Annotated[float, Arg("f", default=2.5, doc="Float const (DOUBLE)")]
+@dataclass(kw_only=True)
+class TypedProbeState(ArrowSerializableDataclass):
+    """Mutable state — the resolved const values plus emit cursor."""
+    n: int
+    ts_us: int
+    iv_ms: int
+    payload: bytes
+    ub: int
+    f: float
+    offset: int = 0
+@init_single_worker
+@bind_fixed_schema
+class TypedProbeFunction(TableFunctionGenerator[TypedProbeArgs, TypedProbeState]):
+    """Echo typed const args (timestamp/interval/blob/ubigint) into typed columns."""
+    FIXED_SCHEMA: ClassVar[pa.Schema] = TYPED_PROBE_SCHEMA
+    class Meta:
+        """Function metadata."""
+        name = "typed_probe"
+        description = "Echoes typed const args (timestamp/interval/blob/ubigint) into typed columns"
+    @classmethod
+    def initial_state(cls, params: ProcessParams[TypedProbeArgs]) -> TypedProbeState:
+        """Resolve const args into normalized integer/byte form."""
+        a = params.args
+        return TypedProbeState(
+            n=a.n,
+            ts_us=(a.ts - _EPOCH) // datetime.timedelta(microseconds=1),
+            iv_ms=_iv_to_ms(a.iv),
+            payload=a.blob,
+            ub=a.ub,
+            f=a.f,
+        )
+    @classmethod
+    def process(cls, params: ProcessParams[TypedProbeArgs], state: TypedProbeState, out: OutputCollector) -> None:
+        """Emit all rows in a single batch."""
+        if state.offset >= state.n:
+            out.finish()
+            return
+        rows = list(range(state.offset, state.n))
+        state.offset = state.n
+        out.emit(
+            pa.RecordBatch.from_pydict(
+                {
+                    "idx": pa.array(rows, type=pa.uint64()),
+                    "ts_us": pa.array([state.ts_us] * len(rows), type=pa.int64()),
+                    "iv_ms": pa.array([state.iv_ms] * len(rows), type=pa.int64()),
+                    "payload": pa.array([state.payload] * len(rows), type=pa.binary()),
+                    "ub": pa.array([state.ub] * len(rows), type=pa.uint64()),
+                    "f": pa.array([state.f + i for i in rows], type=pa.float64()),
+                },
+                schema=TYPED_PROBE_SCHEMA,
+            )
+        )

vgi-python 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

vgi-python 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl