vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/arguments.py
ADDED
|
@@ -0,0 +1,1747 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Argument parsing and validation for VGI functions.
|
|
4
|
+
|
|
5
|
+
This module provides classes for handling function arguments in VGI:
|
|
6
|
+
|
|
7
|
+
Classes:
|
|
8
|
+
Arguments: Container for positional and named function arguments.
|
|
9
|
+
ArgumentValidationError: Raised when an argument fails validation.
|
|
10
|
+
Arg: Descriptor for declarative argument parsing with optional validation.
|
|
11
|
+
AnyArrow: Sentinel type for arguments accepting multiple Arrow types.
|
|
12
|
+
AnyArrowValue: Wrapper returned when accessing AnyArrow arguments.
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import re
|
|
17
|
+
import types
|
|
18
|
+
import typing
|
|
19
|
+
import warnings
|
|
20
|
+
from collections.abc import Callable, Sequence
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from typing import TYPE_CHECKING, Any, Final, TypeVar, overload
|
|
23
|
+
|
|
24
|
+
import pyarrow as pa
|
|
25
|
+
from vgi_rpc import ArrowSerializableDataclass
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from pyarrow import Scalar
|
|
29
|
+
|
|
30
|
+
# Python type to Arrow type mapping for Arg type hints
|
|
31
|
+
# Public table (exported through __all__) mapping a Python builtin type to the
# Arrow type used for it.
# NOTE(review): pyarrow documents pa.utf8() as an alias of pa.string(), so the
# values here should equal those of _PYTHON_TO_ARROW; only the insertion order
# of the keys differs between the two tables — confirm before merging them.
PYTHON_TO_ARROW: dict[type, pa.DataType] = {
    int: pa.int64(),
    str: pa.utf8(),
    float: pa.float64(),
    bool: pa.bool_(),
    bytes: pa.binary(),
}
|
|
38
|
+
|
|
39
|
+
# Private mapping used by _python_to_arrow() helper
|
|
40
|
+
# Lookup table consulted by _python_to_arrow(). The insertion order of the keys
# is observable: _python_to_arrow() iterates this dict to build the
# "Supported types: ..." part of its error message.
_PYTHON_TO_ARROW: dict[type, pa.DataType] = {
    int: pa.int64(),
    float: pa.float64(),
    str: pa.string(),
    bool: pa.bool_(),
    bytes: pa.binary(),
}
|
|
47
|
+
|
|
48
|
+
# Arrow type to Python scalar type mapping
|
|
49
|
+
# Keys are the type class of Arrow DataType instances (e.g., type(pa.int8()))
|
|
50
|
+
# Table consulted by _arrow_type_to_python(): class of an Arrow DataType
# instance -> Python type of its scalar values.
# NOTE(review): every key is type(<DataType instance>). pyarrow's primitive
# factories (int8() ... large_binary()) appear to all return instances of the
# same pa.DataType class; if so, the primitive entries below collapse into a
# single dict key and only the last value written survives. Confirm against the
# installed pyarrow before relying on per-primitive lookups through this table.
_ARROW_TO_PYTHON: dict[type, type] = {
    # Primitives - integers
    type(pa.int8()): int,
    type(pa.int16()): int,
    type(pa.int32()): int,
    type(pa.int64()): int,
    type(pa.uint8()): int,
    type(pa.uint16()): int,
    type(pa.uint32()): int,
    type(pa.uint64()): int,
    # Primitives - floats
    type(pa.float16()): float,
    type(pa.float32()): float,
    type(pa.float64()): float,
    # Primitives - strings
    type(pa.string()): str,
    type(pa.large_string()): str,
    # Primitives - boolean
    type(pa.bool_()): bool,
    # Primitives - binary
    type(pa.binary()): bytes,
    type(pa.large_binary()): bytes,
    # Nested types
    type(pa.struct([])): dict,
    type(pa.list_(pa.int32())): list,
    type(pa.large_list(pa.int32())): list,
    type(pa.list_(pa.int32(), 3)): list,  # FixedSizeListType
    type(pa.map_(pa.string(), pa.int32())): dict,
}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _python_to_arrow(py_type: type) -> pa.DataType:
    """Convert a Python type to the corresponding Arrow type.

    Args:
        py_type: Python type (int, float, str, bool, bytes).

    Returns:
        Corresponding Arrow data type.

    Raises:
        TypeError: If py_type is not a supported Python type. This includes
            typing constructs (e.g. ``int | None``) that are not plain classes.

    """
    arrow_type = _PYTHON_TO_ARROW.get(py_type)
    if arrow_type is not None:
        return arrow_type

    # Typing constructs such as ``int | None`` have no ``__name__``. Fall back
    # to repr() so the documented TypeError is raised instead of an
    # AttributeError escaping from the error-message formatting.
    type_name = getattr(py_type, "__name__", None) or repr(py_type)
    supported = ", ".join(t.__name__ for t in _PYTHON_TO_ARROW)
    raise TypeError(
        f"Cannot convert Python type '{type_name}' to Arrow type. "
        f"Supported types: {supported}. "
        "Example: _python_to_arrow(int) -> pa.int64()"
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# =============================================================================
|
|
106
|
+
# PyArrow Array Class to DataType Mapping (for type inference)
|
|
107
|
+
# =============================================================================
|
|
108
|
+
#
|
|
109
|
+
# These mappings enable inferring Arrow types from array class annotations:
|
|
110
|
+
# Annotated[pa.Int64Array, Param(doc="...")] -> pa.int64()
|
|
111
|
+
#
|
|
112
|
+
# Only simple (non-parameterized) types are included. Complex types that require
|
|
113
|
+
# parameters (e.g., StructArray, ListArray, Decimal128Array) need explicit
|
|
114
|
+
# arrow_type specification.
|
|
115
|
+
|
|
116
|
+
# Simple array classes that can be inferred automatically
|
|
117
|
+
# Public table (exported through __all__): pyarrow Array subclass -> the Arrow
# DataType instance to use when that array class appears as an annotation.
# Every value is built by a zero-argument factory call.
ARRAY_CLASS_TO_DATATYPE: dict[type, pa.DataType] = {
    # Integers
    pa.Int8Array: pa.int8(),
    pa.Int16Array: pa.int16(),
    pa.Int32Array: pa.int32(),
    pa.Int64Array: pa.int64(),
    pa.UInt8Array: pa.uint8(),
    pa.UInt16Array: pa.uint16(),
    pa.UInt32Array: pa.uint32(),
    pa.UInt64Array: pa.uint64(),
    # Floats
    pa.HalfFloatArray: pa.float16(),
    pa.FloatArray: pa.float32(),
    pa.DoubleArray: pa.float64(),
    # Strings/Binary
    pa.StringArray: pa.string(),
    pa.LargeStringArray: pa.large_string(),
    pa.BinaryArray: pa.binary(),
    pa.LargeBinaryArray: pa.large_binary(),
    # Boolean
    pa.BooleanArray: pa.bool_(),
    # Dates (no params needed)
    pa.Date32Array: pa.date32(),
    pa.Date64Array: pa.date64(),
    # Null
    pa.NullArray: pa.null(),
}
|
|
144
|
+
|
|
145
|
+
# Complex array classes that require explicit arrow_type (parameterized types)
|
|
146
|
+
# Using these without arrow_type will raise a helpful error
|
|
147
|
+
# Public set (exported through __all__) of pyarrow Array subclasses whose Arrow
# type cannot be derived from the class alone. None of these classes appears as
# a key in ARRAY_CLASS_TO_DATATYPE.
COMPLEX_ARRAY_CLASSES: set[type] = {
    # Nested types
    pa.StructArray,
    pa.ListArray,
    pa.LargeListArray,
    pa.FixedSizeListArray,
    pa.MapArray,
    pa.UnionArray,
    # Parameterized types
    pa.DictionaryArray,
    pa.Decimal128Array,
    pa.Decimal256Array,
    pa.FixedSizeBinaryArray,
    # Temporal types with units (require explicit unit specification)
    pa.Time32Array,
    pa.Time64Array,
    pa.TimestampArray,
    pa.DurationArray,
}
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _arrow_type_to_python(arrow_type: pa.DataType) -> type:
    """Convert an Arrow type to the corresponding Python scalar type.

    The mapping is decided with ``pa.types.is_*`` predicates rather than by
    the Python class of ``arrow_type``. pyarrow's primitive types (integers,
    floats, strings, booleans, binary, dates, ...) are all instances of the
    same ``pa.DataType`` class, so a class-keyed lookup cannot tell them apart.

    Args:
        arrow_type: Arrow data type instance.

    Returns:
        Corresponding Python type for scalar values:
        ``bool`` for boolean, ``int`` for signed/unsigned integers,
        ``float`` for float16/32/64, ``str`` for string/large_string,
        ``bytes`` for binary/large_binary, ``dict`` for struct and map,
        ``list`` for list/large_list/fixed-size list.
        Returns ``object`` for any other Arrow type.

    """
    arrow_types = pa.types

    if arrow_types.is_boolean(arrow_type):
        return bool
    if arrow_types.is_integer(arrow_type):
        return int
    if arrow_types.is_floating(arrow_type):
        return float
    if arrow_types.is_string(arrow_type) or arrow_types.is_large_string(arrow_type):
        return str
    if arrow_types.is_binary(arrow_type) or arrow_types.is_large_binary(arrow_type):
        return bytes
    if arrow_types.is_struct(arrow_type) or arrow_types.is_map(arrow_type):
        return dict
    if (
        arrow_types.is_list(arrow_type)
        or arrow_types.is_large_list(arrow_type)
        or arrow_types.is_fixed_size_list(arrow_type)
    ):
        return list

    # Anything else (temporal, decimal, dictionary, extension, ...) has no
    # single builtin scalar type here.
    return object
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# Sentinel for missing default value - proper type pattern
|
|
184
|
+
class _MissingType:
    """Class of the ``_MISSING`` sentinel ("no default was supplied").

    Having a dedicated class gives the sentinel its own static type, which is
    safer than typing the sentinel as `Any`. Instances carry no state, are
    falsy, and render as ``<MISSING>``.
    """

    # No per-instance attributes: the sentinel is a pure marker.
    __slots__ = ()

    def __bool__(self) -> bool:
        return False

    def __repr__(self) -> str:
        return "<MISSING>"


# The module-wide sentinel instance; compare against it with ``is``.
_MISSING: Final = _MissingType()
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _accepts_none(annotated_inner_type: Any) -> bool:
|
|
203
|
+
"""Whether a declared Arg type allows ``None``.
|
|
204
|
+
|
|
205
|
+
``annotated_inner_type`` is the first type-arg of an
|
|
206
|
+
``Annotated[T, Arg(...)]`` hint — i.e. the user's declared type for
|
|
207
|
+
the field. Returns True iff the type is a union that includes
|
|
208
|
+
``NoneType`` (e.g. ``int | None``, ``Optional[int]``,
|
|
209
|
+
``Union[int, None]``). Used by argument resolvers to reject SQL NULL
|
|
210
|
+
when the user did not opt in to nullable arguments.
|
|
211
|
+
"""
|
|
212
|
+
if annotated_inner_type is type(None):
|
|
213
|
+
return True
|
|
214
|
+
origin = typing.get_origin(annotated_inner_type)
|
|
215
|
+
if origin is typing.Union or origin is types.UnionType:
|
|
216
|
+
return type(None) in typing.get_args(annotated_inner_type)
|
|
217
|
+
return False
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# Names exported by ``from vgi.arguments import *``.
# NOTE(review): several entries (e.g. "ConstParam", "Param", "Returns",
# "Setting", "Secret") are not defined in the part of the module above this
# list — presumably they are defined further down; confirm.
# NOTE(review): "_extract_setting_secret_params" is exported despite its
# leading underscore — presumably for use by sibling modules; confirm.
__all__ = [
    "AnyArrow",
    "AnyArrowValue",
    "ARRAY_CLASS_TO_DATATYPE",
    "Arg",
    "ArgumentValidationError",
    "Arguments",
    "COMPLEX_ARRAY_CLASSES",
    "ConstParam",
    "Param",
    "PYTHON_TO_ARROW",
    "Returns",
    "TableInput",
    "TypeBoundPredicate",
    "OutputLength",
    "Setting",
    "Secret",
    "SecretLookupEntry",
    "_extract_setting_secret_params",
]
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class TableInput:
    """Marker type for the table-input parameter of a table-in-out function.

    Pass this as the type parameter of an Arg to declare which argument
    receives the streaming table input. A TableInOutFunction must declare
    exactly one TableInput argument, and that argument must be positional
    (not named).

    The marked argument selects the table expression that feeds the function
    when it is called from SQL. No Arrow value is bound to it: the table data
    is delivered as streaming RecordBatches through process().

    """
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
@dataclass(frozen=True, slots=True)
|
|
259
|
+
class AnyArrowValue:
|
|
260
|
+
"""Wrapper for AnyArrow argument values with metadata.
|
|
261
|
+
|
|
262
|
+
When an Arg returns an AnyArrow type, accessing the attribute returns
|
|
263
|
+
an AnyArrowValue instead of just the raw value. This provides access to
|
|
264
|
+
both the value and the argument's position/name for schema lookups.
|
|
265
|
+
|
|
266
|
+
Attributes:
|
|
267
|
+
value: The Python value (from scalar.as_py()).
|
|
268
|
+
position: The positional index from the Arg definition (int for positional,
|
|
269
|
+
str for named arguments).
|
|
270
|
+
name: The Python attribute name of the Arg.
|
|
271
|
+
|
|
272
|
+
Example using Annotated (recommended):
|
|
273
|
+
from typing import Annotated
|
|
274
|
+
|
|
275
|
+
class MyFunction(TableFunctionGenerator):
|
|
276
|
+
col1: Annotated[AnyArrowValue, Arg(0, doc="First column")]
|
|
277
|
+
|
|
278
|
+
def on_bind(self) -> None:
|
|
279
|
+
# self.col1 is an AnyArrowValue
|
|
280
|
+
print(self.col1.value) # The column name
|
|
281
|
+
print(self.col1.position) # The positional index
|
|
282
|
+
|
|
283
|
+
Example using legacy Arg[AnyArrow] syntax:
|
|
284
|
+
class MyFunction(TableFunctionGenerator):
|
|
285
|
+
col1 = Arg[AnyArrow](0, doc="First column") # type: ignore[assignment]
|
|
286
|
+
|
|
287
|
+
"""
|
|
288
|
+
|
|
289
|
+
value: Any
|
|
290
|
+
position: int | str
|
|
291
|
+
name: str
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
class AnyArrow:
|
|
295
|
+
"""Sentinel type for arguments accepting multiple Arrow types.
|
|
296
|
+
|
|
297
|
+
Use this with ``AnyArrowValue`` in the Annotated pattern when an argument
|
|
298
|
+
should accept multiple valid Arrow types, validated via the ``type_bound``
|
|
299
|
+
parameter. When accessed, returns an AnyArrowValue containing the value
|
|
300
|
+
plus metadata (position and name).
|
|
301
|
+
|
|
302
|
+
Choosing Between Specific Types and AnyArrowValue
|
|
303
|
+
-------------------------------------------------
|
|
304
|
+
- **Single required type**: Use ``Annotated[str, Arg(...)]`` or similar.
|
|
305
|
+
The argument will only accept that exact type.
|
|
306
|
+
|
|
307
|
+
- **Multiple valid types**: Use ``Annotated[AnyArrowValue, Arg(...)]`` with
|
|
308
|
+
``type_bound`` to specify which types are acceptable. For example, numeric
|
|
309
|
+
operations that work on integers, floats, and decimals should use AnyArrowValue.
|
|
310
|
+
|
|
311
|
+
The ``type_bound`` parameter is ONLY meaningful for ``AnyArrowValue`` arguments.
|
|
312
|
+
Using it with other types will emit a warning.
|
|
313
|
+
|
|
314
|
+
Examples using Annotated (recommended):
|
|
315
|
+
from typing import Annotated
|
|
316
|
+
from vgi import Arg, AnyArrowValue
|
|
317
|
+
|
|
318
|
+
# Single type: function only works with strings
|
|
319
|
+
class UpperCaseFunction(TableFunctionGenerator):
|
|
320
|
+
column: Annotated[str, Arg(0, doc="String column to uppercase")]
|
|
321
|
+
|
|
322
|
+
# Multiple types: function works with any numeric type
|
|
323
|
+
class DoubleFunction(TableFunctionGenerator):
|
|
324
|
+
column: Annotated[
|
|
325
|
+
AnyArrowValue,
|
|
326
|
+
Arg(0, type_bound=[pa.types.is_integer, pa.types.is_floating])
|
|
327
|
+
]
|
|
328
|
+
|
|
329
|
+
def on_bind(self) -> None:
|
|
330
|
+
# Access column metadata for dynamic output type
|
|
331
|
+
self._output_type = self.column.value
|
|
332
|
+
|
|
333
|
+
# Any type: function works with all types
|
|
334
|
+
class IdentityFunction(TableFunctionGenerator):
|
|
335
|
+
column: Annotated[AnyArrowValue, Arg(0, doc="Column to pass through")]
|
|
336
|
+
|
|
337
|
+
Accessing Values:
|
|
338
|
+
When using AnyArrowValue, access the value via the ``.value`` attribute::
|
|
339
|
+
|
|
340
|
+
val = self.column.value # The column name as a string
|
|
341
|
+
pos = self.column.position # The positional index
|
|
342
|
+
|
|
343
|
+
Note:
|
|
344
|
+
Unlike TableInput, AnyArrow arguments have actual Arrow values -
|
|
345
|
+
they are just not constrained to a specific Arrow type.
|
|
346
|
+
|
|
347
|
+
"""
|
|
348
|
+
|
|
349
|
+
# Type stubs for static analysis - at runtime, Arg[AnyArrow] returns AnyArrowValue
|
|
350
|
+
value: Any
|
|
351
|
+
position: int | str
|
|
352
|
+
name: str
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
@dataclass(frozen=True, slots=True)
class Arguments:
    """Immutable container for the arguments of one function call.

    Use get() to read arguments as Python values:

        # Positional arguments (by index)
        count = args.get(0)  # First argument
        name = args.get(1, default="unnamed")  # With default

        # Named arguments (by string)
        separator = args.get("sep", default=",")
        threshold = args.get("threshold")

        # With type validation (optional, for strict checking)
        count = args.get(0, type=pa.int64())

    Use the positional/named attributes for the underlying Arrow scalars:

        scalar = args.positional[0]  # pa.Scalar | None
        scalar = args.named["sep"]  # pa.Scalar

    Attributes:
        positional: Tuple of positional argument values as pa.Scalar
            (a slot may be None).
        named: Dictionary mapping argument names to pa.Scalar values,
            or None when there are no named arguments.

    """

    positional: tuple["Scalar[Any] | None", ...] = ()
    named: dict[str, "Scalar[Any]"] | None = None

    def get(
        self,
        key: int | str,
        *,
        type: pa.DataType | None = None,
        default: Any = _MISSING,
    ) -> Any:
        """Return one argument converted to a Python value.

        SQL NULL is a real value and is distinct from "argument not
        provided": ``default`` is used only when the argument is absent,
        while an explicit SQL NULL yields ``None``.

        Args:
            key: Positional index (int) or argument name (str).
            type: Expected Arrow type. Raises TypeError if mismatch.
            default: Value returned when the argument was omitted by the
                caller. When no default is given, an omitted argument raises.
                ``default`` is *not* consulted for explicit SQL NULL — that
                case returns ``None``.

        Returns:
            The argument value as a Python object. ``None`` if the caller
            passed an explicit SQL NULL.

        Raises:
            IndexError: Positional argument not provided (no default).
            KeyError: Named argument not provided (no default).
            TypeError: Argument type doesn't match `type` parameter.

        """
        # An absent argument is one the caller never wrote: the C++ extension
        # only ships fields the user supplied, so absence appears as an
        # out-of-range index (positional) or a missing key (named). A scalar
        # that is present but invalid is an *explicit* SQL NULL.
        by_position = isinstance(key, int)

        if by_position:
            in_range = 0 <= key < len(self.positional)
            scalar = self.positional[key] if in_range else None
            if scalar is None:
                if default is _MISSING:
                    raise IndexError(
                        f"Argument {key}: index out of range (have {len(self.positional)} positional arguments)"
                    )
                return default
        else:
            available = self.named or {}
            if key not in available:
                if default is _MISSING:
                    raise KeyError(f"Argument '{key}': not found")
                return default
            scalar = available[key]

        # Optional strict type check; named keys are quoted in the message.
        if type is not None and scalar.type != type:
            label = f"{key}" if by_position else f"'{key}'"
            raise TypeError(f"Argument {label}: expected {type}, got {scalar.type}")

        return scalar.as_py()

    def get_varargs(
        self,
        start: int,
        *,
        type: pa.DataType | None = None,
    ) -> tuple[Any, ...]:
        """Return every positional argument from ``start`` onwards.

        Args:
            start: Starting positional index (inclusive).
            type: Expected Arrow type for all values. Raises TypeError if mismatch.

        Returns:
            Tuple of argument values as Python objects (empty when ``start``
            is at or past the end).

        Raises:
            ValueError: ``start`` is negative, or a value is missing/null.
            TypeError: A value's Arrow type differs from ``type``.

        """
        if start < 0:
            raise ValueError(f"start must be non-negative, got {start}")

        collected: list[Any] = []
        for position, scalar in enumerate(self.positional[start:], start=start):
            # Varargs have no way to represent a null entry.
            if scalar is None or not scalar.is_valid:
                raise ValueError(f"Argument {position}: value is null (varargs cannot contain nulls)")

            if type is not None and scalar.type != type:
                raise TypeError(f"Argument {position}: expected {type}, got {scalar.type}")

            collected.append(scalar.as_py())

        return tuple(collected)

    def encoded_dict(self) -> dict[str, "Scalar[Any] | None"]:
        """Flatten the arguments into one dictionary for serialization.

        Positional arguments get the keys "positional_0", "positional_1", ...
        and named arguments get their own name prefixed with "named_".

        A flat dictionary is used because Arrow serializes flat structures
        easily but does not handle variably-typed arrays of arbitrary objects.

        Returns:
            Dictionary mapping the encoded argument names to their values,
            positional entries first, then named entries.

        """
        encoded: dict[str, Scalar[Any] | None] = {}
        for index, value in enumerate(self.positional):
            encoded[f"positional_{index}"] = value
        if self.named:
            for name, value in self.named.items():
                encoded[f"named_{name}"] = value
        return encoded

    def schema(self) -> pa.Schema:
        """Return the Arrow schema used to serialize these Arguments.

        There is one field per argument, named as in encoded_dict()
        ("positional_<i>" / "named_<name>"). Each field takes its type
        directly from the scalar so that Arrow extension types are kept;
        a missing (None) slot becomes a null-typed field.

        Returns:
            Arrow schema matching the structure returned by encoded_dict().

        """
        return pa.schema(
            [
                pa.field(key, pa.null() if scalar is None else scalar.type)
                for key, scalar in self.encoded_dict().items()
            ]
        )

    @staticmethod
    def decode(data: pa.StructScalar) -> "Arguments":
        """Rebuild Arguments from a struct of encoded fields.

        Args:
            data: Struct scalar whose field names follow the encoded_dict()
                convention ("positional_<i>" / "named_<name>"). Fields with
                any other name are ignored.

        Returns:
            Deserialized Arguments instance. Positional gaps are filled with
            None; ``named`` is None when no named field was present.

        """
        positional_prefix = "positional_"
        named_prefix = "named_"

        positional: list[Scalar[Any] | None] = []
        named: dict[str, Scalar[Any]] = {}
        for key, value in data.items():
            if key.startswith(positional_prefix):
                index = int(key.removeprefix(positional_prefix))
                # Grow the list so that ``index`` is addressable.
                shortfall = index + 1 - len(positional)
                if shortfall > 0:
                    positional.extend([None] * shortfall)
                positional[index] = value
            elif key.startswith(named_prefix):
                named[key.removeprefix(named_prefix)] = value
        return Arguments(positional=tuple(positional), named=named or None)

    def serialize_to_bytes(self) -> bytes:
        """Serialize the Arguments to Arrow IPC stream bytes.

        The arguments are encoded as one struct column named "args" in a
        single-row RecordBatch, which is then written as an IPC stream.

        Field types are taken explicitly from the scalars so that Arrow
        extension types (e.g., HUGEINT) that ``from_pylist()`` cannot infer
        are handled.

        Returns:
            Serialized bytes containing the Arguments.

        """
        fields: list[pa.Field[Any]] = []
        columns: list[pa.Array[Any]] = []
        for key, scalar in self.encoded_dict().items():
            if scalar is None:
                field_type, column = pa.null(), pa.nulls(1)
            else:
                field_type = scalar.type
                column = pa.repeat(scalar, 1)  # type: ignore[call-overload]
            fields.append(pa.field(key, field_type))
            columns.append(column)

        struct_array: pa.StructArray
        if fields:
            struct_array = pa.StructArray.from_arrays(columns, fields=fields)
        else:
            # Empty args: create a length-1 struct array with no fields
            struct_array = pa.array([{}], type=pa.struct([]))  # type: ignore[assignment]

        batch = pa.RecordBatch.from_arrays([struct_array], names=["args"])
        sink = pa.BufferOutputStream()
        with pa.ipc.new_stream(sink, batch.schema) as writer:
            writer.write_batch(batch)
        return sink.getvalue().to_pybytes()

    @staticmethod
    def deserialize_from_bytes(data: bytes, ipc_validation: Any = None) -> "Arguments":
        """Deserialize Arguments from IPC stream bytes.

        Only the first batch of the stream is read, and only row 0 of its
        "args" column is decoded.

        Args:
            data: Bytes serialized via serialize_to_bytes().
            ipc_validation: Unused, accepted for compatibility with
                ArrowSerializableDataclass._convert_value_for_deserialization.

        Returns:
            Deserialized Arguments instance.

        """
        first_batch = pa.ipc.open_stream(data).read_next_batch()
        return Arguments.decode(first_batch.column("args")[0])
|
|
597
|
+
|
|
598
|
+
|
|
599
|
+
class ArgumentValidationError(ValueError):
|
|
600
|
+
"""Raised when an argument fails validation.
|
|
601
|
+
|
|
602
|
+
This exception provides detailed context about what went wrong and
|
|
603
|
+
suggests how to fix the issue.
|
|
604
|
+
|
|
605
|
+
Attributes:
|
|
606
|
+
arg_name: Name of the argument that failed validation.
|
|
607
|
+
value: The invalid value that was provided.
|
|
608
|
+
constraint: Description of the constraint that was violated.
|
|
609
|
+
doc: Documentation string for the argument (if provided).
|
|
610
|
+
valid_range: Human-readable description of valid values.
|
|
611
|
+
default: Default value (if any) that could be used instead.
|
|
612
|
+
suggestions: List of valid values close to the provided value.
|
|
613
|
+
|
|
614
|
+
"""
|
|
615
|
+
|
|
616
|
+
def __init__(
|
|
617
|
+
self,
|
|
618
|
+
message: str,
|
|
619
|
+
*,
|
|
620
|
+
arg_name: str | None = None,
|
|
621
|
+
position: int | str | None = None,
|
|
622
|
+
value: Any = None,
|
|
623
|
+
constraint: str | None = None,
|
|
624
|
+
doc: str | None = None,
|
|
625
|
+
valid_range: str | None = None,
|
|
626
|
+
default: Any = _MISSING,
|
|
627
|
+
choices: Sequence[Any] | None = None,
|
|
628
|
+
) -> None:
|
|
629
|
+
"""Initialize with rich context for helpful error messages.
|
|
630
|
+
|
|
631
|
+
Args:
|
|
632
|
+
message: Base error message.
|
|
633
|
+
arg_name: Attribute name of the Arg descriptor.
|
|
634
|
+
position: Positional index or named key.
|
|
635
|
+
value: The value that failed validation.
|
|
636
|
+
constraint: What constraint was violated (e.g., "must be >= 1").
|
|
637
|
+
doc: Documentation for what this argument does.
|
|
638
|
+
valid_range: Description of valid values.
|
|
639
|
+
default: Default value if any.
|
|
640
|
+
choices: List of valid choices if applicable.
|
|
641
|
+
|
|
642
|
+
"""
|
|
643
|
+
self.arg_name = arg_name
|
|
644
|
+
self.position = position
|
|
645
|
+
self.value = value
|
|
646
|
+
self.constraint = constraint
|
|
647
|
+
self.doc = doc
|
|
648
|
+
self.valid_range = valid_range
|
|
649
|
+
self.default = default
|
|
650
|
+
self.choices = choices
|
|
651
|
+
|
|
652
|
+
# Build detailed message
|
|
653
|
+
full_message = self._build_message(message)
|
|
654
|
+
super().__init__(full_message)
|
|
655
|
+
|
|
656
|
+
def _build_message(self, base_message: str) -> str:
    """Assemble the multi-line error text from the stored context.

    The result starts with ``base_message`` and an empty line, followed by
    one line per piece of context that is set: argument position,
    attribute name, value, constraint, purpose, valid values, up to three
    suggested choices and a hint about the default value.

    Args:
        base_message: Headline sentence for the error.

    Returns:
        The lines joined with newline characters.

    """
    out = [base_message, ""]

    # Where the argument was passed: positional index or named key.
    where = self.position
    if isinstance(where, int):
        out.append(f" Argument: positional argument {where}")
    elif where is not None:
        out.append(f" Argument: named argument '{where}'")

    # The attribute name tells the author which descriptor to look at.
    if self.arg_name:
        out.append(f" Attribute: self.{self.arg_name}")

    # A value of None is indistinguishable from "no value given" here, so
    # it is never printed.
    if self.value is not None:
        out.append(f" Value: {self.value!r}")

    if self.constraint:
        out.append(f" Constraint: {self.constraint}")

    if self.doc:
        out += ["", f" Purpose: {self.doc}"]

    if self.valid_range:
        out.append(f" Valid values: {self.valid_range}")

    if self.choices:
        close = self._suggest_similar_choices()
        if close:
            out += ["", " Did you mean:"]
            # Only the three best candidates are listed.
            out.extend(f" - {candidate!r}" for candidate in close[:3])

    # _MISSING (not None) marks "no default", so a default of None still
    # produces the hint.
    if self.default is not _MISSING:
        out += ["", f" Tip: Omit this argument to use default value: {self.default!r}"]

    return "\n".join(out)
|
|
703
|
+
|
|
704
|
+
def _suggest_similar_choices(self) -> list[Any]:
|
|
705
|
+
"""Find choices similar to the provided value."""
|
|
706
|
+
if not self.choices or self.value is None:
|
|
707
|
+
return []
|
|
708
|
+
|
|
709
|
+
# For strings, find similar by edit distance or prefix
|
|
710
|
+
if isinstance(self.value, str):
|
|
711
|
+
value_lower = self.value.lower()
|
|
712
|
+
scored: list[tuple[int, Any]] = []
|
|
713
|
+
|
|
714
|
+
for choice in self.choices:
|
|
715
|
+
if isinstance(choice, str):
|
|
716
|
+
choice_lower = choice.lower()
|
|
717
|
+
# Prioritize prefix matches
|
|
718
|
+
if choice_lower.startswith(value_lower):
|
|
719
|
+
scored.append((0, choice))
|
|
720
|
+
elif value_lower.startswith(choice_lower):
|
|
721
|
+
scored.append((1, choice))
|
|
722
|
+
# Then substring matches
|
|
723
|
+
elif value_lower in choice_lower or choice_lower in value_lower:
|
|
724
|
+
scored.append((2, choice))
|
|
725
|
+
else:
|
|
726
|
+
# Simple character overlap score
|
|
727
|
+
overlap = len(set(value_lower) & set(choice_lower))
|
|
728
|
+
if overlap > len(value_lower) // 2:
|
|
729
|
+
scored.append((10 - overlap, choice))
|
|
730
|
+
|
|
731
|
+
scored.sort(key=lambda x: x[0])
|
|
732
|
+
return [choice for _, choice in scored]
|
|
733
|
+
|
|
734
|
+
# For numbers, find closest values
|
|
735
|
+
if isinstance(self.value, int | float):
|
|
736
|
+
try:
|
|
737
|
+
numeric_choices = [c for c in self.choices if isinstance(c, int | float)]
|
|
738
|
+
numeric_choices.sort(key=lambda c: abs(c - self.value))
|
|
739
|
+
return numeric_choices
|
|
740
|
+
except TypeError:
|
|
741
|
+
pass
|
|
742
|
+
|
|
743
|
+
return list(self.choices)
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
# Module-level type variable named after the type parameter of Arg.
ArgT = TypeVar("ArgT")

# Signature of a type-bound predicate: a callable that receives an Arrow
# data type and returns whether it is acceptable (e.g., pa.types.is_integer).
TypeBoundPredicate = Callable[[pa.DataType], bool]
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
class _ArgFactory:
    """Factory returned by ``Arg[type]`` that remembers the type parameter.

    ``Arg[str](0)`` first produces ``_ArgFactory(str)`` and then calls it,
    yielding an ``Arg`` whose ``_type_param`` is ``str``. The stored type
    can later be used (e.g., by extract_argument_specs) to infer the
    Arrow type.
    """

    __slots__ = ("_type_param",)

    def __init__(self, type_param: type) -> None:
        """Remember the object written inside ``Arg[...]``."""
        self._type_param = type_param

    def __call__(
        self,
        position: int | str,
        *,
        default: Any = _MISSING,
        doc: str = "",
        ge: float | int | None = None,
        le: float | int | None = None,
        gt: float | int | None = None,
        lt: float | int | None = None,
        choices: Sequence[Any] | None = None,
        pattern: str | None = None,
        varargs: bool = False,
        arrow_type: pa.DataType | None = None,
        type_bound: "TypeBoundPredicate | Sequence[TypeBoundPredicate] | None" = None,
        const: bool = False,
        is_any: bool = False,
    ) -> "Arg[Any]":
        """Create an ``Arg`` carrying the captured type parameter.

        All parameters have the same meaning as in ``Arg.__init__`` and are
        forwarded to it unchanged.

        Returns:
            A new ``Arg`` with ``_type_param`` set to the captured type and
            ``_returns_any_arrow_value`` set when that type is ``AnyArrow``.

        Raises:
            ValueError: Propagated from ``Arg.__init__`` for conflicting
                bounds, invalid varargs settings, or a default on a
                positional argument.

        Warns:
            UserWarning: If ``type_bound`` is given but neither the captured
                type is ``AnyArrow`` nor ``is_any`` is set.

        """
        # Delegate to Arg.__init__ instead of re-implementing its checks and
        # slot initialisation on a bare Arg.__new__ instance: the two copies
        # had to be kept in sync by hand.
        arg: Arg[Any] = Arg(
            position,
            default=default,
            doc=doc,
            ge=ge,
            le=le,
            gt=gt,
            lt=lt,
            choices=choices,
            pattern=pattern,
            varargs=varargs,
            arrow_type=arrow_type,
            type_bound=type_bound,
            const=const,
            is_any=is_any,
        )

        # Warn if type_bound is used with a non-AnyArrow type. Both the
        # captured type (legacy API) and is_any (new Param API) are checked.
        # Emitted directly from __call__ so stacklevel=2 still points at the
        # code that wrote Arg[...](...).
        if type_bound is not None and self._type_param is not AnyArrow and not is_any:
            type_name = getattr(self._type_param, "__name__", str(self._type_param))
            warnings.warn(
                f"type_bound is only meaningful for Arg[AnyArrow], but was specified for Arg[{type_name}]",
                UserWarning,
                stacklevel=2,
            )

        # The only state that differs from a plain Arg(...) call.
        arg._type_param = self._type_param
        # Set based on legacy Arg[AnyArrow] pattern.
        arg._returns_any_arrow_value = self._type_param is AnyArrow

        return arg
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
class Arg[ArgT]:
|
|
847
|
+
"""Descriptor for declarative argument parsing with optional validation.
|
|
848
|
+
|
|
849
|
+
Use as a class attribute to declare function arguments that are automatically
|
|
850
|
+
parsed from self.arguments when accessed. This eliminates the need to override
|
|
851
|
+
__init__ for simple argument parsing.
|
|
852
|
+
|
|
853
|
+
Attributes:
|
|
854
|
+
position: Positional index (int) or named key (str).
|
|
855
|
+
default: Default value if argument not provided. Omit for required arguments.
|
|
856
|
+
doc: Documentation string for this argument.
|
|
857
|
+
ge: Value must be >= this (for numeric types).
|
|
858
|
+
le: Value must be <= this (for numeric types).
|
|
859
|
+
gt: Value must be > this (for numeric types).
|
|
860
|
+
lt: Value must be < this (for numeric types).
|
|
861
|
+
choices: Value must be one of these options.
|
|
862
|
+
pattern: Value must match this regex pattern (for strings).
|
|
863
|
+
|
|
864
|
+
Note:
|
|
865
|
+
For named arguments (string position), the Python attribute name should
|
|
866
|
+
match the SQL key. This is the standard convention::
|
|
867
|
+
|
|
868
|
+
format = Arg[str]("format") # Recommended: attribute == key
|
|
869
|
+
|
|
870
|
+
Avoid using different names::
|
|
871
|
+
|
|
872
|
+
output_format = Arg[str]("format") # Not recommended
|
|
873
|
+
|
|
874
|
+
While this works at runtime, it can cause issues with metadata
|
|
875
|
+
serialization where only one name is preserved.
|
|
876
|
+
|
|
877
|
+
"""
|
|
878
|
+
|
|
879
|
+
__slots__ = (
|
|
880
|
+
"position",
|
|
881
|
+
"default",
|
|
882
|
+
"doc",
|
|
883
|
+
"ge",
|
|
884
|
+
"le",
|
|
885
|
+
"gt",
|
|
886
|
+
"lt",
|
|
887
|
+
"choices",
|
|
888
|
+
"pattern",
|
|
889
|
+
"varargs",
|
|
890
|
+
"arrow_type",
|
|
891
|
+
"type_bound",
|
|
892
|
+
"const",
|
|
893
|
+
"is_any",
|
|
894
|
+
"_name",
|
|
895
|
+
"_compiled_pattern",
|
|
896
|
+
"_type_param",
|
|
897
|
+
"_returns_any_arrow_value",
|
|
898
|
+
"_resolution_index",
|
|
899
|
+
)
|
|
900
|
+
|
|
901
|
+
def __init__(
    self,
    position: int | str,
    *,
    default: ArgT | Any = _MISSING,
    doc: str = "",
    ge: float | int | None = None,
    le: float | int | None = None,
    gt: float | int | None = None,
    lt: float | int | None = None,
    choices: Sequence[ArgT] | None = None,
    pattern: str | None = None,
    varargs: bool = False,
    arrow_type: pa.DataType | None = None,
    type_bound: "TypeBoundPredicate | Sequence[TypeBoundPredicate] | None" = None,
    const: bool = False,
    is_any: bool = False,
) -> None:
    """Create an Arg descriptor, checking that its options are coherent.

    Args:
        position: Positional index (int) or named key (str). Positional
            arguments are always required (DuckDB's binder always supplies
            them); use a string key to make an argument optional.
        default: Value used when the argument is not provided. Only
            allowed together with a string position; omit it for a
            required argument.
        doc: Documentation string for this argument.
        ge: Inclusive lower bound. Value must be >= this.
        le: Inclusive upper bound. Value must be <= this.
        gt: Exclusive lower bound. Value must be > this.
        lt: Exclusive upper bound. Value must be < this.
        choices: Allowed values. Value must be one of these.
        pattern: Regex pattern for string validation; compiled once here.
        varargs: If True, collect all remaining positional arguments from
            this position onwards (returns tuple[ArgT, ...]). Requires an
            int position, no default, and at least 1 value at call time.
        arrow_type: Explicit Arrow type for this argument. If not given,
            the type is inferred from the type hint using PYTHON_TO_ARROW.
        type_bound: Predicate, or sequence of predicates combined with OR
            logic, for Arg[AnyArrow] column type validation (e.g.,
            pa.types.is_integer).
        const: If True, marks this argument as constant-folded
            (ConstParam): its value is known at planning time.
        is_any: If True, records that AnyArrow was specified in the type
            hint, i.e. the argument accepts any Arrow type.

    Raises:
        ValueError: If both ``ge`` and ``gt`` or both ``le`` and ``lt`` are
            given, if ``varargs`` is combined with a string position or a
            default, or if a default is supplied with an integer position.

    """
    has_default = default is not _MISSING

    # A bound may be inclusive or exclusive, never both.
    if ge is not None and gt is not None:
        raise ValueError("Cannot specify both 'ge' and 'gt'")
    if le is not None and lt is not None:
        raise ValueError("Cannot specify both 'le' and 'lt'")

    # varargs must be positional and cannot fall back to a default.
    if varargs and isinstance(position, str):
        raise ValueError("varargs=True requires a positional argument (int), not named")
    if varargs and has_default:
        raise ValueError("varargs=True cannot have a default value (requires at least 1 value)")

    # DuckDB's binder always requires a positional argument to be supplied,
    # so a default on one would never be consulted through the SQL path.
    # Optional arguments must be declared with a string position instead
    # (e.g. Arg("count", default=10)).
    if has_default and isinstance(position, int):
        raise ValueError(
            f"Arg(position={position}, default=...): positional arguments cannot "
            f"have a default value. DuckDB's binder always requires the positional "
            f"argument, so the default would never fire. To make this argument "
            f'optional, use a named argument: Arg("{{name}}", default=...).'
        )

    # Identity and documentation.
    self.position = position
    self.default = default
    self.doc = doc

    # Value constraints.
    self.ge = ge
    self.le = le
    self.gt = gt
    self.lt = lt
    self.choices = choices
    self.pattern = pattern

    # Typing and binding options.
    self.varargs = varargs
    self.arrow_type = arrow_type
    self.type_bound = type_bound
    self.const = const
    self.is_any = is_any

    # Internal state.
    self._name: str | None = None
    self._type_param: type | None = None
    # Set by __init_subclass__ when using Annotated[AnyArrowValue, Arg(...)]
    self._returns_any_arrow_value: bool = False
    # When set, _resolve() uses this instead of position for the lookup
    # (it may differ from position for const params).
    self._resolution_index: int | None = None
    # Compiled once here so validation does not recompile per value.
    self._compiled_pattern: re.Pattern[str] | None = re.compile(pattern) if pattern is not None else None
|
|
1006
|
+
|
|
1007
|
+
def __class_getitem__(cls, item: type) -> "_ArgFactory":
    """Handle the ``Arg[type]`` subscript by returning a factory bound to ``item``.

    Writing ``Arg[str](0)`` first evaluates ``Arg[str]``, which calls this
    method with ``item=str``. The returned ``_ArgFactory`` is then called
    with the remaining arguments and creates an Arg whose ``_type_param``
    is ``str``.

    Args:
        item: The object written between the brackets.

    Returns:
        An ``_ArgFactory`` constructed with ``item``.

    """
    return _ArgFactory(item)
|
|
1015
|
+
|
|
1016
|
+
def __set_name__(self, owner: type, name: str) -> None:
    """Record the attribute name this descriptor was assigned to.

    Args:
        owner: The class the descriptor is assigned on (unused here).
        name: The attribute name; stored in ``_name``, which ``__get__``
            uses as the key for the per-instance cached value.

    """
    self._name = name
|
|
1019
|
+
|
|
1020
|
+
@overload
def __get__(self, obj: None, objtype: type) -> "Arg[ArgT]": ...

@overload
def __get__(self, obj: object, objtype: type | None = None) -> ArgT: ...

def __get__(self, obj: object | None, objtype: type | None = None) -> "Arg[ArgT] | ArgT":
    """Return the descriptor itself or the resolved argument value.

    Accessed on the class (``obj`` is None) the descriptor is returned.
    Accessed on an instance, the value is resolved once via ``_resolve``
    and stored in the instance ``__dict__`` under the attribute name;
    later calls return the stored value.

    Raises:
        RuntimeError: If ``_name`` was never set by ``__set_name__``.

    """
    if obj is None:
        return self  # Class-level access returns descriptor

    attr = self._name
    if attr is None:
        raise RuntimeError(
            "Arg descriptor was not properly initialized. "
            "This typically means the descriptor was accessed before __set_name__ "
            "was called. Ensure Arg is used as a class attribute, not instantiated "
            "dynamically."
        )

    # The instance dict is the cache: resolve only on the first access.
    cache = obj.__dict__
    if attr not in cache:
        cache[attr] = self._resolve(obj)
    return cache[attr]  # type: ignore[no-any-return]
|
|
1043
|
+
|
|
1044
|
+
def _resolve(self, obj: object) -> ArgT:
    """Read this argument from ``obj.invocation.arguments`` and validate it.

    Args:
        obj: Instance the descriptor is accessed on. It must expose an
            ``invocation`` attribute whose ``arguments`` object provides
            ``get`` and ``get_varargs``.

    Returns:
        The argument value. For varargs this is whatever
        ``arguments.get_varargs`` returned. When
        ``_returns_any_arrow_value`` is set, the value is wrapped in an
        ``AnyArrowValue`` together with the position and attribute name.

    Raises:
        RuntimeError: If ``obj`` has no ``invocation`` attribute (or it is
            None).
        ArgumentValidationError: If a varargs argument received no values
            or a value violates a constraint.

    """
    invocation = getattr(obj, "invocation", None)
    if invocation is None:
        raise RuntimeError(
            f"Cannot resolve Arg '{self._name}': object {type(obj).__name__} does "
            f"not have an 'invocation' attribute. Arg descriptors can only be used "
            f"on classes that have an 'invocation' attribute (e.g., "
            f"TableInOutFunction, TableFunctionGenerator)."
        )
    arguments = invocation.arguments

    # Use _resolution_index if set (for const params with separate tracking),
    # otherwise fall back to position.
    lookup_pos: int | str = self._resolution_index if self._resolution_index is not None else self.position

    if self.varargs:
        # Collect all positional arguments from this position onwards.
        # A string position is rejected together with varargs in __init__;
        # the assert only narrows the type for the checker.
        assert isinstance(lookup_pos, int)  # Validated in __init__
        values = arguments.get_varargs(lookup_pos)
        if len(values) == 0:
            raise ArgumentValidationError(
                f"Argument '{self._name}' requires at least 1 value.",
                arg_name=self._name,
                position=self.position,
                constraint="varargs requires at least 1 value",
                doc=self.doc if self.doc else None,
            )
        for i, val in enumerate(values):
            # Skip None elements exactly like the scalar path below does:
            # numeric/choice/pattern constraints don't apply to SQL NULL,
            # and comparing None against a bound would raise a bare
            # TypeError instead of a structured argument error.
            if val is not None:
                self._validate_single(val, index=i)
        return values  # type: ignore[no-any-return]  # varargs returns tuple

    if self.default is _MISSING:
        value: ArgT = arguments.get(lookup_pos)
    else:
        value = arguments.get(lookup_pos, default=self.default)

    # Skip validation for None — either an explicit SQL NULL the caller
    # passed, or default=None for a nullable Arg. Numeric/choice/pattern
    # constraints don't apply to None and would otherwise TypeError.
    if value is not None:
        self._validate(value)

    # Wrap AnyArrow values with metadata for schema lookups.
    if self._returns_any_arrow_value:
        assert self._name is not None  # Set by __set_name__
        return AnyArrowValue(value, self.position, self._name)  # type: ignore[return-value]

    return value
|
|
1095
|
+
|
|
1096
|
+
def _describe_valid_range(self) -> str | None:
|
|
1097
|
+
"""Build a human-readable description of valid values."""
|
|
1098
|
+
parts = []
|
|
1099
|
+
|
|
1100
|
+
# Numeric bounds
|
|
1101
|
+
if self.ge is not None:
|
|
1102
|
+
parts.append(f">= {self.ge}")
|
|
1103
|
+
if self.gt is not None:
|
|
1104
|
+
parts.append(f"> {self.gt}")
|
|
1105
|
+
if self.le is not None:
|
|
1106
|
+
parts.append(f"<= {self.le}")
|
|
1107
|
+
if self.lt is not None:
|
|
1108
|
+
parts.append(f"< {self.lt}")
|
|
1109
|
+
|
|
1110
|
+
if parts:
|
|
1111
|
+
# Format as range if we have both bounds
|
|
1112
|
+
if len(parts) == 2:
|
|
1113
|
+
lower = parts[0]
|
|
1114
|
+
upper = parts[1]
|
|
1115
|
+
return f"{lower} and {upper}"
|
|
1116
|
+
return " and ".join(parts)
|
|
1117
|
+
|
|
1118
|
+
# Choices
|
|
1119
|
+
if self.choices is not None:
|
|
1120
|
+
if len(self.choices) <= 5:
|
|
1121
|
+
return ", ".join(repr(c) for c in self.choices)
|
|
1122
|
+
else:
|
|
1123
|
+
shown = ", ".join(repr(c) for c in list(self.choices)[:4])
|
|
1124
|
+
return f"{shown}, ... ({len(self.choices)} total options)"
|
|
1125
|
+
|
|
1126
|
+
# Pattern
|
|
1127
|
+
if self.pattern is not None:
|
|
1128
|
+
return f"string matching pattern: {self.pattern}"
|
|
1129
|
+
|
|
1130
|
+
return None
|
|
1131
|
+
|
|
1132
|
+
def _reject_none(self) -> "ArgumentValidationError":
    """Build the error for a SQL NULL passed to an Arg that does not accept it.

    The exception is returned, not raised, so the caller decides when to
    raise it. It exists because feeding None into ``_validate`` (e.g. for
    ``my_func(NULL)``) would make the numeric comparisons fail with a
    plain Python ``TypeError`` deep in the worker, which surfaces in the
    C++ extension as an opaque traceback rather than a clean argument
    error.

    NOTE(review): the previous docstring named ``_parse_arguments``
    (table_function.py) and ``_resolve`` as callers; confirm which code
    paths actually call this helper.

    Returns:
        An ``ArgumentValidationError`` describing the NULL violation.

    """
    label = self._name or str(self.position)
    context = {
        "arg_name": self._name,
        "position": self.position,
        "value": None,
        "constraint": "must not be NULL (declare type as `T | None` to accept SQL NULL)",
        "doc": self.doc or None,
        "valid_range": self._describe_valid_range(),
        "default": self.default,
    }
    return ArgumentValidationError(f"Argument '{label}' cannot be NULL.", **context)
|
|
1154
|
+
|
|
1155
|
+
def _validate(self, value: ArgT) -> None:
    """Check a single value against every configured constraint.

    Checks run in a fixed order and the first failure wins: ``ge``,
    ``le``, ``gt``, ``lt``, ``choices``, then ``pattern``.

    Args:
        value: The value to validate.

    Raises:
        ArgumentValidationError: If any constraint is violated.

    """
    arg_name = self._name or str(self.position)
    valid_range = self._describe_valid_range()

    def violation(message: str, constraint: str, **extra: Any) -> "ArgumentValidationError":
        # Every failure shares the same context; only the headline, the
        # constraint text and (for choices) the choice list differ.
        return ArgumentValidationError(
            message,
            arg_name=self._name,
            position=self.position,
            value=value,
            constraint=constraint,
            doc=self.doc if self.doc else None,
            valid_range=valid_range,
            default=self.default,
            **extra,
        )

    # Numeric range validation.
    # Note: type: ignore needed because ArgT is generic - comparisons are only
    # valid for numeric types, but "ArgT when constraints are set" can't be
    # expressed.
    if self.ge is not None and value < self.ge:  # type: ignore[operator]
        raise violation(f"Argument '{arg_name}' is too small.", f"must be >= {self.ge}")

    if self.le is not None and value > self.le:  # type: ignore[operator]
        raise violation(f"Argument '{arg_name}' is too large.", f"must be <= {self.le}")

    if self.gt is not None and value <= self.gt:  # type: ignore[operator]
        raise violation(f"Argument '{arg_name}' is too small.", f"must be > {self.gt}")

    if self.lt is not None and value >= self.lt:  # type: ignore[operator]
        raise violation(f"Argument '{arg_name}' is too large.", f"must be < {self.lt}")

    # Choices validation
    if self.choices is not None and value not in self.choices:
        raise violation(
            f"Argument '{arg_name}' has an invalid value.",
            "must be one of the allowed choices",
            choices=self.choices,
        )

    # Pattern validation (for strings)
    compiled = self._compiled_pattern
    if compiled is None:
        return

    if not isinstance(value, str):
        raise violation(
            f"Argument '{arg_name}' must be a string for pattern validation.",
            f"must be a string matching pattern '{self.pattern}'",
        )
    # re.Pattern.match anchors at the start of the string only.
    if not compiled.match(value):
        raise violation(
            f"Argument '{arg_name}' does not match the required pattern.",
            f"must match pattern '{self.pattern}'",
        )
|
|
1257
|
+
|
|
1258
|
+
def _validate_single(self, value: Any, *, index: int) -> None:
    """Check one varargs element against every configured constraint.

    Checks run in a fixed order and the first failure wins: ``ge``,
    ``le``, ``gt``, ``lt``, ``choices``, then ``pattern``. Errors report
    the position as ``"<position>[<index>]"``.

    Args:
        value: The element to validate.
        index: Index within the varargs tuple (for error messages).

    Raises:
        ArgumentValidationError: If any constraint is violated.

    """
    arg_name = self._name or str(self.position)
    valid_range = self._describe_valid_range()
    display_pos = f"{self.position}[{index}]"

    def violation(message: str, constraint: str, **extra: Any) -> "ArgumentValidationError":
        # Shared context for every failure of this element. No default is
        # passed on: the error keeps its own "no default" sentinel.
        return ArgumentValidationError(
            message,
            arg_name=self._name,
            position=display_pos,
            value=value,
            constraint=constraint,
            doc=self.doc if self.doc else None,
            valid_range=valid_range,
            **extra,
        )

    # Numeric range validation
    if self.ge is not None and value < self.ge:
        raise violation(f"Argument '{arg_name}' element {index} is too small.", f"must be >= {self.ge}")

    if self.le is not None and value > self.le:
        raise violation(f"Argument '{arg_name}' element {index} is too large.", f"must be <= {self.le}")

    if self.gt is not None and value <= self.gt:
        raise violation(f"Argument '{arg_name}' element {index} is too small.", f"must be > {self.gt}")

    if self.lt is not None and value >= self.lt:
        raise violation(f"Argument '{arg_name}' element {index} is too large.", f"must be < {self.lt}")

    # Choices validation
    if self.choices is not None and value not in self.choices:
        raise violation(
            f"Argument '{arg_name}' element {index} has an invalid value.",
            "must be one of the allowed choices",
            choices=self.choices,
        )

    # Pattern validation (for strings)
    compiled = self._compiled_pattern
    if compiled is None:
        return

    if not isinstance(value, str):
        raise violation(
            f"Argument '{arg_name}' element {index} must be a string.",
            f"must be a string matching pattern '{self.pattern}'",
        )
    # re.Pattern.match anchors at the start of the string only.
    if not compiled.match(value):
        raise violation(
            f"Argument '{arg_name}' element {index} does not match pattern.",
            f"must match pattern '{self.pattern}'",
        )
|
|
1353
|
+
|
|
1354
|
+
def format_error(self, message: str) -> str:
    """Prefix an error message with the identity of this argument.

    Intended for custom validation code that wants its messages to name
    the argument. The attribute name is used when it is set (and
    non-empty); otherwise the position is used.

    Args:
        message: The error message describing what went wrong.

    Returns:
        ``"Argument '<name>': <message>"``.

    """
    label = self._name if self._name else str(self.position)
    return f"Argument '{label}': {message}"
|
|
1370
|
+
|
|
1371
|
+
def validate_type_bound(self, field_type: pa.DataType) -> None:
    """Check an Arrow column type against this argument's ``type_bound``.

    ``type_bound`` may be one predicate or a sequence of predicates. With
    several predicates OR logic applies: the type is accepted as soon as
    one predicate returns a truthy result. Nothing is checked when no
    ``type_bound`` is configured.

    Args:
        field_type: The Arrow type of the column to validate.

    Raises:
        SchemaValidationError: If no predicate accepts ``field_type``.

    """
    from vgi.exceptions import SchemaValidationError

    bound = self.type_bound
    if bound is None:
        return

    # Normalize to a list: a bare callable counts as a single predicate.
    predicates: list[TypeBoundPredicate] = [bound] if callable(bound) else list(bound)

    # OR logic: the first accepting predicate settles it.
    for predicate in predicates:
        if predicate(field_type):
            return

    # Prefer the function name; fall back to str() for callables without one.
    predicate_names = [getattr(p, "__name__", str(p)) for p in predicates]
    raise SchemaValidationError(
        self.format_error(f"column type {field_type} does not match any of: {', '.join(predicate_names)}")
    )
|
|
1403
|
+
|
|
1404
|
+
def __repr__(self) -> str:
    """Build a compact ``Arg(...)`` string listing only the options in use.

    The position always comes first; every other option is included only
    when it is set (not the missing-default sentinel, not ``None``, or
    truthy, depending on the option).
    """

    def _label(predicate: Any) -> str:
        # Prefer the function name; fall back to str() for nameless callables.
        return getattr(predicate, "__name__", str(predicate))

    pieces = [repr(self.position)]

    if self.default is not _MISSING:
        pieces.append(f"default={self.default!r}")
    if self.doc:
        pieces.append(f"doc={self.doc!r}")

    # These options all follow the same "show when not None" rule.
    for option in ("ge", "le", "gt", "lt", "choices", "pattern"):
        current = getattr(self, option)
        if current is not None:
            pieces.append(f"{option}={current!r}")

    if self.varargs:
        pieces.append("varargs=True")
    if self.arrow_type is not None:
        pieces.append(f"arrow_type={self.arrow_type!r}")

    bound = self.type_bound
    if bound is not None:
        if callable(bound):
            pieces.append(f"type_bound={_label(bound)}")
        else:
            labels = [_label(predicate) for predicate in bound]
            pieces.append(f"type_bound=[{', '.join(labels)}]")

    if self.const:
        pieces.append("const=True")
    if self.is_any:
        pieces.append("is_any=True")

    return f"Arg({', '.join(pieces)})"
|
|
1441
|
+
|
|
1442
|
+
|
|
1443
|
+
# =============================================================================
|
|
1444
|
+
# Param, ConstParam, Returns - Dataclasses for Scalar Function Annotations
|
|
1445
|
+
# =============================================================================
|
|
1446
|
+
#
|
|
1447
|
+
# These dataclasses follow the Pydantic v2 pattern: use inside Annotated[]
|
|
1448
|
+
# for native mypy support without # type: ignore comments.
|
|
1449
|
+
#
|
|
1450
|
+
# Example:
|
|
1451
|
+
# @classmethod
|
|
1452
|
+
# def compute(
|
|
1453
|
+
# cls,
|
|
1454
|
+
# column: Annotated[pa.Array, Param(pa.int64(), "Input column")],
|
|
1455
|
+
# factor: Annotated[int, ConstParam("Multiplication factor")],
|
|
1456
|
+
# ) -> Annotated[pa.Array, Returns(pa.int64())]:
|
|
1457
|
+
# return pc.multiply(column, factor)
|
|
1458
|
+
# =============================================================================
|
|
1459
|
+
|
|
1460
|
+
|
|
1461
|
+
@dataclass(frozen=True, slots=True)
|
|
1462
|
+
class Param:
|
|
1463
|
+
"""Metadata for columnar parameters in compute() or class-level declarations.
|
|
1464
|
+
|
|
1465
|
+
Use with Annotated to declare parameters that receive pa.Array values
|
|
1466
|
+
at runtime. The type information is used for catalog registration and
|
|
1467
|
+
argument validation.
|
|
1468
|
+
|
|
1469
|
+
For ScalarFunction compute() methods, position is inferred from parameter order.
|
|
1470
|
+
|
|
1471
|
+
Args:
|
|
1472
|
+
position: Explicit column position (for class-level attributes).
|
|
1473
|
+
None means position is inferred from method signature order.
|
|
1474
|
+
arrow_type: The Arrow data type, Python type
|
|
1475
|
+
(int/str/float/bool/bytes), or None for AnyArrow (accepts any type).
|
|
1476
|
+
doc: Documentation string describing this parameter.
|
|
1477
|
+
type_bound: Type predicate(s) for validating input column types.
|
|
1478
|
+
Only meaningful when arrow_type is None (AnyArrow).
|
|
1479
|
+
varargs: If True, this parameter collects all remaining positional
|
|
1480
|
+
arguments as a list of arrays.
|
|
1481
|
+
|
|
1482
|
+
Example (ScalarFunction compute() - position inferred):
|
|
1483
|
+
class AddColumns(ScalarFunction):
|
|
1484
|
+
@classmethod
|
|
1485
|
+
def compute(
|
|
1486
|
+
cls,
|
|
1487
|
+
left: Annotated[pa.Array, Param(pa.int64(), "First value")],
|
|
1488
|
+
right: Annotated[pa.Array, Param(pa.int64(), "Second value")],
|
|
1489
|
+
) -> Annotated[pa.Array, Returns(pa.int64())]:
|
|
1490
|
+
return pc.add(left, right)
|
|
1491
|
+
|
|
1492
|
+
Example (AnyArrow with type_bound):
|
|
1493
|
+
class Double(ScalarFunction):
|
|
1494
|
+
@classmethod
|
|
1495
|
+
def compute(
|
|
1496
|
+
cls,
|
|
1497
|
+
value: Annotated[pa.Array, Param(doc="Numeric value",
|
|
1498
|
+
type_bound=pa.types.is_numeric)],
|
|
1499
|
+
) -> Annotated[pa.Array, Returns()]:
|
|
1500
|
+
return pc.multiply(value, 2)
|
|
1501
|
+
|
|
1502
|
+
"""
|
|
1503
|
+
|
|
1504
|
+
# Keep arrow_type first for backwards compatibility with Param(pa.int64(), "doc")
|
|
1505
|
+
arrow_type: pa.DataType | type | None = None
|
|
1506
|
+
doc: str = ""
|
|
1507
|
+
type_bound: "TypeBoundPredicate | Sequence[TypeBoundPredicate] | None" = None
|
|
1508
|
+
varargs: bool = False
|
|
1509
|
+
position: int | None = None
|
|
1510
|
+
|
|
1511
|
+
|
|
1512
|
+
@dataclass(frozen=True, slots=True)
|
|
1513
|
+
class ConstParam:
|
|
1514
|
+
"""Metadata for constant scalar parameters in compute().
|
|
1515
|
+
|
|
1516
|
+
Use with Annotated to declare parameters that receive constant (non-columnar)
|
|
1517
|
+
values known at planning time. The type is inferred from the Annotated first
|
|
1518
|
+
argument (e.g., `Annotated[int, ConstParam(...)]` infers pa.int64()).
|
|
1519
|
+
|
|
1520
|
+
Args:
|
|
1521
|
+
doc: Documentation string describing this parameter.
|
|
1522
|
+
arrow_type: Optional explicit Arrow type. If not provided, type is
|
|
1523
|
+
inferred from the Annotated first argument.
|
|
1524
|
+
position: Position in the argument list
|
|
1525
|
+
(optional for ScalarFunction where position is inferred from signature).
|
|
1526
|
+
|
|
1527
|
+
"""
|
|
1528
|
+
|
|
1529
|
+
doc: str = ""
|
|
1530
|
+
arrow_type: pa.DataType | type | None = None
|
|
1531
|
+
# Position in the argument list
|
|
1532
|
+
position: int | None = None
|
|
1533
|
+
# Phase when this const param is needed (aggregate functions only).
|
|
1534
|
+
# "all" = every callback, "update" = only update, "finalize" = only finalize.
|
|
1535
|
+
phase: str = "all"
|
|
1536
|
+
|
|
1537
|
+
|
|
1538
|
+
@dataclass(frozen=True, slots=True)
|
|
1539
|
+
class Setting:
|
|
1540
|
+
"""Metadata for settings parameter in compute().
|
|
1541
|
+
|
|
1542
|
+
Use with Annotated to declare parameters that receive setting values
|
|
1543
|
+
from the DuckDB session. Settings are string key-value pairs.
|
|
1544
|
+
|
|
1545
|
+
Args:
|
|
1546
|
+
key: The setting key name. If not provided, uses the parameter name.
|
|
1547
|
+
|
|
1548
|
+
"""
|
|
1549
|
+
|
|
1550
|
+
key: str | None = None
|
|
1551
|
+
|
|
1552
|
+
|
|
1553
|
+
@dataclass(frozen=True, slots=True)
|
|
1554
|
+
class Secret:
|
|
1555
|
+
"""Metadata for secrets parameter in compute() or on_bind().
|
|
1556
|
+
|
|
1557
|
+
Use with Annotated to declare parameters that receive secret values
|
|
1558
|
+
from the DuckDB SecretManager. Secrets contain multiple key-value pairs
|
|
1559
|
+
where keys are strings and values can be any DuckDB type.
|
|
1560
|
+
|
|
1561
|
+
Args:
|
|
1562
|
+
secret_type: The secret type to look up (e.g., "vgi_example", "s3").
|
|
1563
|
+
Required — C++ enforces type matching.
|
|
1564
|
+
name: Optional secret name for name-based lookup.
|
|
1565
|
+
scope: Optional static scope for pre-resolution (resolved before first bind call).
|
|
1566
|
+
|
|
1567
|
+
Examples:
|
|
1568
|
+
Secret("vgi_example") — unscoped lookup by type
|
|
1569
|
+
Secret("s3", name="my_cred") — type + name-based lookup
|
|
1570
|
+
Secret("s3", scope="s3://bucket/") — type + scope (pre-resolved)
|
|
1571
|
+
Secret("s3", name="my_cred", scope="s3://bucket/") — all three
|
|
1572
|
+
|
|
1573
|
+
"""
|
|
1574
|
+
|
|
1575
|
+
secret_type: str
|
|
1576
|
+
name: str | None = None
|
|
1577
|
+
scope: str | None = None
|
|
1578
|
+
|
|
1579
|
+
|
|
1580
|
+
@dataclass(frozen=True, slots=True)
class SecretLookupEntry(ArrowSerializableDataclass):
    """Describes one secret to be looked up.

    The same record serves function metadata (static requirements from
    annotations), runtime requests (dynamic scoped lookups), and the
    catalog-level secret requirement type (replacing the former
    ``CatalogSecretRequirement``, which had identical fields).

    It extends ``ArrowSerializableDataclass`` so it can be serialized in
    catalog ``FunctionInfo`` payloads.

    secret_type is required — C++ enforces type matching.

    Supported lookup patterns:
    - By type only: SecretLookupEntry(secret_type="s3")
    - By type + scope: SecretLookupEntry(secret_type="s3", scope="s3://bucket/")
    - By type + name: SecretLookupEntry(secret_type="s3", secret_name="my_cred")
    - By type + scope + name: all three fields set
    """

    secret_type: str
    scope: str | None = None
    secret_name: str | None = None

    def to_dict(self) -> dict[str, str | None]:
        """Return the three fields as a plain dictionary for serialization."""
        return dict(
            secret_type=self.secret_type,
            secret_name=self.secret_name,
            scope=self.scope,
        )

    @staticmethod
    def from_dict(d: dict[str, Any]) -> "SecretLookupEntry":
        """Build an entry from a dictionary.

        ``secret_type`` must be present; ``secret_name`` and ``scope``
        default to None when absent.
        """
        kind = d["secret_type"]
        lookup_name = d.get("secret_name")
        lookup_scope = d.get("scope")
        return SecretLookupEntry(secret_type=kind, secret_name=lookup_name, scope=lookup_scope)
|
|
1621
|
+
|
|
1622
|
+
|
|
1623
|
+
def _extract_setting_secret_params(
|
|
1624
|
+
method: Any,
|
|
1625
|
+
) -> tuple[dict[str, str], dict[str, Secret]]:
|
|
1626
|
+
"""Extract Setting/Secret annotations from a method signature.
|
|
1627
|
+
|
|
1628
|
+
Parses the method's type hints to find parameters annotated with
|
|
1629
|
+
Setting() or Secret(), returning mappings from parameter name to key/Secret.
|
|
1630
|
+
|
|
1631
|
+
Handles ``from __future__ import annotations`` (string annotations)
|
|
1632
|
+
using an eval-with-namespace fallback.
|
|
1633
|
+
|
|
1634
|
+
Args:
|
|
1635
|
+
method: The method to inspect (e.g., compute, on_bind).
|
|
1636
|
+
|
|
1637
|
+
Returns:
|
|
1638
|
+
Tuple of (setting_params, secret_params) where:
|
|
1639
|
+
- setting_params: dict mapping ``param_name -> setting_key``
|
|
1640
|
+
- secret_params: dict mapping ``param_name -> Secret`` instance
|
|
1641
|
+
|
|
1642
|
+
"""
|
|
1643
|
+
import contextlib
|
|
1644
|
+
import inspect
|
|
1645
|
+
from typing import get_type_hints
|
|
1646
|
+
|
|
1647
|
+
sig = inspect.signature(method)
|
|
1648
|
+
|
|
1649
|
+
# Try to get type hints (handles PEP 563 string annotations)
|
|
1650
|
+
hints: dict[str, Any] = {}
|
|
1651
|
+
with contextlib.suppress(Exception):
|
|
1652
|
+
hints = get_type_hints(method, include_extras=True)
|
|
1653
|
+
|
|
1654
|
+
# Fallback for `from __future__ import annotations`
|
|
1655
|
+
if not hints:
|
|
1656
|
+
import pyarrow as pa
|
|
1657
|
+
|
|
1658
|
+
raw_annotations = getattr(method, "__annotations__", {})
|
|
1659
|
+
from typing import Annotated
|
|
1660
|
+
|
|
1661
|
+
# Create a mock pa module with subscriptable Scalar for eval
|
|
1662
|
+
# (pa.Scalar[Any] isn't subscriptable in PyArrow at runtime)
|
|
1663
|
+
class _MockScalar:
|
|
1664
|
+
def __class_getitem__(cls, _item: Any) -> Any:
|
|
1665
|
+
return Any
|
|
1666
|
+
|
|
1667
|
+
class _MockPa:
|
|
1668
|
+
Scalar = _MockScalar
|
|
1669
|
+
|
|
1670
|
+
def __getattr__(self, attr_name: str) -> Any:
|
|
1671
|
+
return getattr(pa, attr_name)
|
|
1672
|
+
|
|
1673
|
+
eval_namespace = {
|
|
1674
|
+
**getattr(method, "__globals__", {}),
|
|
1675
|
+
"Annotated": Annotated,
|
|
1676
|
+
"Setting": Setting,
|
|
1677
|
+
"Secret": Secret,
|
|
1678
|
+
"pa": _MockPa(),
|
|
1679
|
+
}
|
|
1680
|
+
for name, annotation in raw_annotations.items():
|
|
1681
|
+
if isinstance(annotation, str):
|
|
1682
|
+
with contextlib.suppress(Exception):
|
|
1683
|
+
hints[name] = eval(annotation, eval_namespace) # noqa: S307
|
|
1684
|
+
else:
|
|
1685
|
+
hints[name] = annotation
|
|
1686
|
+
|
|
1687
|
+
setting_params: dict[str, str] = {}
|
|
1688
|
+
secret_params: dict[str, Secret] = {}
|
|
1689
|
+
|
|
1690
|
+
for name in sig.parameters:
|
|
1691
|
+
if name in ("self", "cls"):
|
|
1692
|
+
continue
|
|
1693
|
+
|
|
1694
|
+
hint = hints.get(name)
|
|
1695
|
+
if hint is None or not hasattr(hint, "__metadata__"):
|
|
1696
|
+
continue
|
|
1697
|
+
|
|
1698
|
+
for meta in hint.__metadata__:
|
|
1699
|
+
if isinstance(meta, Setting):
|
|
1700
|
+
setting_key = meta.key if meta.key is not None else name
|
|
1701
|
+
setting_params[name] = setting_key
|
|
1702
|
+
break
|
|
1703
|
+
if isinstance(meta, Secret):
|
|
1704
|
+
secret_params[name] = meta
|
|
1705
|
+
break
|
|
1706
|
+
|
|
1707
|
+
return setting_params, secret_params
|
|
1708
|
+
|
|
1709
|
+
|
|
1710
|
+
@dataclass(frozen=True, slots=True)
|
|
1711
|
+
class Auth:
|
|
1712
|
+
"""Metadata for auth context parameter in compute().
|
|
1713
|
+
|
|
1714
|
+
Use with Annotated to declare a parameter that receives the AuthContext
|
|
1715
|
+
for the current request. Returns AuthContext.anonymous() when no
|
|
1716
|
+
authentication is configured (including stdio transport).
|
|
1717
|
+
|
|
1718
|
+
"""
|
|
1719
|
+
|
|
1720
|
+
|
|
1721
|
+
@dataclass(frozen=True, slots=True)
|
|
1722
|
+
class OutputLength:
|
|
1723
|
+
"""Metadata for output length parameter in compute().
|
|
1724
|
+
|
|
1725
|
+
Use with Annotated to declare a parameter that receives the number of rows
|
|
1726
|
+
in the input batch. This is useful for scalar functions that don't take
|
|
1727
|
+
any column arguments but need to know how many output values to produce.
|
|
1728
|
+
|
|
1729
|
+
"""
|
|
1730
|
+
|
|
1731
|
+
pass
|
|
1732
|
+
|
|
1733
|
+
|
|
1734
|
+
@dataclass(frozen=True, slots=True)
|
|
1735
|
+
class Returns:
|
|
1736
|
+
"""Metadata for compute() return type.
|
|
1737
|
+
|
|
1738
|
+
Use with Annotated to declare the output Arrow type for catalog registration.
|
|
1739
|
+
The annotation indicates that compute() returns a pa.Array of the specified type.
|
|
1740
|
+
|
|
1741
|
+
Args:
|
|
1742
|
+
arrow_type: The Arrow data type of the output, or None for AnyArrow
|
|
1743
|
+
(dynamic output type determined at bind time).
|
|
1744
|
+
|
|
1745
|
+
"""
|
|
1746
|
+
|
|
1747
|
+
arrow_type: pa.DataType | None = None
|