vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/arguments.py ADDED
@@ -0,0 +1,1747 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Argument parsing and validation for VGI functions.
4
+
5
+ This module provides classes for handling function arguments in VGI:
6
+
7
+ Classes:
8
+ Arguments: Container for positional and named function arguments.
9
+ ArgumentValidationError: Raised when an argument fails validation.
10
+ Arg: Descriptor for declarative argument parsing with optional validation.
11
+ AnyArrow: Sentinel type for arguments accepting multiple Arrow types.
12
+ AnyArrowValue: Wrapper returned when accessing AnyArrow arguments.
13
+
14
+ """
15
+
16
+ import re
17
+ import types
18
+ import typing
19
+ import warnings
20
+ from collections.abc import Callable, Sequence
21
+ from dataclasses import dataclass
22
+ from typing import TYPE_CHECKING, Any, Final, TypeVar, overload
23
+
24
+ import pyarrow as pa
25
+ from vgi_rpc import ArrowSerializableDataclass
26
+
27
+ if TYPE_CHECKING:
28
+ from pyarrow import Scalar
29
+
30
# Public lookup used when an Arg is declared with a plain Python type hint.
PYTHON_TO_ARROW: dict[type, pa.DataType] = {
    int: pa.int64(),
    str: pa.utf8(),
    float: pa.float64(),
    bool: pa.bool_(),
    bytes: pa.binary(),
}

# Module-private lookup consulted by the _python_to_arrow() helper. Its key
# order determines the order of the "Supported types" list in that helper's
# error message.
_PYTHON_TO_ARROW: dict[type, pa.DataType] = {
    int: pa.int64(),
    float: pa.float64(),
    str: pa.string(),
    bool: pa.bool_(),
    bytes: pa.binary(),
}

# Reverse direction: class of an Arrow DataType instance -> Python scalar type.
# Entries are grouped by target Python type and inserted in the listed order.
# NOTE(review): the keys are the *classes* of the sample instances, so sample
# types that share one class collapse onto a single key (the last entry wins).
# Presumably pyarrow gives each sample a distinct class — confirm.
_ARROW_TO_PYTHON: dict[type, type] = {
    type(sample): python_type
    for python_type, samples in (
        # Primitives - integers
        (
            int,
            (
                pa.int8(),
                pa.int16(),
                pa.int32(),
                pa.int64(),
                pa.uint8(),
                pa.uint16(),
                pa.uint32(),
                pa.uint64(),
            ),
        ),
        # Primitives - floats
        (float, (pa.float16(), pa.float32(), pa.float64())),
        # Primitives - strings
        (str, (pa.string(), pa.large_string())),
        # Primitives - boolean
        (bool, (pa.bool_(),)),
        # Primitives - binary
        (bytes, (pa.binary(), pa.large_binary())),
        # Nested types (the fixed-size sample yields FixedSizeListType)
        (dict, (pa.struct([]),)),
        (
            list,
            (
                pa.list_(pa.int32()),
                pa.large_list(pa.int32()),
                pa.list_(pa.int32(), 3),
            ),
        ),
        (dict, (pa.map_(pa.string(), pa.int32()),)),
    )
    for sample in samples
}
79
+
80
+
81
def _python_to_arrow(py_type: type) -> pa.DataType:
    """Convert a Python type to the corresponding Arrow type.

    Args:
        py_type: Python type (int, float, str, bool, bytes).

    Returns:
        Corresponding Arrow data type.

    Raises:
        TypeError: If py_type is not a supported Python type. This includes
            objects that are not plain classes (for example ``int | None``)
            and unhashable objects.

    """
    try:
        return _PYTHON_TO_ARROW[py_type]
    except (KeyError, TypeError):
        # KeyError: unsupported type. TypeError: unhashable object.
        # Both fall through to the descriptive error below.
        pass

    # Not every annotation object carries __name__ (e.g. ``int | None``);
    # fall back to repr() so the documented TypeError is raised rather than
    # an AttributeError while formatting the message.
    type_name = getattr(py_type, "__name__", None) or repr(py_type)
    supported = ", ".join(t.__name__ for t in _PYTHON_TO_ARROW)
    raise TypeError(
        f"Cannot convert Python type '{type_name}' to Arrow type. "
        f"Supported types: {supported}. "
        f"Example: _python_to_arrow(int) -> pa.int64()"
    )
103
+
104
+
105
# -----------------------------------------------------------------------------
# PyArrow array class -> DataType lookup (used for type inference)
# -----------------------------------------------------------------------------
#
# With these tables an Arrow type can be inferred from an array-class
# annotation, e.g.:
#     Annotated[pa.Int64Array, Param(doc="...")] -> pa.int64()
#
# Only array classes whose type takes no parameters are inferable. Classes
# for parameterized types (StructArray, ListArray, Decimal128Array, ...) are
# listed separately and need an explicit arrow_type.

# Array classes whose Arrow type can be inferred automatically.
ARRAY_CLASS_TO_DATATYPE: dict[type, pa.DataType] = {
    # Signed / unsigned integers
    pa.Int8Array: pa.int8(),
    pa.Int16Array: pa.int16(),
    pa.Int32Array: pa.int32(),
    pa.Int64Array: pa.int64(),
    pa.UInt8Array: pa.uint8(),
    pa.UInt16Array: pa.uint16(),
    pa.UInt32Array: pa.uint32(),
    pa.UInt64Array: pa.uint64(),
    # Floating point
    pa.HalfFloatArray: pa.float16(),
    pa.FloatArray: pa.float32(),
    pa.DoubleArray: pa.float64(),
    # Text and binary
    pa.StringArray: pa.string(),
    pa.LargeStringArray: pa.large_string(),
    pa.BinaryArray: pa.binary(),
    pa.LargeBinaryArray: pa.large_binary(),
    # Boolean
    pa.BooleanArray: pa.bool_(),
    # Dates (these take no unit parameter)
    pa.Date32Array: pa.date32(),
    pa.Date64Array: pa.date64(),
    # Null
    pa.NullArray: pa.null(),
}

# Array classes for parameterized types. These cannot be inferred and require
# an explicit arrow_type; the intent stated here is that using one without
# arrow_type produces a helpful error (raised by code outside this table).
COMPLEX_ARRAY_CLASSES: set[type] = {
    # Nested containers
    pa.StructArray,
    pa.ListArray,
    pa.LargeListArray,
    pa.FixedSizeListArray,
    pa.MapArray,
    pa.UnionArray,
    # Types carrying parameters (value type, precision/scale, byte width)
    pa.DictionaryArray,
    pa.Decimal128Array,
    pa.Decimal256Array,
    pa.FixedSizeBinaryArray,
    # Temporal types that need an explicit unit
    pa.Time32Array,
    pa.Time64Array,
    pa.TimestampArray,
    pa.DurationArray,
}
166
+
167
+
168
def _arrow_type_to_python(arrow_type: pa.DataType) -> type:
    """Convert an Arrow type to the corresponding Python scalar type.

    The mapping is decided with ``pa.types`` predicates rather than by the
    class of ``arrow_type``: pyarrow represents its non-parameterized
    primitive types (integers, floats, strings, booleans, binary) with one
    shared ``DataType`` class, so a class-keyed lookup cannot tell them apart.

    Args:
        arrow_type: Arrow data type instance.

    Returns:
        Corresponding Python type for scalar values.
        Returns Any (object) for unknown Arrow types.

    """
    # Anything that is not an Arrow DataType has no known scalar type.
    if not isinstance(arrow_type, pa.DataType):
        return object

    checks = pa.types
    if checks.is_boolean(arrow_type):
        return bool
    if checks.is_integer(arrow_type):
        return int
    if checks.is_floating(arrow_type):
        return float
    if checks.is_string(arrow_type) or checks.is_large_string(arrow_type):
        return str
    if checks.is_binary(arrow_type) or checks.is_large_binary(arrow_type):
        return bytes
    # Struct and map are both surfaced as dict.
    if checks.is_struct(arrow_type) or checks.is_map(arrow_type):
        return dict
    if (
        checks.is_list(arrow_type)
        or checks.is_large_list(arrow_type)
        or checks.is_fixed_size_list(arrow_type)
    ):
        return list
    return object
181
+
182
+
183
# Dedicated sentinel class (and its single instance) marking "no default given".
class _MissingType:
    """Type of the ``_MISSING`` sentinel.

    A dedicated class gives the sentinel a precise static type instead of
    ``Any``. The instance is falsy and prints as ``<MISSING>``.
    """

    __slots__ = ()

    def __bool__(self) -> bool:
        # The sentinel is always falsy.
        return False

    def __repr__(self) -> str:
        return "<MISSING>"


_MISSING: Final = _MissingType()
200
+
201
+
202
+ def _accepts_none(annotated_inner_type: Any) -> bool:
203
+ """Whether a declared Arg type allows ``None``.
204
+
205
+ ``annotated_inner_type`` is the first type-arg of an
206
+ ``Annotated[T, Arg(...)]`` hint — i.e. the user's declared type for
207
+ the field. Returns True iff the type is a union that includes
208
+ ``NoneType`` (e.g. ``int | None``, ``Optional[int]``,
209
+ ``Union[int, None]``). Used by argument resolvers to reject SQL NULL
210
+ when the user did not opt in to nullable arguments.
211
+ """
212
+ if annotated_inner_type is type(None):
213
+ return True
214
+ origin = typing.get_origin(annotated_inner_type)
215
+ if origin is typing.Union or origin is types.UnionType:
216
+ return type(None) in typing.get_args(annotated_inner_type)
217
+ return False
218
+
219
+
220
# Names exported by ``from <this module> import *``.
# NOTE(review): several entries (e.g. "ConstParam", "Param", "Returns") are
# not defined in the portion of the file visible here — presumably they are
# defined further down; confirm. The private helper
# "_extract_setting_secret_params" is exported explicitly as well.
__all__ = [
    "AnyArrow",
    "AnyArrowValue",
    "ARRAY_CLASS_TO_DATATYPE",
    "Arg",
    "ArgumentValidationError",
    "Arguments",
    "COMPLEX_ARRAY_CLASSES",
    "ConstParam",
    "Param",
    "PYTHON_TO_ARROW",
    "Returns",
    "TableInput",
    "TypeBoundPredicate",
    "OutputLength",
    "Setting",
    "Secret",
    "SecretLookupEntry",
    "_extract_setting_secret_params",
]
240
+
241
+
242
class TableInput:
    """Marker type for the table-input parameter of a table-in-out function.

    Pass this as the type parameter of Arg to declare which argument receives
    the streaming table input. A TableInOutFunction must declare exactly one
    TableInput argument, and that argument must be positional (not named).

    When the function is called from SQL, the TableInput argument selects the
    table expression that feeds it. No Arrow value is associated with the
    argument: the table data is delivered as streaming RecordBatches through
    process().

    """
256
+
257
+
258
+ @dataclass(frozen=True, slots=True)
259
+ class AnyArrowValue:
260
+ """Wrapper for AnyArrow argument values with metadata.
261
+
262
+ When an Arg returns an AnyArrow type, accessing the attribute returns
263
+ an AnyArrowValue instead of just the raw value. This provides access to
264
+ both the value and the argument's position/name for schema lookups.
265
+
266
+ Attributes:
267
+ value: The Python value (from scalar.as_py()).
268
+ position: The positional index from the Arg definition (int for positional,
269
+ str for named arguments).
270
+ name: The Python attribute name of the Arg.
271
+
272
+ Example using Annotated (recommended):
273
+ from typing import Annotated
274
+
275
+ class MyFunction(TableFunctionGenerator):
276
+ col1: Annotated[AnyArrowValue, Arg(0, doc="First column")]
277
+
278
+ def on_bind(self) -> None:
279
+ # self.col1 is an AnyArrowValue
280
+ print(self.col1.value) # The column name
281
+ print(self.col1.position) # The positional index
282
+
283
+ Example using legacy Arg[AnyArrow] syntax:
284
+ class MyFunction(TableFunctionGenerator):
285
+ col1 = Arg[AnyArrow](0, doc="First column") # type: ignore[assignment]
286
+
287
+ """
288
+
289
+ value: Any
290
+ position: int | str
291
+ name: str
292
+
293
+
294
+ class AnyArrow:
295
+ """Sentinel type for arguments accepting multiple Arrow types.
296
+
297
+ Use this with ``AnyArrowValue`` in the Annotated pattern when an argument
298
+ should accept multiple valid Arrow types, validated via the ``type_bound``
299
+ parameter. When accessed, returns an AnyArrowValue containing the value
300
+ plus metadata (position and name).
301
+
302
+ Choosing Between Specific Types and AnyArrowValue
303
+ -------------------------------------------------
304
+ - **Single required type**: Use ``Annotated[str, Arg(...)]`` or similar.
305
+ The argument will only accept that exact type.
306
+
307
+ - **Multiple valid types**: Use ``Annotated[AnyArrowValue, Arg(...)]`` with
308
+ ``type_bound`` to specify which types are acceptable. For example, numeric
309
+ operations that work on integers, floats, and decimals should use AnyArrowValue.
310
+
311
+ The ``type_bound`` parameter is ONLY meaningful for ``AnyArrowValue`` arguments.
312
+ Using it with other types will emit a warning.
313
+
314
+ Examples using Annotated (recommended):
315
+ from typing import Annotated
316
+ from vgi import Arg, AnyArrowValue
317
+
318
+ # Single type: function only works with strings
319
+ class UpperCaseFunction(TableFunctionGenerator):
320
+ column: Annotated[str, Arg(0, doc="String column to uppercase")]
321
+
322
+ # Multiple types: function works with any numeric type
323
+ class DoubleFunction(TableFunctionGenerator):
324
+ column: Annotated[
325
+ AnyArrowValue,
326
+ Arg(0, type_bound=[pa.types.is_integer, pa.types.is_floating])
327
+ ]
328
+
329
+ def on_bind(self) -> None:
330
+ # Access column metadata for dynamic output type
331
+ self._output_type = self.column.value
332
+
333
+ # Any type: function works with all types
334
+ class IdentityFunction(TableFunctionGenerator):
335
+ column: Annotated[AnyArrowValue, Arg(0, doc="Column to pass through")]
336
+
337
+ Accessing Values:
338
+ When using AnyArrowValue, access the value via the ``.value`` attribute::
339
+
340
+ val = self.column.value # The column name as a string
341
+ pos = self.column.position # The positional index
342
+
343
+ Note:
344
+ Unlike TableInput, AnyArrow arguments have actual Arrow values -
345
+ they are just not constrained to a specific Arrow type.
346
+
347
+ """
348
+
349
+ # Type stubs for static analysis - at runtime, Arg[AnyArrow] returns AnyArrowValue
350
+ value: Any
351
+ position: int | str
352
+ name: str
353
+
354
+
355
@dataclass(frozen=True, slots=True)
class Arguments:
    """Container for function arguments.

    Access arguments using get() for Python values:

        # Positional arguments (by index)
        count = args.get(0)  # First argument
        name = args.get(1, default="unnamed")  # With default

        # Named arguments (by string)
        separator = args.get("sep", default=",")
        threshold = args.get("threshold")

        # With type validation (optional, for strict checking)
        count = args.get(0, type=pa.int64())

    For direct Arrow Scalar access, use positional/named attributes:

        scalar = args.positional[0]  # pa.Scalar | None
        scalar = args.named["sep"]  # pa.Scalar

    Attributes:
        positional: Tuple of positional argument values as pa.Scalar. A slot
            may hold ``None``; get() treats such a slot as "not provided".
        named: Dictionary mapping argument names to pa.Scalar values, or
            ``None`` when there are no named arguments.

    """

    positional: tuple["Scalar[Any] | None", ...] = ()
    named: dict[str, "Scalar[Any]"] | None = None

    def get(
        self,
        key: int | str,
        *,
        type: pa.DataType | None = None,
        default: Any = _MISSING,
    ) -> Any:
        """Get argument as Python value.

        SQL NULL is a real value, distinct from "argument not provided".
        ``default`` is consulted only when the caller omitted the argument
        entirely; an explicit SQL NULL returns ``None``.

        Args:
            key: Positional index (int) or argument name (str).
            type: Expected Arrow type. Raises TypeError if mismatch.
            default: Value to return if argument is omitted (not provided
                by the caller). If not provided, raises an exception for
                missing args. ``default`` is *not* consulted for explicit
                SQL NULL — that case returns ``None``.

        Returns:
            The argument value as a Python object. ``None`` if the caller
            passed an explicit SQL NULL.

        Raises:
            IndexError: Positional argument not provided (no default).
            KeyError: Named argument not provided (no default).
            TypeError: Argument type doesn't match `type` parameter.

        """
        # An absent argument means the caller did not write it at all; the
        # C++ extension only ships fields the user supplied, so absence shows
        # up as out-of-range (positional) or missing key (named). A scalar
        # that is present but invalid is an *explicit* SQL NULL.
        if isinstance(key, int):
            # Negative indexes are treated as absent, never as "from the end".
            scalar = self.positional[key] if 0 <= key < len(self.positional) else None
            if scalar is None:
                if default is not _MISSING:
                    return default
                raise IndexError(
                    f"Argument {key}: index out of range (have {len(self.positional)} positional arguments)"
                )
            label = f"Argument {key}"
        else:
            if self.named is None or key not in self.named:
                if default is not _MISSING:
                    return default
                raise KeyError(f"Argument '{key}': not found")
            scalar = self.named[key]
            label = f"Argument '{key}'"

        # Type validation (if requested)
        if type is not None and scalar.type != type:
            raise TypeError(f"{label}: expected {type}, got {scalar.type}")

        return scalar.as_py()

    def get_varargs(
        self,
        start: int,
        *,
        type: pa.DataType | None = None,
    ) -> tuple[Any, ...]:
        """Get all positional arguments from start position onwards.

        Args:
            start: Starting positional index (inclusive).
            type: Expected Arrow type for all values. Raises TypeError if mismatch.

        Returns:
            Tuple of argument values as Python objects. Empty when ``start``
            is at or past the number of positional arguments.

        Raises:
            ValueError: ``start`` is negative, or a value is missing/null.
            TypeError: A value's type doesn't match `type` parameter.

        """
        if start < 0:
            raise ValueError(f"start must be non-negative, got {start}")

        values: list[Any] = []
        for index, scalar in enumerate(self.positional[start:], start=start):
            # Varargs don't support nulls (neither empty slots nor SQL NULL).
            if scalar is None or not scalar.is_valid:
                raise ValueError(f"Argument {index}: value is null (varargs cannot contain nulls)")

            # Type validation (if requested)
            if type is not None and scalar.type != type:
                raise TypeError(f"Argument {index}: expected {type}, got {scalar.type}")

            values.append(scalar.as_py())

        return tuple(values)

    def encoded_dict(self) -> dict[str, "Scalar[Any] | None"]:
        """Convert arguments to a dictionary suitable for serialization.

        Positional arguments are stored with keys "positional_0", "positional_1", etc.
        Named arguments are stored with their actual names prefixed by "named_".

        The reason why a dictionary is used is to facilitate serialization with Arrow,
        which can easily handle flat structures, but doesn't handle variable typed
        arrays of arbitrary objects.

        Returns:
            Dictionary mapping argument names to their values.

        """
        encoded: dict[str, Scalar[Any] | None] = {
            f"positional_{index}": value for index, value in enumerate(self.positional)
        }
        if self.named:
            encoded.update({f"named_{name}": value for name, value in self.named.items()})
        return encoded

    @staticmethod
    def _field_for(key: str, scalar: "Scalar[Any] | None") -> "pa.Field[Any]":
        """Build the Arrow field for one encoded argument (null type for an empty slot)."""
        return pa.field(key, pa.null() if scalar is None else scalar.type)

    def schema(self) -> pa.Schema:
        """Return Arrow schema for serializing these Arguments.

        Creates a schema with one field per argument: "positional_0", "positional_1",
        etc. for positional args, and "named_<name>" for named args. Field types
        are taken directly from scalar values to handle Arrow extension types.

        Returns:
            Arrow schema matching the structure returned by encoded_dict().

        """
        return pa.schema([self._field_for(key, scalar) for key, scalar in self.encoded_dict().items()])

    @staticmethod
    def decode(data: pa.StructScalar) -> "Arguments":
        """Decode Arguments from a serialized struct.

        Args:
            data: Struct scalar whose fields are named "positional_<index>"
                or "named_<name>"; other fields are ignored.

        Returns:
            Deserialized Arguments instance.

        """
        positional: list[Scalar[Any] | None] = []
        named: dict[str, Scalar[Any]] = {}
        for key, value in data.items():
            if key.startswith("positional_"):
                index = int(key[len("positional_") :])
                # Fields may arrive in any order; pad so the slot exists.
                if len(positional) <= index:
                    positional.extend([None] * (index + 1 - len(positional)))
                positional[index] = value
            elif key.startswith("named_"):
                named[key[len("named_") :]] = value
        return Arguments(positional=tuple(positional), named=named or None)

    def serialize_to_bytes(self) -> bytes:
        """Serialize Arguments to bytes using Arrow IPC format.

        Creates a single-row RecordBatch with the arguments encoded as
        a struct column, then serializes it to IPC stream bytes.

        Builds the batch with explicit types from scalar values to handle
        Arrow extension types (e.g., HUGEINT) that ``from_pylist()`` cannot infer.

        Returns:
            Serialized bytes containing the Arguments.

        """
        args_dict = self.encoded_dict()
        # Fields come from the same helper as schema(), so the two stay in sync.
        fields = [self._field_for(key, scalar) for key, scalar in args_dict.items()]
        arrays = [
            pa.nulls(1) if scalar is None else pa.repeat(scalar, 1)  # type: ignore[call-overload]
            for scalar in args_dict.values()
        ]
        if fields:
            struct_array: pa.StructArray = pa.StructArray.from_arrays(arrays, fields=fields)
        else:
            # Empty args: create a length-1 struct array with no fields
            struct_type = pa.struct([])
            struct_array = pa.array([{}], type=struct_type)  # type: ignore[assignment]
        batch = pa.RecordBatch.from_arrays([struct_array], names=["args"])
        sink = pa.BufferOutputStream()
        with pa.ipc.new_stream(sink, batch.schema) as writer:
            writer.write_batch(batch)
        return sink.getvalue().to_pybytes()

    @staticmethod
    def deserialize_from_bytes(data: bytes, ipc_validation: Any = None) -> "Arguments":
        """Deserialize Arguments from bytes.

        Args:
            data: Bytes serialized via serialize_to_bytes().
            ipc_validation: Unused, accepted for compatibility with
                ArrowSerializableDataclass._convert_value_for_deserialization.

        Returns:
            Deserialized Arguments instance.

        """
        # Close the stream reader deterministically instead of leaving it
        # to garbage collection.
        with pa.ipc.open_stream(data) as reader:
            batch = reader.read_next_batch()
        return Arguments.decode(batch.column("args")[0])
597
+
598
+
599
class ArgumentValidationError(ValueError):
    """Raised when an argument fails validation.

    This exception provides detailed context about what went wrong and
    suggests how to fix the issue.

    Attributes:
        arg_name: Name of the argument that failed validation.
        position: Positional index (int) or named key (str) of the argument.
        value: The invalid value that was provided.
        constraint: Description of the constraint that was violated.
        doc: Documentation string for the argument (if provided).
        valid_range: Human-readable description of valid values.
        default: Default value (if any) that could be used instead.
        choices: Valid choices (if any); used to suggest values close to
            the provided one.

    """

    def __init__(
        self,
        message: str,
        *,
        arg_name: str | None = None,
        position: int | str | None = None,
        value: Any = None,
        constraint: str | None = None,
        doc: str | None = None,
        valid_range: str | None = None,
        default: Any = _MISSING,
        choices: Sequence[Any] | None = None,
    ) -> None:
        """Initialize with rich context for helpful error messages.

        Args:
            message: Base error message.
            arg_name: Attribute name of the Arg descriptor.
            position: Positional index or named key.
            value: The value that failed validation.
            constraint: What constraint was violated (e.g., "must be >= 1").
            doc: Documentation for what this argument does.
            valid_range: Description of valid values.
            default: Default value if any.
            choices: List of valid choices if applicable.

        """
        self.arg_name = arg_name
        self.position = position
        self.value = value
        self.constraint = constraint
        self.doc = doc
        self.valid_range = valid_range
        self.default = default
        self.choices = choices

        # The exception text is the base message plus every available detail.
        super().__init__(self._build_message(message))

    def _build_message(self, base_message: str) -> str:
        """Build a detailed, helpful error message."""
        lines = [base_message, ""]

        # Add position info
        if self.position is not None:
            if isinstance(self.position, int):
                lines.append(f" Argument: positional argument {self.position}")
            else:
                lines.append(f" Argument: named argument '{self.position}'")

        # Always show attribute name if set (helps identify where in code to fix)
        if self.arg_name:
            lines.append(f" Attribute: self.{self.arg_name}")

        # Add value info (a None value is not shown)
        if self.value is not None:
            lines.append(f" Value: {self.value!r}")

        # Add constraint info
        if self.constraint:
            lines.append(f" Constraint: {self.constraint}")

        # Add documentation
        if self.doc:
            lines.append("")
            lines.append(f" Purpose: {self.doc}")

        # Add valid range
        if self.valid_range:
            lines.append(f" Valid values: {self.valid_range}")

        # Add suggestions for choices (at most three)
        if self.choices:
            suggestions = self._suggest_similar_choices()
            if suggestions:
                lines.append("")
                lines.append(" Did you mean:")
                lines.extend(f" - {suggestion!r}" for suggestion in suggestions[:3])

        # Add default value hint
        if self.default is not _MISSING:
            lines.append("")
            lines.append(f" Tip: Omit this argument to use default value: {self.default!r}")

        return "\n".join(lines)

    def _suggest_similar_choices(self) -> list[Any]:
        """Find choices similar to the provided value.

        Returns:
            For a string value: string choices ranked prefix match first,
            then choices that are a prefix of the value, then substring
            matches, then choices sharing more than half of the value's
            distinct characters (most shared first). For a numeric value:
            numeric choices ordered by distance. Otherwise all choices.
            Empty when there are no choices or the value is None.

        """
        if not self.choices or self.value is None:
            return []

        if isinstance(self.value, str):
            needle = self.value.lower()
            # Rank by (tier, -overlap). Keeping the tier separate from the
            # overlap count guarantees a character-overlap match can never
            # tie with or outrank a prefix/substring match, however many
            # characters it shares.
            ranked: list[tuple[int, int, Any]] = []

            for choice in self.choices:
                if not isinstance(choice, str):
                    continue
                candidate = choice.lower()
                if candidate.startswith(needle):
                    ranked.append((0, 0, choice))
                elif needle.startswith(candidate):
                    ranked.append((1, 0, choice))
                elif needle in candidate or candidate in needle:
                    ranked.append((2, 0, choice))
                else:
                    # Count distinct shared characters; keep only choices
                    # sharing more than half of the value's length.
                    overlap = len(set(needle) & set(candidate))
                    if overlap > len(needle) // 2:
                        ranked.append((3, -overlap, choice))

            # Stable sort: equally ranked choices keep their declared order.
            ranked.sort(key=lambda entry: entry[:2])
            return [choice for _, _, choice in ranked]

        # For numbers, find closest values
        if isinstance(self.value, int | float):
            try:
                return sorted(
                    (c for c in self.choices if isinstance(c, int | float)),
                    key=lambda c: abs(c - self.value),
                )
            except TypeError:
                # Fall through to returning every choice unranked.
                pass

        return list(self.choices)
744
+
745
+
746
# Type variable parameterizing the generic Arg class.
ArgT = TypeVar("ArgT")

# Signature of a type-bound predicate: takes an Arrow DataType and returns a
# bool (e.g. pa.types.is_integer).
TypeBoundPredicate = Callable[[pa.DataType], bool]
751
+
752
+
753
class _ArgFactory:
    """Factory returned by ``Arg[type]`` that remembers the type parameter.

    Writing ``Arg[str](0)`` first evaluates ``Arg[str]`` (yielding this
    factory with ``_type_param=str``) and then calls it, producing an ``Arg``
    whose ``_type_param`` is ``str``. The stored type can later be used (for
    example by ``extract_argument_specs``) to infer the Arrow type.
    """

    __slots__ = ("_type_param",)

    def __init__(self, type_param: type) -> None:
        """Remember the type that was written between the square brackets."""
        self._type_param = type_param

    def __call__(
        self,
        position: int | str,
        *,
        default: Any = _MISSING,
        doc: str = "",
        ge: float | int | None = None,
        le: float | int | None = None,
        gt: float | int | None = None,
        lt: float | int | None = None,
        choices: Sequence[Any] | None = None,
        pattern: str | None = None,
        varargs: bool = False,
        arrow_type: pa.DataType | None = None,
        type_bound: "TypeBoundPredicate | Sequence[TypeBoundPredicate] | None" = None,
        const: bool = False,
        is_any: bool = False,
    ) -> "Arg[Any]":
        """Create an ``Arg`` carrying the captured type parameter.

        All parameters have the same meaning as in ``Arg.__init__``; they are
        forwarded unchanged so that validation and attribute initialisation
        live in exactly one place.

        Returns:
            A new ``Arg`` with ``_type_param`` set to the captured type and
            ``_returns_any_arrow_value`` set when that type is ``AnyArrow``.

        Raises:
            ValueError: For the same invalid combinations ``Arg.__init__``
                rejects (ge+gt, le+lt, named/defaulted varargs, positional
                argument with a default).

        Warns:
            UserWarning: If ``type_bound`` is given although neither the
                captured type is ``AnyArrow`` nor ``is_any`` is set.
        """
        # Delegate to Arg.__init__ rather than re-implementing its checks and
        # attribute assignments here; a hand-maintained copy can silently
        # drift from the real constructor.
        arg: Arg[Any] = Arg(
            position,
            default=default,
            doc=doc,
            ge=ge,
            le=le,
            gt=gt,
            lt=lt,
            choices=choices,
            pattern=pattern,
            varargs=varargs,
            arrow_type=arrow_type,
            type_bound=type_bound,
            const=const,
            is_any=is_any,
        )

        # type_bound only makes sense for "any Arrow type" arguments. Check
        # both the captured type (legacy Arg[AnyArrow] API) and is_any (new
        # Param API) before warning.
        if type_bound is not None and self._type_param is not AnyArrow and not is_any:
            type_name = getattr(self._type_param, "__name__", str(self._type_param))
            warnings.warn(
                f"type_bound is only meaningful for Arg[AnyArrow], but was specified for Arg[{type_name}]",
                UserWarning,
                stacklevel=2,
            )

        # The only two fields that differ from a plain Arg(...) construction.
        arg._type_param = self._type_param
        arg._returns_any_arrow_value = self._type_param is AnyArrow
        return arg
844
+
845
+
846
+ class Arg[ArgT]:
847
+ """Descriptor for declarative argument parsing with optional validation.
848
+
849
+ Use as a class attribute to declare function arguments that are automatically
850
+ parsed from self.arguments when accessed. This eliminates the need to override
851
+ __init__ for simple argument parsing.
852
+
853
+ Attributes:
854
+ position: Positional index (int) or named key (str).
855
+ default: Default value if argument not provided. Omit for required arguments.
856
+ doc: Documentation string for this argument.
857
+ ge: Value must be >= this (for numeric types).
858
+ le: Value must be <= this (for numeric types).
859
+ gt: Value must be > this (for numeric types).
860
+ lt: Value must be < this (for numeric types).
861
+ choices: Value must be one of these options.
862
+ pattern: Value must match this regex pattern (for strings).
863
+
864
+ Note:
865
+ For named arguments (string position), the Python attribute name should
866
+ match the SQL key. This is the standard convention::
867
+
868
+ format = Arg[str]("format") # Recommended: attribute == key
869
+
870
+ Avoid using different names::
871
+
872
+ output_format = Arg[str]("format") # Not recommended
873
+
874
+ While this works at runtime, it can cause issues with metadata
875
+ serialization where only one name is preserved.
876
+
877
+ """
878
+
879
+ __slots__ = (
880
+ "position",
881
+ "default",
882
+ "doc",
883
+ "ge",
884
+ "le",
885
+ "gt",
886
+ "lt",
887
+ "choices",
888
+ "pattern",
889
+ "varargs",
890
+ "arrow_type",
891
+ "type_bound",
892
+ "const",
893
+ "is_any",
894
+ "_name",
895
+ "_compiled_pattern",
896
+ "_type_param",
897
+ "_returns_any_arrow_value",
898
+ "_resolution_index",
899
+ )
900
+
901
+ def __init__(
902
+ self,
903
+ position: int | str,
904
+ *,
905
+ default: ArgT | Any = _MISSING,
906
+ doc: str = "",
907
+ ge: float | int | None = None,
908
+ le: float | int | None = None,
909
+ gt: float | int | None = None,
910
+ lt: float | int | None = None,
911
+ choices: Sequence[ArgT] | None = None,
912
+ pattern: str | None = None,
913
+ varargs: bool = False,
914
+ arrow_type: pa.DataType | None = None,
915
+ type_bound: "TypeBoundPredicate | Sequence[TypeBoundPredicate] | None" = None,
916
+ const: bool = False,
917
+ is_any: bool = False,
918
+ ) -> None:
919
+ """Initialize an Arg descriptor with optional validation.
920
+
921
+ Args:
922
+ position: Positional index (int) or named key (str). Positional
923
+ arguments are always required (DuckDB's binder always supplies
924
+ them); to make an argument optional, pass a string key instead.
925
+ default: Default value if argument not provided. Only valid for
926
+ named (string-position) arguments — passing a default with an
927
+ integer position raises ValueError. Omit for required.
928
+ doc: Documentation string for this argument.
929
+ ge: Minimum value (inclusive). Value must be >= this.
930
+ le: Maximum value (inclusive). Value must be <= this.
931
+ gt: Minimum value (exclusive). Value must be > this.
932
+ lt: Maximum value (exclusive). Value must be < this.
933
+ choices: Allowed values. Value must be one of these.
934
+ pattern: Regex pattern for string validation.
935
+ varargs: If True, collect all remaining positional arguments from this
936
+ position onwards. Returns tuple[ArgT, ...]. Requires at least 1 value.
937
+ Must be positional (not named).
938
+ arrow_type: Explicit Arrow type for this argument. If not provided,
939
+ type is inferred from the type hint using PYTHON_TO_ARROW.
940
+ type_bound: Type predicate(s) for Arg[AnyArrow] column type validation.
941
+ Accepts a single predicate (e.g., pa.types.is_integer) or a sequence
942
+ of predicates where any match is valid (OR logic). Only meaningful
943
+ for Arg[AnyArrow] arguments; issues a warning if used with other types.
944
+ const: If True, marks this argument as constant-folded (ConstParam).
945
+ Constant arguments have their values known at planning time.
946
+ is_any: If True, indicates this argument accepts any Arrow type (AnyArrow).
947
+ Used for tracking when AnyArrow was specified in the type hint.
948
+
949
+ Raises:
950
+ ValueError: If conflicting constraints are specified (e.g., ge and gt),
951
+ or if a default value is supplied with an integer (positional)
952
+ position.
953
+
954
+ """
955
+ # Validate constraint combinations
956
+ if ge is not None and gt is not None:
957
+ raise ValueError("Cannot specify both 'ge' and 'gt'")
958
+ if le is not None and lt is not None:
959
+ raise ValueError("Cannot specify both 'le' and 'lt'")
960
+
961
+ # Validate varargs constraints
962
+ if varargs:
963
+ if isinstance(position, str):
964
+ raise ValueError("varargs=True requires a positional argument (int), not named")
965
+ if default is not _MISSING:
966
+ raise ValueError("varargs=True cannot have a default value (requires at least 1 value)")
967
+
968
+ # Positional args cannot have defaults. DuckDB's binder always requires
969
+ # a positional argument to be supplied; the default is never consulted
970
+ # through the SQL path. To make an argument optional, declare it as a
971
+ # named argument by passing a string position (e.g. Arg("count", default=10)).
972
+ if isinstance(position, int) and default is not _MISSING:
973
+ raise ValueError(
974
+ f"Arg(position={position}, default=...): positional arguments cannot "
975
+ f"have a default value. DuckDB's binder always requires the positional "
976
+ f"argument, so the default would never fire. To make this argument "
977
+ f'optional, use a named argument: Arg("{{name}}", default=...).'
978
+ )
979
+
980
+ self.position = position
981
+ self.default = default
982
+ self.doc = doc
983
+ self.ge = ge
984
+ self.le = le
985
+ self.gt = gt
986
+ self.lt = lt
987
+ self.choices = choices
988
+ self.pattern = pattern
989
+ self.varargs = varargs
990
+ self.arrow_type = arrow_type
991
+ self.type_bound = type_bound
992
+ self.const = const
993
+ self.is_any = is_any
994
+ self._name: str | None = None
995
+ self._compiled_pattern: re.Pattern[str] | None = None
996
+ self._type_param: type | None = None
997
+ # Set by __init_subclass__ when using Annotated[AnyArrowValue, Arg(...)]
998
+ self._returns_any_arrow_value: bool = False
999
+ # Resolution index for value lookup (may differ from position for const params)
1000
+ # When set, _resolve() uses this instead of position for Arguments.get()
1001
+ self._resolution_index: int | None = None
1002
+
1003
+ # Pre-compile pattern for efficiency
1004
+ if pattern is not None:
1005
+ self._compiled_pattern = re.compile(pattern)
1006
+
1007
+ def __class_getitem__(cls, item: type) -> "_ArgFactory":
1008
+ """Support Arg[type] syntax to capture the type parameter at runtime.
1009
+
1010
+ When you write Arg[str](0), this method is called first with item=str,
1011
+ and returns an _ArgFactory that will create Arg instances with
1012
+ _type_param set to str.
1013
+ """
1014
+ return _ArgFactory(item)
1015
+
1016
+ def __set_name__(self, owner: type, name: str) -> None:
1017
+ """Store the attribute name when assigned to a class."""
1018
+ self._name = name
1019
+
1020
+ @overload
1021
+ def __get__(self, obj: None, objtype: type) -> "Arg[ArgT]": ...
1022
+
1023
+ @overload
1024
+ def __get__(self, obj: object, objtype: type | None = None) -> ArgT: ...
1025
+
1026
+ def __get__(self, obj: object | None, objtype: type | None = None) -> "Arg[ArgT] | ArgT":
1027
+ """Get the argument value, parsing and caching on first access."""
1028
+ if obj is None:
1029
+ return self # Class-level access returns descriptor
1030
+
1031
+ # Instance access - parse and cache
1032
+ if self._name is None:
1033
+ raise RuntimeError(
1034
+ "Arg descriptor was not properly initialized. "
1035
+ "This typically means the descriptor was accessed before __set_name__ "
1036
+ "was called. Ensure Arg is used as a class attribute, not instantiated "
1037
+ "dynamically."
1038
+ )
1039
+
1040
+ if self._name not in obj.__dict__:
1041
+ obj.__dict__[self._name] = self._resolve(obj)
1042
+ return obj.__dict__[self._name] # type: ignore[no-any-return]
1043
+
1044
+ def _resolve(self, obj: object) -> ArgT:
1045
+ """Parse argument from obj.invocation.arguments and validate."""
1046
+ invocation = getattr(obj, "invocation", None)
1047
+ if invocation is None:
1048
+ raise RuntimeError(
1049
+ f"Cannot resolve Arg '{self._name}': object {type(obj).__name__} does "
1050
+ f"not have an 'invocation' attribute. Arg descriptors can only be used "
1051
+ f"on classes that have an 'invocation' attribute (e.g., "
1052
+ f"TableInOutFunction, TableFunctionGenerator)."
1053
+ )
1054
+ arguments = invocation.arguments
1055
+
1056
+ # Use _resolution_index if set (for const params with separate tracking)
1057
+ # Otherwise fall back to position
1058
+ lookup_pos: int | str = self._resolution_index if self._resolution_index is not None else self.position
1059
+
1060
+ if self.varargs:
1061
+ # Collect all positional arguments from this position onwards
1062
+ # position is guaranteed to be int (validated in __init__)
1063
+ assert isinstance(lookup_pos, int) # Validated in __init__
1064
+ values = arguments.get_varargs(lookup_pos)
1065
+ if len(values) == 0:
1066
+ raise ArgumentValidationError(
1067
+ f"Argument '{self._name}' requires at least 1 value.",
1068
+ arg_name=self._name,
1069
+ position=self.position,
1070
+ constraint="varargs requires at least 1 value",
1071
+ doc=self.doc if self.doc else None,
1072
+ )
1073
+ # Validate each element
1074
+ for i, val in enumerate(values):
1075
+ self._validate_single(val, index=i)
1076
+ return values # type: ignore[no-any-return] # varargs returns tuple
1077
+
1078
+ if self.default is _MISSING:
1079
+ value: ArgT = arguments.get(lookup_pos)
1080
+ else:
1081
+ value = arguments.get(lookup_pos, default=self.default)
1082
+
1083
+ # Skip validation for None — either an explicit SQL NULL the caller
1084
+ # passed, or default=None for a nullable Arg. Numeric/choice/pattern
1085
+ # constraints don't apply to None and would otherwise TypeError.
1086
+ if value is not None:
1087
+ self._validate(value)
1088
+
1089
+ # Wrap AnyArrow values with metadata for schema lookups
1090
+ if self._returns_any_arrow_value:
1091
+ assert self._name is not None # Set by __set_name__
1092
+ return AnyArrowValue(value, self.position, self._name) # type: ignore[return-value]
1093
+
1094
+ return value
1095
+
1096
+ def _describe_valid_range(self) -> str | None:
1097
+ """Build a human-readable description of valid values."""
1098
+ parts = []
1099
+
1100
+ # Numeric bounds
1101
+ if self.ge is not None:
1102
+ parts.append(f">= {self.ge}")
1103
+ if self.gt is not None:
1104
+ parts.append(f"> {self.gt}")
1105
+ if self.le is not None:
1106
+ parts.append(f"<= {self.le}")
1107
+ if self.lt is not None:
1108
+ parts.append(f"< {self.lt}")
1109
+
1110
+ if parts:
1111
+ # Format as range if we have both bounds
1112
+ if len(parts) == 2:
1113
+ lower = parts[0]
1114
+ upper = parts[1]
1115
+ return f"{lower} and {upper}"
1116
+ return " and ".join(parts)
1117
+
1118
+ # Choices
1119
+ if self.choices is not None:
1120
+ if len(self.choices) <= 5:
1121
+ return ", ".join(repr(c) for c in self.choices)
1122
+ else:
1123
+ shown = ", ".join(repr(c) for c in list(self.choices)[:4])
1124
+ return f"{shown}, ... ({len(self.choices)} total options)"
1125
+
1126
+ # Pattern
1127
+ if self.pattern is not None:
1128
+ return f"string matching pattern: {self.pattern}"
1129
+
1130
+ return None
1131
+
1132
+ def _reject_none(self) -> "ArgumentValidationError":
1133
+ """Build the error raised when SQL NULL is passed to a non-Optional Arg.
1134
+
1135
+ Callers ``_parse_arguments`` (table_function.py) and ``_resolve``
1136
+ (this module) hit ``_validate`` with a None value when the user
1137
+ wrote e.g. ``my_func(NULL)``. ``_validate``'s numeric/choice/pattern
1138
+ comparisons would then crash with a Python ``TypeError`` deep in
1139
+ the worker — which surfaces in the C++ extension as an opaque
1140
+ traceback rather than a clean argument error. Callers use this
1141
+ helper to emit a structured error before reaching ``_validate``.
1142
+ """
1143
+ arg_name = self._name or str(self.position)
1144
+ return ArgumentValidationError(
1145
+ f"Argument '{arg_name}' cannot be NULL.",
1146
+ arg_name=self._name,
1147
+ position=self.position,
1148
+ value=None,
1149
+ constraint="must not be NULL (declare type as `T | None` to accept SQL NULL)",
1150
+ doc=self.doc if self.doc else None,
1151
+ valid_range=self._describe_valid_range(),
1152
+ default=self.default,
1153
+ )
1154
+
1155
+ def _validate(self, value: ArgT) -> None:
1156
+ """Validate value against all constraints.
1157
+
1158
+ Args:
1159
+ value: The value to validate.
1160
+
1161
+ Raises:
1162
+ ArgumentValidationError: If any constraint is violated.
1163
+
1164
+ """
1165
+ arg_name = self._name or str(self.position)
1166
+ valid_range = self._describe_valid_range()
1167
+
1168
+ # Numeric range validation
1169
+ # Note: type: ignore needed because ArgT is generic - comparisons only valid
1170
+ # for numeric types, but we can't express "ArgT when constraints are set"
1171
+ if self.ge is not None and value < self.ge: # type: ignore[operator]
1172
+ raise ArgumentValidationError(
1173
+ f"Argument '{arg_name}' is too small.",
1174
+ arg_name=self._name,
1175
+ position=self.position,
1176
+ value=value,
1177
+ constraint=f"must be >= {self.ge}",
1178
+ doc=self.doc if self.doc else None,
1179
+ valid_range=valid_range,
1180
+ default=self.default,
1181
+ )
1182
+
1183
+ if self.le is not None and value > self.le: # type: ignore[operator]
1184
+ raise ArgumentValidationError(
1185
+ f"Argument '{arg_name}' is too large.",
1186
+ arg_name=self._name,
1187
+ position=self.position,
1188
+ value=value,
1189
+ constraint=f"must be <= {self.le}",
1190
+ doc=self.doc if self.doc else None,
1191
+ valid_range=valid_range,
1192
+ default=self.default,
1193
+ )
1194
+
1195
+ if self.gt is not None and value <= self.gt: # type: ignore[operator]
1196
+ raise ArgumentValidationError(
1197
+ f"Argument '{arg_name}' is too small.",
1198
+ arg_name=self._name,
1199
+ position=self.position,
1200
+ value=value,
1201
+ constraint=f"must be > {self.gt}",
1202
+ doc=self.doc if self.doc else None,
1203
+ valid_range=valid_range,
1204
+ default=self.default,
1205
+ )
1206
+
1207
+ if self.lt is not None and value >= self.lt: # type: ignore[operator]
1208
+ raise ArgumentValidationError(
1209
+ f"Argument '{arg_name}' is too large.",
1210
+ arg_name=self._name,
1211
+ position=self.position,
1212
+ value=value,
1213
+ constraint=f"must be < {self.lt}",
1214
+ doc=self.doc if self.doc else None,
1215
+ valid_range=valid_range,
1216
+ default=self.default,
1217
+ )
1218
+
1219
+ # Choices validation
1220
+ if self.choices is not None and value not in self.choices:
1221
+ raise ArgumentValidationError(
1222
+ f"Argument '{arg_name}' has an invalid value.",
1223
+ arg_name=self._name,
1224
+ position=self.position,
1225
+ value=value,
1226
+ constraint="must be one of the allowed choices",
1227
+ doc=self.doc if self.doc else None,
1228
+ valid_range=valid_range,
1229
+ default=self.default,
1230
+ choices=self.choices,
1231
+ )
1232
+
1233
+ # Pattern validation (for strings)
1234
+ if self._compiled_pattern is not None:
1235
+ if not isinstance(value, str):
1236
+ raise ArgumentValidationError(
1237
+ f"Argument '{arg_name}' must be a string for pattern validation.",
1238
+ arg_name=self._name,
1239
+ position=self.position,
1240
+ value=value,
1241
+ constraint=f"must be a string matching pattern '{self.pattern}'",
1242
+ doc=self.doc if self.doc else None,
1243
+ valid_range=valid_range,
1244
+ default=self.default,
1245
+ )
1246
+ if not self._compiled_pattern.match(value):
1247
+ raise ArgumentValidationError(
1248
+ f"Argument '{arg_name}' does not match the required pattern.",
1249
+ arg_name=self._name,
1250
+ position=self.position,
1251
+ value=value,
1252
+ constraint=f"must match pattern '{self.pattern}'",
1253
+ doc=self.doc if self.doc else None,
1254
+ valid_range=valid_range,
1255
+ default=self.default,
1256
+ )
1257
+
1258
+ def _validate_single(self, value: Any, *, index: int) -> None:
1259
+ """Validate a single value from varargs against all constraints.
1260
+
1261
+ Args:
1262
+ value: The value to validate.
1263
+ index: Index within the varargs tuple (for error messages).
1264
+
1265
+ Raises:
1266
+ ArgumentValidationError: If any constraint is violated.
1267
+
1268
+ """
1269
+ arg_name = self._name or str(self.position)
1270
+ valid_range = self._describe_valid_range()
1271
+ display_pos = f"{self.position}[{index}]"
1272
+
1273
+ # Numeric range validation
1274
+ if self.ge is not None and value < self.ge:
1275
+ raise ArgumentValidationError(
1276
+ f"Argument '{arg_name}' element {index} is too small.",
1277
+ arg_name=self._name,
1278
+ position=display_pos,
1279
+ value=value,
1280
+ constraint=f"must be >= {self.ge}",
1281
+ doc=self.doc if self.doc else None,
1282
+ valid_range=valid_range,
1283
+ )
1284
+
1285
+ if self.le is not None and value > self.le:
1286
+ raise ArgumentValidationError(
1287
+ f"Argument '{arg_name}' element {index} is too large.",
1288
+ arg_name=self._name,
1289
+ position=display_pos,
1290
+ value=value,
1291
+ constraint=f"must be <= {self.le}",
1292
+ doc=self.doc if self.doc else None,
1293
+ valid_range=valid_range,
1294
+ )
1295
+
1296
+ if self.gt is not None and value <= self.gt:
1297
+ raise ArgumentValidationError(
1298
+ f"Argument '{arg_name}' element {index} is too small.",
1299
+ arg_name=self._name,
1300
+ position=display_pos,
1301
+ value=value,
1302
+ constraint=f"must be > {self.gt}",
1303
+ doc=self.doc if self.doc else None,
1304
+ valid_range=valid_range,
1305
+ )
1306
+
1307
+ if self.lt is not None and value >= self.lt:
1308
+ raise ArgumentValidationError(
1309
+ f"Argument '{arg_name}' element {index} is too large.",
1310
+ arg_name=self._name,
1311
+ position=display_pos,
1312
+ value=value,
1313
+ constraint=f"must be < {self.lt}",
1314
+ doc=self.doc if self.doc else None,
1315
+ valid_range=valid_range,
1316
+ )
1317
+
1318
+ # Choices validation
1319
+ if self.choices is not None and value not in self.choices:
1320
+ raise ArgumentValidationError(
1321
+ f"Argument '{arg_name}' element {index} has an invalid value.",
1322
+ arg_name=self._name,
1323
+ position=display_pos,
1324
+ value=value,
1325
+ constraint="must be one of the allowed choices",
1326
+ doc=self.doc if self.doc else None,
1327
+ valid_range=valid_range,
1328
+ choices=self.choices,
1329
+ )
1330
+
1331
+ # Pattern validation (for strings)
1332
+ if self._compiled_pattern is not None:
1333
+ if not isinstance(value, str):
1334
+ raise ArgumentValidationError(
1335
+ f"Argument '{arg_name}' element {index} must be a string.",
1336
+ arg_name=self._name,
1337
+ position=display_pos,
1338
+ value=value,
1339
+ constraint=f"must be a string matching pattern '{self.pattern}'",
1340
+ doc=self.doc if self.doc else None,
1341
+ valid_range=valid_range,
1342
+ )
1343
+ if not self._compiled_pattern.match(value):
1344
+ raise ArgumentValidationError(
1345
+ f"Argument '{arg_name}' element {index} does not match pattern.",
1346
+ arg_name=self._name,
1347
+ position=display_pos,
1348
+ value=value,
1349
+ constraint=f"must match pattern '{self.pattern}'",
1350
+ doc=self.doc if self.doc else None,
1351
+ valid_range=valid_range,
1352
+ )
1353
+
1354
+ def format_error(self, message: str) -> str:
1355
+ """Format an error message with argument context.
1356
+
1357
+ Use this method when performing custom validation to produce
1358
+ error messages that include the argument's position and name.
1359
+
1360
+ Args:
1361
+ message: The error message describing what went wrong.
1362
+
1363
+ Returns:
1364
+ Formatted error message prefixed with argument context.
1365
+
1366
+ """
1367
+ # Use the attribute name if available
1368
+ name = self._name or str(self.position)
1369
+ return f"Argument '{name}': {message}"
1370
+
1371
+ def validate_type_bound(self, field_type: pa.DataType) -> None:
1372
+ """Validate that the field type satisfies the type bound predicate(s).
1373
+
1374
+ This method is called during function initialization for Arg[AnyArrow]
1375
+ arguments that have type_bound specified.
1376
+
1377
+ If multiple predicates are provided, uses OR logic (any match is valid).
1378
+
1379
+ Args:
1380
+ field_type: The Arrow type of the column to validate.
1381
+
1382
+ Raises:
1383
+ SchemaValidationError: If the type bound is not satisfied.
1384
+
1385
+ """
1386
+ from vgi.exceptions import SchemaValidationError
1387
+
1388
+ if self.type_bound is None:
1389
+ return
1390
+
1391
+ # Normalize to sequence
1392
+ if callable(self.type_bound):
1393
+ predicates: list[TypeBoundPredicate] = [self.type_bound]
1394
+ else:
1395
+ predicates = list(self.type_bound)
1396
+
1397
+ # OR logic: at least one predicate must pass
1398
+ if not any(predicate(field_type) for predicate in predicates):
1399
+ predicate_names = [getattr(p, "__name__", str(p)) for p in predicates]
1400
+ raise SchemaValidationError(
1401
+ self.format_error(f"column type {field_type} does not match any of: {', '.join(predicate_names)}")
1402
+ )
1403
+
1404
+ def __repr__(self) -> str:
1405
+ """Return a string representation of this Arg."""
1406
+ parts = [repr(self.position)]
1407
+
1408
+ if self.default is not _MISSING:
1409
+ parts.append(f"default={self.default!r}")
1410
+ if self.doc:
1411
+ parts.append(f"doc={self.doc!r}")
1412
+ if self.ge is not None:
1413
+ parts.append(f"ge={self.ge!r}")
1414
+ if self.le is not None:
1415
+ parts.append(f"le={self.le!r}")
1416
+ if self.gt is not None:
1417
+ parts.append(f"gt={self.gt!r}")
1418
+ if self.lt is not None:
1419
+ parts.append(f"lt={self.lt!r}")
1420
+ if self.choices is not None:
1421
+ parts.append(f"choices={self.choices!r}")
1422
+ if self.pattern is not None:
1423
+ parts.append(f"pattern={self.pattern!r}")
1424
+ if self.varargs:
1425
+ parts.append("varargs=True")
1426
+ if self.arrow_type is not None:
1427
+ parts.append(f"arrow_type={self.arrow_type!r}")
1428
+ if self.type_bound is not None:
1429
+ if callable(self.type_bound):
1430
+ name = getattr(self.type_bound, "__name__", str(self.type_bound))
1431
+ parts.append(f"type_bound={name}")
1432
+ else:
1433
+ names = [getattr(p, "__name__", str(p)) for p in self.type_bound]
1434
+ parts.append(f"type_bound=[{', '.join(names)}]")
1435
+ if self.const:
1436
+ parts.append("const=True")
1437
+ if self.is_any:
1438
+ parts.append("is_any=True")
1439
+
1440
+ return f"Arg({', '.join(parts)})"
1441
+
1442
+
1443
+ # =============================================================================
1444
+ # Param, ConstParam, Returns - Dataclasses for Scalar Function Annotations
1445
+ # =============================================================================
1446
+ #
1447
+ # These dataclasses follow the Pydantic v2 pattern: use inside Annotated[]
1448
+ # for native mypy support without # type: ignore comments.
1449
+ #
1450
+ # Example:
1451
+ # @classmethod
1452
+ # def compute(
1453
+ # cls,
1454
+ # column: Annotated[pa.Array, Param(pa.int64(), "Input column")],
1455
+ # factor: Annotated[int, ConstParam("Multiplication factor")],
1456
+ # ) -> Annotated[pa.Array, Returns(pa.int64())]:
1457
+ # return pc.multiply(column, factor)
1458
+ # =============================================================================
1459
+
1460
+
1461
@dataclass(frozen=True, slots=True)
class Param:
    """Annotation metadata describing a columnar (``pa.Array``) parameter.

    Place inside ``Annotated[...]`` on a ``compute()`` parameter, or on a
    class-level declaration. The information is used for catalog
    registration and argument validation. In ``ScalarFunction.compute()``
    the position is taken from the parameter order, so ``position`` can be
    left unset there.

    Args:
        arrow_type: The Arrow data type, a Python type
            (int/str/float/bool/bytes), or None for AnyArrow (any type accepted).
        doc: Human-readable description of the parameter.
        type_bound: Predicate(s) used to validate the input column type.
            Only meaningful when ``arrow_type`` is None (AnyArrow).
        varargs: When True, the parameter gathers all remaining positional
            arguments as a list of arrays.
        position: Explicit column position (for class-level attributes).
            None means the position is inferred from the method signature.

    Example (position inferred from the compute() signature):
        class AddColumns(ScalarFunction):
            @classmethod
            def compute(
                cls,
                left: Annotated[pa.Array, Param(pa.int64(), "First value")],
                right: Annotated[pa.Array, Param(pa.int64(), "Second value")],
            ) -> Annotated[pa.Array, Returns(pa.int64())]:
                return pc.add(left, right)

    Example (AnyArrow restricted by a type_bound):
        class Double(ScalarFunction):
            @classmethod
            def compute(
                cls,
                value: Annotated[pa.Array, Param(doc="Numeric value",
                                                 type_bound=pa.types.is_numeric)],
            ) -> Annotated[pa.Array, Returns()]:
                return pc.multiply(value, 2)

    """

    # Field order is part of the API: arrow_type must stay first so that the
    # positional form Param(pa.int64(), "doc") keeps working.
    arrow_type: pa.DataType | type | None = None
    doc: str = ""
    type_bound: "TypeBoundPredicate | Sequence[TypeBoundPredicate] | None" = None
    varargs: bool = False
    position: int | None = None
1510
+
1511
+
1512
@dataclass(frozen=True, slots=True)
class ConstParam:
    """Annotation metadata describing a constant (non-columnar) parameter.

    Place inside ``Annotated[...]`` on a ``compute()`` parameter whose value
    is a constant known at planning time. Unless ``arrow_type`` is given, the
    type is inferred from the first ``Annotated`` argument
    (e.g. ``Annotated[int, ConstParam(...)]`` infers ``pa.int64()``).

    Args:
        doc: Human-readable description of the parameter.
        arrow_type: Optional explicit Arrow type; inferred from the
            ``Annotated`` first argument when omitted.
        position: Position in the argument list (optional for ScalarFunction,
            where it is inferred from the signature).
        phase: Aggregate functions only — when the constant is needed:
            "all" (every callback), "update" (only update) or
            "finalize" (only finalize).

    """

    doc: str = ""
    arrow_type: pa.DataType | type | None = None
    # Position in the argument list.
    position: int | None = None
    # Callback phase in which the constant is required (aggregates only).
    phase: str = "all"
1536
+
1537
+
1538
+ @dataclass(frozen=True, slots=True)
1539
+ class Setting:
1540
+ """Metadata for settings parameter in compute().
1541
+
1542
+ Use with Annotated to declare parameters that receive setting values
1543
+ from the DuckDB session. Settings are string key-value pairs.
1544
+
1545
+ Args:
1546
+ key: The setting key name. If not provided, uses the parameter name.
1547
+
1548
+ """
1549
+
1550
+ key: str | None = None
1551
+
1552
+
1553
+ @dataclass(frozen=True, slots=True)
1554
+ class Secret:
1555
+ """Metadata for secrets parameter in compute() or on_bind().
1556
+
1557
+ Use with Annotated to declare parameters that receive secret values
1558
+ from the DuckDB SecretManager. Secrets contain multiple key-value pairs
1559
+ where keys are strings and values can be any DuckDB type.
1560
+
1561
+ Args:
1562
+ secret_type: The secret type to look up (e.g., "vgi_example", "s3").
1563
+ Required — C++ enforces type matching.
1564
+ name: Optional secret name for name-based lookup.
1565
+ scope: Optional static scope for pre-resolution (resolved before first bind call).
1566
+
1567
+ Examples:
1568
+ Secret("vgi_example") — unscoped lookup by type
1569
+ Secret("s3", name="my_cred") — type + name-based lookup
1570
+ Secret("s3", scope="s3://bucket/") — type + scope (pre-resolved)
1571
+ Secret("s3", name="my_cred", scope="s3://bucket/") — all three
1572
+
1573
+ """
1574
+
1575
+ secret_type: str
1576
+ name: str | None = None
1577
+ scope: str | None = None
1578
+
1579
+
1580
@dataclass(frozen=True, slots=True)
class SecretLookupEntry(ArrowSerializableDataclass):
    """Describes one secret that should be looked up.

    The same record serves function metadata (static requirements derived
    from annotations), runtime requests (dynamic scoped lookups), and the
    catalog-level secret requirement (it replaces the former
    ``CatalogSecretRequirement``, which had identical fields).

    It derives from ``ArrowSerializableDataclass`` so that it can be
    serialized in catalog ``FunctionInfo`` payloads.

    ``secret_type`` is required — C++ enforces type matching.

    Supported lookup patterns:
        - By type only: SecretLookupEntry(secret_type="s3")
        - By type + scope: SecretLookupEntry(secret_type="s3", scope="s3://bucket/")
        - By type + name: SecretLookupEntry(secret_type="s3", secret_name="my_cred")
        - By type + scope + name: all three fields set
    """

    secret_type: str
    scope: str | None = None
    secret_name: str | None = None

    def to_dict(self) -> dict[str, str | None]:
        """Return the three fields as a plain dictionary for serialization."""
        as_mapping: dict[str, str | None] = {}
        as_mapping["secret_type"] = self.secret_type
        as_mapping["secret_name"] = self.secret_name
        as_mapping["scope"] = self.scope
        return as_mapping

    @staticmethod
    def from_dict(d: dict[str, Any]) -> "SecretLookupEntry":
        """Build an entry from a dictionary.

        ``secret_type`` must be present (a missing key raises ``KeyError``);
        ``secret_name`` and ``scope`` are optional and default to None.
        """
        optional_name = d.get("secret_name")
        optional_scope = d.get("scope")
        return SecretLookupEntry(
            secret_type=d["secret_type"],
            secret_name=optional_name,
            scope=optional_scope,
        )
1621
+
1622
+
1623
def _extract_setting_secret_params(
    method: Any,
) -> tuple[dict[str, str], dict[str, Secret]]:
    """Collect the Setting()/Secret() markers declared on a method's parameters.

    The method's type hints are resolved and every parameter whose hint is
    an ``Annotated[...]`` carrying a ``Setting`` or ``Secret`` instance is
    recorded. Only the first such marker in a parameter's metadata counts.

    String annotations (``from __future__ import annotations``) that
    ``typing.get_type_hints`` cannot resolve are handled by evaluating each
    annotation individually in a prepared namespace.

    Args:
        method: The method to inspect (e.g., compute, on_bind).

    Returns:
        Tuple of (setting_params, secret_params) where:
            - setting_params: dict mapping ``param_name -> setting_key``
              (the parameter name itself when the Setting has no key)
            - secret_params: dict mapping ``param_name -> Secret`` instance

    """
    import contextlib
    import inspect
    from typing import get_type_hints

    signature = inspect.signature(method)

    # First choice: let typing resolve all hints at once. Any failure is
    # deliberately ignored; the manual resolution below takes over.
    try:
        hints: dict[str, Any] = get_type_hints(method, include_extras=True)
    except Exception:
        hints = {}

    # Fallback for `from __future__ import annotations`
    if not hints:
        import pyarrow as pa
        from typing import Annotated

        # pa.Scalar[Any] isn't subscriptable in PyArrow at runtime, so the
        # eval namespace gets a stand-in `pa` whose Scalar accepts [...]
        # and which forwards every other attribute to the real module.
        class _MockScalar:
            def __class_getitem__(cls, _item: Any) -> Any:
                return Any

        class _MockPa:
            Scalar = _MockScalar

            def __getattr__(self, attr_name: str) -> Any:
                return getattr(pa, attr_name)

        # Start from the method's own globals, then layer the names the
        # annotations are expected to mention on top.
        namespace = dict(getattr(method, "__globals__", {}))
        namespace.update(
            {
                "Annotated": Annotated,
                "Setting": Setting,
                "Secret": Secret,
                "pa": _MockPa(),
            }
        )

        for ann_name, raw in getattr(method, "__annotations__", {}).items():
            if not isinstance(raw, str):
                # Already a real object; use it as-is.
                hints[ann_name] = raw
                continue
            # An annotation that fails to evaluate is simply left out.
            with contextlib.suppress(Exception):
                hints[ann_name] = eval(raw, namespace)  # noqa: S307

    setting_params: dict[str, str] = {}
    secret_params: dict[str, Secret] = {}

    for param_name in signature.parameters:
        if param_name in ("self", "cls"):
            continue

        resolved = hints.get(param_name)
        if resolved is not None and hasattr(resolved, "__metadata__"):
            # The first Setting/Secret in the Annotated metadata decides.
            marker = next(
                (m for m in resolved.__metadata__ if isinstance(m, (Setting, Secret))),
                None,
            )
            if isinstance(marker, Setting):
                setting_params[param_name] = param_name if marker.key is None else marker.key
            elif isinstance(marker, Secret):
                secret_params[param_name] = marker

    return setting_params, secret_params
1708
+
1709
+
1710
+ @dataclass(frozen=True, slots=True)
1711
+ class Auth:
1712
+ """Metadata for auth context parameter in compute().
1713
+
1714
+ Use with Annotated to declare a parameter that receives the AuthContext
1715
+ for the current request. Returns AuthContext.anonymous() when no
1716
+ authentication is configured (including stdio transport).
1717
+
1718
+ """
1719
+
1720
+
1721
+ @dataclass(frozen=True, slots=True)
1722
+ class OutputLength:
1723
+ """Metadata for output length parameter in compute().
1724
+
1725
+ Use with Annotated to declare a parameter that receives the number of rows
1726
+ in the input batch. This is useful for scalar functions that don't take
1727
+ any column arguments but need to know how many output values to produce.
1728
+
1729
+ """
1730
+
1731
+ pass
1732
+
1733
+
1734
+ @dataclass(frozen=True, slots=True)
1735
+ class Returns:
1736
+ """Metadata for compute() return type.
1737
+
1738
+ Use with Annotated to declare the output Arrow type for catalog registration.
1739
+ The annotation indicates that compute() returns a pa.Array of the specified type.
1740
+
1741
+ Args:
1742
+ arrow_type: The Arrow data type of the output, or None for AnyArrow
1743
+ (dynamic output type determined at bind time).
1744
+
1745
+ """
1746
+
1747
+ arrow_type: pa.DataType | None = None