vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/argument_spec.py
ADDED
|
@@ -0,0 +1,472 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Arrow-based serialization of function argument specifications.
|
|
4
|
+
|
|
5
|
+
This module provides classes and functions for serializing function argument
|
|
6
|
+
specifications to Apache Arrow schemas. This enables functions to describe
|
|
7
|
+
their argument signatures (types, positions, special markers) in a format
|
|
8
|
+
that can be transmitted over IPC and understood by DuckDB for function
|
|
9
|
+
registration.
|
|
10
|
+
|
|
11
|
+
The serialization uses a single Arrow schema where:
|
|
12
|
+
- Positional arguments come first (field order = position index)
|
|
13
|
+
- Named arguments follow (marked with metadata)
|
|
14
|
+
- Special types (TableInput, AnyArrow, varargs) use field metadata markers
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import warnings
|
|
18
|
+
from collections.abc import Sequence
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from typing import Annotated, Any, get_args, get_origin, get_type_hints
|
|
21
|
+
|
|
22
|
+
import pyarrow as pa
|
|
23
|
+
|
|
24
|
+
from vgi.arguments import PYTHON_TO_ARROW, AnyArrow, AnyArrowValue, Arg, TableInput
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"ArgumentSpec",
|
|
28
|
+
"argument_specs_to_schema",
|
|
29
|
+
"extract_argument_specs",
|
|
30
|
+
"schema_to_argument_specs",
|
|
31
|
+
# Metadata constants for parsing schemas
|
|
32
|
+
"VGI_ARG_KEY",
|
|
33
|
+
"VGI_ARG_NAMED",
|
|
34
|
+
"VGI_TYPE_KEY",
|
|
35
|
+
"VGI_TYPE_TABLE",
|
|
36
|
+
"VGI_TYPE_ANY",
|
|
37
|
+
"VGI_VARARGS_KEY",
|
|
38
|
+
"VGI_VARARGS_TRUE",
|
|
39
|
+
"VGI_CONST_KEY",
|
|
40
|
+
"VGI_CONST_TRUE",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
# =============================================================================
|
|
44
|
+
# Metadata Keys
|
|
45
|
+
# =============================================================================
|
|
46
|
+
|
|
47
|
+
# Field-metadata marker identifying a named (non-positional) argument:
# the key, and the value stored under it.
VGI_ARG_KEY, VGI_ARG_NAMED = b"vgi_arg", b"named"

# Field-metadata marker for special argument types: the key, followed by the
# values used for table-input arguments and any-type arguments.
VGI_TYPE_KEY, VGI_TYPE_TABLE, VGI_TYPE_ANY = b"vgi_type", b"table", b"any"

# Field-metadata marker for a varargs argument (one that collects the
# remaining positional arguments).
VGI_VARARGS_KEY, VGI_VARARGS_TRUE = b"vgi_varargs", b"true"

# Field-metadata marker for a constant-folded argument (a scalar value rather
# than an array).
VGI_CONST_KEY, VGI_CONST_TRUE = b"vgi_const", b"true"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _argument_spec_sort_key(spec: "ArgumentSpec") -> tuple[int, int | str]:
|
|
66
|
+
"""Sort key: positional first (by index), then named (alphabetically)."""
|
|
67
|
+
if isinstance(spec.position, int):
|
|
68
|
+
return (0, spec.position)
|
|
69
|
+
return (1, spec.position)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# =============================================================================
|
|
73
|
+
# ArgumentSpec Dataclass
|
|
74
|
+
# =============================================================================
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass(frozen=True, slots=True)
class ArgumentSpec:
    """Immutable description of one argument in a function signature.

    A spec records where the argument sits in the signature (a positional
    index or a named key), its exact Arrow data type, and the markers for
    table input, any-type, varargs and constant-folded arguments.

    Attributes:
        name: Python attribute name for the argument.
        position: Positional index (int) for positional args, or the named
            key (str) for named arguments.
        arrow_type: The Arrow data type. Use pa.null() for TableInput and
            AnyArrow types.
        is_table_input: True if this argument receives streaming table input
            (Arg[TableInput]).
        is_any_type: True if this argument accepts any Arrow type
            (Arg[AnyArrow]).
        is_varargs: True if this argument collects all remaining positional
            arguments (varargs=True).
        is_const: True if this argument is constant-folded (ConstParam).
            Constant arguments are scalar values known at planning time,
            rather than columnar data processed at runtime.

    Note:
        For named arguments the Python attribute name (``name``) and the SQL
        key (``position``) are expected to match, which is the standard
        convention::

            format = Arg[str]("format")  # name="format", position="format"

        Only ``name`` is written to the Arrow schema field name, so a
        ``position`` that differs from ``name`` does not survive a schema
        round trip.

    """

    name: str
    position: int | str
    arrow_type: pa.DataType
    is_table_input: bool = False
    is_any_type: bool = False
    is_varargs: bool = False
    is_const: bool = False

    def __repr__(self) -> str:
        """Return a compact repr: name, position, type and any set flags."""
        # Integer positions are shown bare; named keys are shown quoted.
        if isinstance(self.position, int):
            shown_position: int | str = self.position
        else:
            shown_position = f'"{self.position}"'

        # Only flags that are set appear, in this fixed order.
        labelled_flags = (
            ("table_input", self.is_table_input),
            ("any_type", self.is_any_type),
            ("varargs", self.is_varargs),
            ("const", self.is_const),
        )
        active = [label for label, enabled in labelled_flags if enabled]
        suffix = f", flags=[{', '.join(active)}]" if active else ""

        return f'ArgumentSpec(name="{self.name}", pos={shown_position}, type={self.arrow_type}{suffix})'
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# =============================================================================
|
|
145
|
+
# Serialization Functions
|
|
146
|
+
# =============================================================================
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def argument_specs_to_schema(specs: Sequence[ArgumentSpec]) -> pa.Schema:
    """Serialize ArgumentSpecs into one Arrow schema, one field per argument.

    Fields are emitted in sorted order: positional arguments first (by
    index), then named arguments (by key). Each field carries the spec's
    ``name`` and ``arrow_type``; everything else is encoded as field metadata:

    - named argument: {b"vgi_arg": b"named"}
    - TableInput: {b"vgi_type": b"table"}
    - AnyArrow: {b"vgi_type": b"any"} (only when not also table input)
    - varargs: {b"vgi_varargs": b"true"}
    - constant-folded: {b"vgi_const": b"true"}

    A field with no markers is created without metadata.

    Args:
        specs: Sequence of ArgumentSpec objects to serialize.

    Returns:
        Arrow schema with one field per argument.

    Warns:
        UserWarning: If the integer positions are not exactly 0, 1, 2, ...
            The schema is still built.

    """
    ordered = sorted(specs, key=_argument_spec_sort_key)

    # Positional indices must run 0..n-1; with no positional arguments both
    # lists are empty and the comparison passes trivially.
    found = [spec.position for spec in ordered if isinstance(spec.position, int)]
    expected = list(range(len(found)))
    if found != expected:
        warnings.warn(
            f"Positional argument indices are not contiguous starting from 0. "
            f"Found: {found}, expected: {expected}. "
            f"This may indicate a bug.",
            stacklevel=2,
        )

    fields: list[pa.Field[Any]] = []
    for spec in ordered:
        # (condition, key, value) rows; row order fixes metadata key order.
        # Table input takes precedence over any-type for the type marker.
        candidates = (
            (isinstance(spec.position, str), VGI_ARG_KEY, VGI_ARG_NAMED),
            (spec.is_table_input, VGI_TYPE_KEY, VGI_TYPE_TABLE),
            (not spec.is_table_input and spec.is_any_type, VGI_TYPE_KEY, VGI_TYPE_ANY),
            (spec.is_varargs, VGI_VARARGS_KEY, VGI_VARARGS_TRUE),
            (spec.is_const, VGI_CONST_KEY, VGI_CONST_TRUE),
        )
        markers: dict[bytes, bytes] = {key: value for wanted, key, value in candidates if wanted}

        # An empty marker dict is passed as None so the field has no metadata.
        fields.append(pa.field(spec.name, spec.arrow_type, metadata=markers or None))

    return pa.schema(fields)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def schema_to_argument_specs(schema: pa.Schema) -> list[ArgumentSpec]:
    """Rebuild ArgumentSpecs from an Arrow schema's fields and metadata.

    A field whose metadata marks it as named gets its field name as the
    position. Every other field is positional and is numbered 0, 1, 2, ... in
    the order it appears among the positional fields. The table/any,
    varargs and const flags are read back from the field metadata.

    Args:
        schema: Arrow schema with argument fields.

    Returns:
        List of ArgumentSpec objects in schema field order.

    """
    rebuilt: list[ArgumentSpec] = []
    next_index = 0  # running index handed to the next positional field

    for field in schema:
        markers = field.metadata or {}

        slot: int | str
        if markers.get(VGI_ARG_KEY) == VGI_ARG_NAMED:
            slot = field.name
        else:
            slot = next_index
            next_index += 1

        type_marker = markers.get(VGI_TYPE_KEY)
        rebuilt.append(
            ArgumentSpec(
                name=field.name,
                position=slot,
                arrow_type=field.type,
                is_table_input=type_marker == VGI_TYPE_TABLE,
                is_any_type=type_marker == VGI_TYPE_ANY,
                is_varargs=markers.get(VGI_VARARGS_KEY) == VGI_VARARGS_TRUE,
                is_const=markers.get(VGI_CONST_KEY) == VGI_CONST_TRUE,
            )
        )

    return rebuilt
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# =============================================================================
|
|
265
|
+
# Extraction from Function Classes
|
|
266
|
+
# =============================================================================
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def extract_argument_specs(
    cls: type,
) -> list[ArgumentSpec]:
    """Collect ArgumentSpecs for every argument declared on a function class.

    Three sources are consulted in order; a name claimed by an earlier source
    is not picked up again by a later one:

    1. The ``_compute_params`` and ``_const_params`` class attributes
       (Param/ConstParam API). The Arrow type is the parameter's
       ``arrow_type``, or pa.null() when that is None; const params are
       flagged ``is_const=True``.
    2. The ``FunctionArguments`` class attribute, if present: each public
       field annotated as ``Annotated[T, Arg(...)]``. For a varargs Arg
       annotated ``list[T]``, the element type ``T`` is used for inference.
    3. Arg descriptors found in the class's MRO (legacy API), skipping
       ``object`` and underscore-prefixed attributes.

    For sources 2 and 3 the Arrow type is resolved as: explicit
    ``arrow_type`` on the Arg; else pa.null() for TableInput/AnyArrow; else a
    PYTHON_TO_ARROW lookup (for legacy descriptors the ``Arg[type]``
    subscript is tried before the class type hint); else pa.null(). Only the
    legacy path emits a warning when it falls back to pa.null().

    Args:
        cls: Function class with Arg descriptors.

    Returns:
        List of ArgumentSpec objects, sorted by position (positional first,
        then named).

    """
    collected: list[ArgumentSpec] = []
    claimed: set[str] = set()

    # Class-level hints are only used by the legacy descriptor walk below;
    # unresolved forward references fall back to "no hints".
    try:
        class_hints = get_type_hints(cls)
    except (NameError, AttributeError):
        class_hints = {}

    # --- Source 1: Param/ConstParam API -----------------------------------
    compute_params: dict[str, Arg[Any]] = getattr(cls, "_compute_params", {})
    const_params: dict[str, Arg[Any]] = getattr(cls, "_const_params", {})

    for params, const_flag in ((compute_params, False), (const_params, True)):
        for key, param in params.items():
            claimed.add(key)
            declared_type = param.arrow_type if param.arrow_type is not None else pa.null()
            collected.append(
                ArgumentSpec(
                    name=key,
                    position=param.position,
                    arrow_type=declared_type,
                    is_table_input=False,
                    is_any_type=param.is_any,
                    is_varargs=param.varargs,
                    is_const=const_flag,
                )
            )

    # --- Source 2: FunctionArguments (typed generic pattern) --------------
    # e.g. class MyFunc(TableFunctionGenerator[MyArgs]) where MyArgs has
    # fields like: count: Annotated[int, Arg(0, doc="...")]
    args_class = getattr(cls, "FunctionArguments", None)
    annotated_hints: dict[str, Any] = {}
    if args_class is not None:
        try:
            annotated_hints = get_type_hints(args_class, include_extras=True)
        except (NameError, AttributeError):
            annotated_hints = {}

    for field_name, annotation in annotated_hints.items():
        if field_name.startswith("_") or field_name in claimed:
            continue
        if get_origin(annotation) is not Annotated:
            continue

        # Annotated[T, m1, m2, ...]: T first, then the metadata; the first
        # Arg instance in the metadata is the descriptor.
        base_type, *extras = get_args(annotation)
        descriptor: Arg[Any] | None = next((meta for meta in extras if isinstance(meta, Arg)), None)
        if descriptor is None:
            continue

        claimed.add(field_name)

        # For varargs declared as list[T], infer from the element type T.
        element_type = base_type
        if descriptor.varargs and get_origin(base_type) is list:
            inner = get_args(base_type)
            if inner:
                element_type = inner[0]

        takes_table = element_type is TableInput
        takes_any = element_type is AnyArrow or element_type is AnyArrowValue

        resolved: pa.DataType | None = descriptor.arrow_type
        if resolved is None:
            if takes_table or takes_any or element_type not in PYTHON_TO_ARROW:
                resolved = pa.null()
            else:
                resolved = PYTHON_TO_ARROW[element_type]

        collected.append(
            ArgumentSpec(
                name=field_name,
                position=descriptor.position,
                arrow_type=resolved,
                is_table_input=takes_table,
                is_any_type=takes_any,
                is_varargs=descriptor.varargs,
                is_const=getattr(descriptor, "const", False),
            )
        )

    # --- Source 3: Arg descriptors on the class hierarchy (legacy API) ----
    for klass in cls.__mro__:
        if klass is object:
            continue

        for attr_name, attr_value in vars(klass).items():
            if attr_name.startswith("_") or attr_name in claimed:
                continue
            if not isinstance(attr_value, Arg):
                continue

            claimed.add(attr_name)
            legacy_arg: Arg[Any] = attr_value

            # Special types: the Arg subscript (Arg[AnyArrow]) or the class
            # type hint may mark them; _returns_any_arrow_value covers
            # Annotated[AnyArrowValue, ...].
            class_hint = class_hints.get(attr_name)
            subscript_type = getattr(legacy_arg, "_type_param", None)
            takes_table = subscript_type is TableInput or class_hint is TableInput
            takes_any = (
                subscript_type is AnyArrow
                or class_hint is AnyArrow
                or class_hint is AnyArrowValue
                or getattr(legacy_arg, "_returns_any_arrow_value", False)
            )

            # Resolution order: explicit arrow_type, special types, Arg[type]
            # subscript, class type hint, then pa.null() with a warning.
            legacy_type: pa.DataType | None = legacy_arg.arrow_type
            if legacy_type is None:
                if takes_table or takes_any:
                    legacy_type = pa.null()
                elif subscript_type is not None and subscript_type in PYTHON_TO_ARROW:
                    legacy_type = PYTHON_TO_ARROW[subscript_type]
                elif class_hint is not None and class_hint in PYTHON_TO_ARROW:
                    legacy_type = PYTHON_TO_ARROW[class_hint]
                else:
                    warnings.warn(
                        f"Cannot determine Arrow type for argument '{attr_name}'. "
                        f"Add explicit arrow_type to Arg or add type annotation. "
                        f"Defaulting to pa.null().",
                        stacklevel=2,
                    )
                    legacy_type = pa.null()

            collected.append(
                ArgumentSpec(
                    name=attr_name,
                    position=legacy_arg.position,
                    arrow_type=legacy_type,
                    is_table_input=takes_table,
                    is_any_type=takes_any,
                    is_varargs=legacy_arg.varargs,
                    is_const=getattr(legacy_arg, "const", False),
                )
            )

    return sorted(collected, key=_argument_spec_sort_key)
|