vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""make_pairs_*, repeat_value_*, and constant_columns generators."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Annotated, Any, ClassVar
|
|
9
|
+
|
|
10
|
+
import pyarrow as pa
|
|
11
|
+
from vgi_rpc import ArrowSerializableDataclass, Transient
|
|
12
|
+
from vgi_rpc.rpc import OutputCollector
|
|
13
|
+
|
|
14
|
+
from vgi._test_fixtures.table._common import (
|
|
15
|
+
_cardinality_from_count,
|
|
16
|
+
)
|
|
17
|
+
from vgi.arguments import Arg
|
|
18
|
+
from vgi.invocation import BindResponse
|
|
19
|
+
from vgi.metadata import FunctionExample
|
|
20
|
+
from vgi.schema_utils import schema
|
|
21
|
+
from vgi.table_function import (
|
|
22
|
+
BindParams,
|
|
23
|
+
ProcessParams,
|
|
24
|
+
TableFunctionGenerator,
|
|
25
|
+
bind_fixed_schema,
|
|
26
|
+
init_single_worker,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(slots=True, frozen=True)
class ConstantColumnsFunctionArguments:
    """Positional arguments accepted by ``constant_columns``.

    The first argument is the row count; every following argument is
    collected as a vararg and becomes one output column.
    """

    # Position 0: number of rows; the Arg declaration constrains it with ge=0.
    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
    # Position 1 onwards: varargs, one value per output column.
    values: Annotated[
        tuple[Any, ...],
        Arg(1, varargs=True, doc="Values to fill each column (at least one required)"),
    ]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(kw_only=True)
class ConstantColumnsState(ArrowSerializableDataclass):
    """Per-scan progress tracked by ConstantColumnsFunction."""

    # Number of rows that still have to be emitted.
    remaining: int
    # Cached batch of constant rows. Annotated as Transient and hidden from
    # repr; ConstantColumnsFunction.process rebuilds it whenever it is None.
    full_batch: Annotated[pa.RecordBatch | None, Transient()] = field(default=None, repr=False)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@init_single_worker
@_cardinality_from_count
class ConstantColumnsFunction(TableFunctionGenerator[ConstantColumnsFunctionArguments, ConstantColumnsState]):
    """Generates a table with constant values in each column based on varargs.

    USE CASE
    --------
    Demonstrates varargs with AnyArrow type where the output schema is
    determined by the types of the values provided. Each vararg value
    becomes a column filled with that constant value for all rows.

    This shows how varargs can accept mixed types and produce a dynamic
    output schema based on the argument types.

    SCHEMA
    ------
    Output schema is dynamic based on the types of provided values.
    Column names are auto-generated as col_0, col_1, col_2, etc.

    Example: constant_columns(3, 42, 'hello', 3.14)
    Output schema: {"col_0": int64, "col_1": string, "col_2": double}

    Example:
    -------
        SELECT * FROM constant_columns(3, 42, 'hello')
        Returns: [{"col_0": 42, "col_1": "hello"},
                  {"col_0": 42, "col_1": "hello"},
                  {"col_0": 42, "col_1": "hello"}]

        SELECT * FROM constant_columns(2, 1, 2, 3, 'apple')
        Returns: [{"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"},
                  {"col_0": 1, "col_1": 2, "col_2": 3, "col_3": "apple"}]

    NOTE(review): the ``values`` Arg doc says at least one value is required,
    but nothing in this class enforces it -- presumably the argument layer
    does; confirm.

    """

    class Meta:
        """Metadata for ConstantColumnsFunction."""

        name = "constant_columns"
        description = "Generates rows with constant values from varargs"
        categories = ["generator", "utility"]
        examples = [
            FunctionExample(
                sql="SELECT * FROM constant_columns(5, 42, 'hello')",
                description="Generate 5 rows with columns containing 42 and 'hello'",
            ),
            FunctionExample(
                sql="SELECT * FROM constant_columns(3, 1, 2, 3, 'test')",
                description="Generate 3 rows with 4 columns of mixed types",
            ),
        ]

    # Maximum number of rows emitted per process() call.
    BATCH_SIZE: ClassVar[int] = 2048

    @classmethod
    def _build_full_batch(cls, params: ProcessParams[ConstantColumnsFunctionArguments]) -> pa.RecordBatch:
        """Build the reusable batch of constant rows.

        The batch never needs more rows than the scan will emit in total, so
        it is capped at ``min(count, BATCH_SIZE)`` rows. ``process`` only
        emits the batch whole when at least ``BATCH_SIZE`` rows remain (which
        implies ``count >= BATCH_SIZE``) and otherwise slices off
        ``remaining <= count`` rows, so the cap never changes the output.
        """
        # max(..., 0) keeps pa.repeat from receiving a negative length.
        row_count = min(max(params.args.count, 0), cls.BATCH_SIZE)
        arrays = [pa.repeat(scalar, row_count) for scalar in params.args.values]
        return pa.RecordBatch.from_arrays(arrays, schema=params.output_schema)

    @classmethod
    def on_bind(cls, params: BindParams[ConstantColumnsFunctionArguments]) -> BindResponse:
        """Return output schema with one column per vararg, typed by value."""
        return BindResponse(output_schema=schema({f"col_{i}": v.type for i, v in enumerate(params.args.values)}))

    @classmethod
    def initial_state(cls, params: ProcessParams[ConstantColumnsFunctionArguments]) -> ConstantColumnsState:
        """Create the initial state with the requested row count and a pre-built batch."""
        return ConstantColumnsState(remaining=params.args.count, full_batch=cls._build_full_batch(params))

    @classmethod
    def process(
        cls,
        params: ProcessParams[ConstantColumnsFunctionArguments],
        state: ConstantColumnsState,
        out: OutputCollector,
    ) -> None:
        """Emit up to BATCH_SIZE constant rows, or finish when none remain."""
        if state.remaining <= 0:
            out.finish()
            return

        # full_batch is declared Transient on the state, so it can be None
        # here; rebuild it from the arguments in that case.
        if state.full_batch is None:
            state.full_batch = cls._build_full_batch(params)

        if state.remaining >= cls.BATCH_SIZE:
            out.emit(state.full_batch)
            state.remaining -= cls.BATCH_SIZE
        else:
            # Final, partial batch: take only the rows still owed.
            out.emit(state.full_batch.slice(0, state.remaining))
            state.remaining = 0
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ============================================================================
|
|
143
|
+
|
|
144
|
+
# Fixed output schema of the integer make_pairs overload: columns a and b, both int64.
MAKE_PAIRS_INT_SCHEMA = schema(a=pa.int64(), b=pa.int64())
|
|
145
|
+
# Fixed output schema of the string make_pairs overload: columns a and b, both string.
MAKE_PAIRS_STR_SCHEMA = schema(a=pa.string(), b=pa.string())
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@dataclass(kw_only=True)
class MakePairsIntArgs:
    """Positional arguments of the integer ``make_pairs`` overload."""

    # Position 0: first integer of the generated range (inclusive).
    start: Annotated[int, Arg(0, doc="Start value")]
    # Position 1: end of the generated range (exclusive).
    stop: Annotated[int, Arg(1, doc="Stop value")]
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@dataclass(kw_only=True)
class MakePairsStrArgs:
    """Positional arguments of the string ``make_pairs`` overload."""

    # Position 0: text placed in front of the row index in column a.
    prefix: Annotated[str, Arg(0, doc="Prefix for column a")]
    # Position 1: text used for column b. Despite the name, MakePairsStrFunction
    # places it in front of the row index as well (f"{suffix}{i}").
    suffix: Annotated[str, Arg(1, doc="Suffix for column b")]
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@dataclass(kw_only=True)
class MakePairsIntState(ArrowSerializableDataclass):
    """Precomputed columns and an emitted flag for integer make_pairs."""

    # Values for output column "a".
    a_vals: list[int] = field(default_factory=list)
    # Values for output column "b".
    b_vals: list[int] = field(default_factory=list)
    # True once the single output batch has been emitted.
    done: bool = False
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
@dataclass(kw_only=True)
class MakePairsStrState(ArrowSerializableDataclass):
    """Precomputed columns and an emitted flag for string make_pairs."""

    # Values for output column "a".
    a_vals: list[str] = field(default_factory=list)
    # Values for output column "b".
    b_vals: list[str] = field(default_factory=list)
    # True once the single output batch has been emitted.
    done: bool = False
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@init_single_worker
@bind_fixed_schema
class MakePairsIntFunction(TableFunctionGenerator[MakePairsIntArgs, MakePairsIntState]):
    """Table function yielding the pair (i, i*2) for every i in [start, stop).

    All rows are produced in one batch.

    Example:
        SELECT * FROM make_pairs(1, 4)
        Returns: (1,2), (2,4), (3,6)

    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_INT_SCHEMA

    class Meta:
        """Registration metadata for the integer overload."""

        name = "make_pairs"
        description = "Generate integer pairs (i, i*2)"

    @classmethod
    def initial_state(cls, params: ProcessParams[MakePairsIntArgs]) -> MakePairsIntState:
        """Precompute both output columns for the requested range."""
        args = params.args
        firsts = [*range(args.start, args.stop)]
        seconds = [n * 2 for n in firsts]
        return MakePairsIntState(a_vals=firsts, b_vals=seconds)

    @classmethod
    def process(cls, params: ProcessParams[MakePairsIntArgs], state: MakePairsIntState, out: OutputCollector) -> None:
        """Emit the precomputed batch on the first call; finish on the next one."""
        if not state.done:
            # Mark the state as done before emitting the batch.
            state.done = True
            columns = {"a": state.a_vals, "b": state.b_vals}
            out.emit(pa.RecordBatch.from_pydict(columns, schema=MAKE_PAIRS_INT_SCHEMA))
            return
        out.finish()
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@init_single_worker
@bind_fixed_schema
class MakePairsStrFunction(TableFunctionGenerator[MakePairsStrArgs, MakePairsStrState]):
    """Table function yielding five string pairs (prefix+i, suffix+i), i in 0..4.

    Both arguments are placed in front of the row index.

    Example:
        SELECT * FROM make_pairs('row_', '_end')
        Returns: ('row_0','_end0'), ('row_1','_end1'), ...

    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_STR_SCHEMA

    class Meta:
        """Registration metadata for the string overload."""

        name = "make_pairs"
        description = "Generate string pairs with prefix and suffix"

    @classmethod
    def initial_state(cls, params: ProcessParams[MakePairsStrArgs]) -> MakePairsStrState:
        """Precompute the five rows of both output columns."""
        args = params.args
        indices = range(5)
        left = [f"{args.prefix}{i}" for i in indices]
        right = [f"{args.suffix}{i}" for i in indices]
        return MakePairsStrState(a_vals=left, b_vals=right)

    @classmethod
    def process(cls, params: ProcessParams[MakePairsStrArgs], state: MakePairsStrState, out: OutputCollector) -> None:
        """Emit the precomputed batch on the first call; finish on the next one."""
        if not state.done:
            # Mark the state as done before emitting the batch.
            state.done = True
            columns = {"a": state.a_vals, "b": state.b_vals}
            out.emit(pa.RecordBatch.from_pydict(columns, schema=MAKE_PAIRS_STR_SCHEMA))
            return
        out.finish()
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ============================================================================
|
|
255
|
+
|
|
256
|
+
# Fixed output schema of the mixed make_pairs overload: int64 column a, string column b.
# Built from explicit pa.field objects rather than (name, type) tuples.
MAKE_PAIRS_MIXED_SCHEMA = pa.schema(
    [
        pa.field("a", pa.int64()),
        pa.field("b", pa.string()),
    ]
)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
@dataclass(kw_only=True)
class MakePairsIntStrArgs:
    """Positional arguments of the mixed int/string ``make_pairs`` overload."""

    # Position 0: integer value of the first row in column a.
    start: Annotated[int, Arg(0, doc="Start integer value")]
    # Position 1: text placed in front of the row index in column b.
    label: Annotated[str, Arg(1, doc="Label prefix for string column")]
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
@dataclass(kw_only=True)
class MakePairsIntStrState(ArrowSerializableDataclass):
    """Precomputed columns and an emitted flag for mixed-type make_pairs."""

    # Integer values for output column "a".
    a_vals: list[int] = field(default_factory=list)
    # String values for output column "b".
    b_vals: list[str] = field(default_factory=list)
    # True once the single output batch has been emitted.
    done: bool = False
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
@init_single_worker
@bind_fixed_schema
class MakePairsIntStrFunction(TableFunctionGenerator[MakePairsIntStrArgs, MakePairsIntStrState]):
    """Table function yielding five mixed pairs (start+i, label+str(i)), i in 0..4.

    Example:
        SELECT * FROM make_pairs(10, 'item_')
        Returns: (10, 'item_0'), (11, 'item_1'), ..., (14, 'item_4')

    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_PAIRS_MIXED_SCHEMA

    class Meta:
        """Registration metadata for the mixed int/string overload."""

        name = "make_pairs"
        description = "Generate mixed int/string pairs"

    @classmethod
    def initial_state(cls, params: ProcessParams[MakePairsIntStrArgs]) -> MakePairsIntStrState:
        """Precompute the five rows of both output columns."""
        args = params.args
        indices = range(5)
        numbers = [args.start + i for i in indices]
        labels = [f"{args.label}{i}" for i in indices]
        return MakePairsIntStrState(a_vals=numbers, b_vals=labels)

    @classmethod
    def process(
        cls, params: ProcessParams[MakePairsIntStrArgs], state: MakePairsIntStrState, out: OutputCollector
    ) -> None:
        """Emit the precomputed batch on the first call; finish on the next one."""
        if not state.done:
            # Mark the state as done before emitting the batch.
            state.done = True
            columns = {"a": state.a_vals, "b": state.b_vals}
            out.emit(pa.RecordBatch.from_pydict(columns, schema=MAKE_PAIRS_MIXED_SCHEMA))
            return
        out.finish()
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
# ============================================================================
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
@dataclass(kw_only=True)
class RepeatValueIntArgs:
    """Positional arguments of the integer ``repeat_value`` overload."""

    # Position 0: number of rows. No bound is declared on this Arg.
    count: Annotated[int, Arg(0, doc="Number of rows to generate")]
    # Position 1 onwards: int64 varargs, one per output column.
    values: Annotated[
        list[int],
        Arg(1, varargs=True, arrow_type=pa.int64(), doc="Integer values to repeat"),
    ]
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
@dataclass(kw_only=True)
class RepeatValueStrArgs:
    """Positional arguments of the string ``repeat_value`` overload."""

    # Position 0: number of rows. No bound is declared on this Arg.
    count: Annotated[int, Arg(0, doc="Number of rows to generate")]
    # Position 1 onwards: string varargs, one per output column.
    values: Annotated[
        list[str],
        Arg(1, varargs=True, arrow_type=pa.string(), doc="String values to repeat"),
    ]
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
@dataclass(kw_only=True)
class RepeatValueIntState(ArrowSerializableDataclass):
    """Precomputed data and an emitted flag for integer repeat_value."""

    # One inner list per vararg value, holding that value repeated `count`
    # times. Despite the field name, each inner list is an output column.
    rows: list[list[int]] = field(default_factory=list)
    # True once the single output batch has been emitted.
    done: bool = False
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
@dataclass(kw_only=True)
class RepeatValueStrState(ArrowSerializableDataclass):
    """Precomputed data and an emitted flag for string repeat_value."""

    # One inner list per vararg value, holding that value repeated `count`
    # times. Despite the field name, each inner list is an output column.
    rows: list[list[str]] = field(default_factory=list)
    # True once the single output batch has been emitted.
    done: bool = False
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
@init_single_worker
class RepeatValueIntFunction(TableFunctionGenerator[RepeatValueIntArgs, RepeatValueIntState]):
    """Table function that repeats each integer vararg for ``count`` rows.

    Every vararg becomes one int64 column named v0, v1, ...

    Example:
        SELECT * FROM repeat_value(3, 10, 20)
        Returns 3 rows with columns v0=10, v1=20

    """

    class Meta:
        """Registration metadata for the integer overload."""

        name = "repeat_value"
        description = "Repeat integer values for N rows"

    @classmethod
    def on_bind(cls, params: BindParams[RepeatValueIntArgs]) -> BindResponse:
        """Declare one int64 column per vararg value."""
        int_fields = [pa.field(f"v{i}", pa.int64()) for i, _ in enumerate(params.args.values)]
        return BindResponse(output_schema=pa.schema(int_fields))

    @classmethod
    def initial_state(cls, params: ProcessParams[RepeatValueIntArgs]) -> RepeatValueIntState:
        """Precompute each column as its value repeated ``count`` times."""
        args = params.args
        columns = [[value] * args.count for value in args.values]
        return RepeatValueIntState(rows=columns)

    @classmethod
    def process(
        cls, params: ProcessParams[RepeatValueIntArgs], state: RepeatValueIntState, out: OutputCollector
    ) -> None:
        """Emit the precomputed batch on the first call; finish on the next one."""
        if not state.done:
            # Mark the state as done before emitting the batch.
            state.done = True
            names = [f"v{i}" for i in range(len(state.rows))]
            data = dict(zip(names, state.rows))
            out_schema = schema({name: pa.int64() for name in names})
            out.emit(pa.RecordBatch.from_pydict(data, schema=out_schema))
            return
        out.finish()
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
@init_single_worker
class RepeatValueStrFunction(TableFunctionGenerator[RepeatValueStrArgs, RepeatValueStrState]):
    """Table function that repeats each string vararg for ``count`` rows.

    Every vararg becomes one string column named v0, v1, ...

    Example:
        SELECT * FROM repeat_value(3, 'a', 'b')
        Returns 3 rows with columns v0='a', v1='b'

    """

    class Meta:
        """Registration metadata for the string overload."""

        name = "repeat_value"
        description = "Repeat string values for N rows"

    @classmethod
    def on_bind(cls, params: BindParams[RepeatValueStrArgs]) -> BindResponse:
        """Declare one string column per vararg value."""
        str_fields = [pa.field(f"v{i}", pa.string()) for i, _ in enumerate(params.args.values)]
        return BindResponse(output_schema=pa.schema(str_fields))

    @classmethod
    def initial_state(cls, params: ProcessParams[RepeatValueStrArgs]) -> RepeatValueStrState:
        """Precompute each column as its value repeated ``count`` times."""
        args = params.args
        columns = [[value] * args.count for value in args.values]
        return RepeatValueStrState(rows=columns)

    @classmethod
    def process(
        cls, params: ProcessParams[RepeatValueStrArgs], state: RepeatValueStrState, out: OutputCollector
    ) -> None:
        """Emit the precomputed batch on the first call; finish on the next one."""
        if not state.done:
            # Mark the state as done before emitting the batch.
            state.done = True
            names = [f"v{i}" for i in range(len(state.rows))]
            data = dict(zip(names, state.rows))
            out_schema = schema({name: pa.string() for name in names})
            out.emit(pa.RecordBatch.from_pydict(data, schema=out_schema))
            return
        out.finish()
|