vgi-python 0.8.7__py3-none-any.whl → 0.8.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/_test_fixtures/copy_from.py +99 -0
- vgi/_test_fixtures/copy_to.py +160 -0
- vgi/_test_fixtures/table/__init__.py +4 -0
- vgi/_test_fixtures/table/pairs.py +107 -1
- vgi/_test_fixtures/table/settings.py +57 -2
- vgi/_test_fixtures/worker.py +11 -0
- vgi/catalog/catalog_interface.py +131 -0
- vgi/copy_from_function.py +157 -0
- vgi/copy_to_function.py +179 -0
- vgi/meta_worker.py +1 -0
- vgi/protocol.py +71 -0
- vgi/scalar_function.py +22 -4
- vgi/table_function.py +91 -10
- vgi/worker.py +14 -0
- {vgi_python-0.8.7.dist-info → vgi_python-0.8.8.dist-info}/METADATA +1 -1
- {vgi_python-0.8.7.dist-info → vgi_python-0.8.8.dist-info}/RECORD +19 -15
- {vgi_python-0.8.7.dist-info → vgi_python-0.8.8.dist-info}/WHEEL +0 -0
- {vgi_python-0.8.7.dist-info → vgi_python-0.8.8.dist-info}/entry_points.txt +0 -0
- {vgi_python-0.8.7.dist-info → vgi_python-0.8.8.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Fixture ``COPY ... FROM`` format reader for VGI integration tests.
|
|
4
|
+
|
|
5
|
+
``ExampleLinesCopyFromFunction`` registers the SQL format ``example_lines`` — a
|
|
6
|
+
toy delimited-text reader. It exercises the full COPY-FROM path plus the option
|
|
7
|
+
machinery: a defaulted option (``delimiter``), an ``INTEGER`` option with a range
|
|
8
|
+
constraint (``skip_rows``), a required option (``null_string``), and an
|
|
9
|
+
enum/``choices`` option (``on_error``).
|
|
10
|
+
|
|
11
|
+
Usage::
|
|
12
|
+
|
|
13
|
+
CREATE TABLE t (a INTEGER, b VARCHAR);
|
|
14
|
+
COPY t FROM '/path/data.txt' (FORMAT example_lines, null_string 'NA');
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from typing import TYPE_CHECKING, Annotated, ClassVar
|
|
21
|
+
|
|
22
|
+
import pyarrow as pa
|
|
23
|
+
|
|
24
|
+
from vgi.arguments import Arg
|
|
25
|
+
from vgi.copy_from_function import CopyFromFunction
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from vgi_rpc.rpc import OutputCollector
|
|
29
|
+
|
|
30
|
+
from vgi.table_function import ProcessParams
|
|
31
|
+
|
|
32
|
+
__all__ = ["ExampleLinesCopyFromFunction"]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(slots=True, frozen=True, kw_only=True)
class ExampleLinesCopyFromArgs:
    """Option bundle for the ``example_lines`` COPY-FROM format.

    Every field is tied to the same-named COPY option through its ``Arg``
    annotation; the dataclass is frozen and keyword-only.
    """

    # Declared without a default. Cells equal to this token are read as NULL.
    null_string: Annotated[
        str,
        Arg("null_string", doc="Token parsed as SQL NULL"),
    ]

    # Separator used to split each line into cells.
    delimiter: Annotated[
        str,
        Arg("delimiter", default=",", doc="Field separator"),
    ] = ","

    # Number of leading lines dropped before parsing; constrained to >= 0.
    skip_rows: Annotated[
        int,
        Arg("skip_rows", default=0, ge=0, doc="Leading lines to skip before data"),
    ] = 0

    # Policy for rows whose cell count differs from the target column count.
    on_error: Annotated[
        str,
        Arg(
            "on_error",
            default="fail",
            choices=["fail", "skip"],
            doc="Behavior on a row whose column count does not match the target",
        ),
    ] = "fail"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ExampleLinesCopyFromFunction(CopyFromFunction[ExampleLinesCopyFromArgs]):
    """Test-fixture ``COPY ... FROM`` reader for a toy delimited-text format."""

    COPY_FROM_FORMAT: ClassVar[str] = "example_lines"
    COPY_FROM_COMMENT: ClassVar[str | None] = "Toy delimited-text reader for tests"

    class Meta:
        name = "example_lines_copy_reader"
        description = "Read a delimited text file into the COPY target table"
        categories = ["copy", "test"]
        tags = {"category": "copy_from", "stability": "test"}

    @classmethod
    def read(
        cls,
        *,
        path: str,
        options: ExampleLinesCopyFromArgs,
        expected_schema: pa.Schema,
        params: ProcessParams[ExampleLinesCopyFromArgs],
        out: OutputCollector,
    ) -> None:
        """Read the whole file at ``path`` and emit it as a single record batch.

        The first ``options.skip_rows`` lines are dropped and empty lines are
        ignored. Each remaining line is split on ``options.delimiter``. A line
        whose cell count differs from the number of fields in
        ``expected_schema`` is dropped when ``options.on_error`` is ``"skip"``.

        Raises:
            ValueError: A line has the wrong number of cells and
                ``options.on_error`` is not ``"skip"``.
        """
        with open(path, encoding="utf-8") as source:
            text = source.read()
        data_lines = text.splitlines()[options.skip_rows :]

        ncols = len(expected_schema)
        tolerate_bad_rows = options.on_error == "skip"
        accepted: list[list[str]] = []
        for line in data_lines:
            if line == "":
                continue
            cells = line.split(options.delimiter)
            if len(cells) == ncols:
                accepted.append(cells)
            elif not tolerate_bad_rows:
                raise ValueError(f"example_lines: row has {len(cells)} fields, expected {ncols}: {line!r}")

        # Transpose to column-major order; with no rows every column is empty.
        if accepted:
            by_column = list(zip(*accepted))
        else:
            by_column = [() for _ in range(ncols)]

        # Build each column as strings (NULL where the cell equals null_string),
        # then cast it to the target field type here, since DuckDB inserts no cast.
        null_token = options.null_string
        typed_columns = [
            pa.array(
                [None if cell == null_token else cell for cell in by_column[position]],
                type=pa.string(),
            ).cast(target.type)
            for position, target in enumerate(expected_schema)
        ]
        out.emit(pa.RecordBatch.from_arrays(typed_columns, schema=expected_schema))
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Fixture ``COPY ... TO`` format writer for VGI integration tests.
|
|
4
|
+
|
|
5
|
+
``ExampleLinesCopyToFunction`` registers the SQL format ``example_lines_out`` — a
|
|
6
|
+
toy delimited-text writer, the symmetric counterpart of the ``example_lines``
|
|
7
|
+
reader. It exercises the COPY-TO Sink+Combine path plus the option machinery: a
|
|
8
|
+
required option (``null_string``), a defaulted option (``delimiter``), a BOOLEAN
|
|
9
|
+
option (``header``), and an enum/``choices`` option (``on_exists``).
|
|
10
|
+
|
|
11
|
+
Shards are buffered in ``params.storage`` (``execution_id``-scoped) by ``write()``
|
|
12
|
+
and concatenated to the destination by ``close()`` — the cross-process-safe
|
|
13
|
+
pattern, so it works under pool rotation / HTTP.
|
|
14
|
+
|
|
15
|
+
Usage::
|
|
16
|
+
|
|
17
|
+
COPY (SELECT * FROM t) TO '/path/out.txt' (FORMAT 'acme.example_lines_out', null_string 'NA');
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
from typing import TYPE_CHECKING, Annotated, ClassVar
|
|
24
|
+
|
|
25
|
+
import pyarrow as pa
|
|
26
|
+
|
|
27
|
+
from vgi.arguments import Arg
|
|
28
|
+
from vgi.copy_to_function import CopyToFunction
|
|
29
|
+
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
from vgi.table_buffering_function import TableBufferingParams
|
|
32
|
+
|
|
33
|
+
__all__ = ["ExampleLinesCopyToFunction", "ExampleLinesOrderedCopyToFunction"]
|
|
34
|
+
|
|
35
|
+
_SHARD_NS = b"copy_to_shard"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(slots=True, frozen=True, kw_only=True)
class ExampleLinesCopyToArgs:
    """Option bundle for the ``example_lines_out`` COPY-TO format.

    Every field is tied to the same-named COPY option through its ``Arg``
    annotation; the dataclass is frozen and keyword-only.
    """

    # Declared without a default. Written in place of every NULL cell.
    null_string: Annotated[
        str,
        Arg("null_string", doc="Token written for SQL NULL"),
    ]

    # Separator placed between cells (and between header names).
    delimiter: Annotated[
        str,
        Arg("delimiter", default=",", doc="Field separator"),
    ] = ","

    # Whether a line of column names precedes the data.
    header: Annotated[
        bool,
        Arg("header", default=False, doc="Write a header row of column names"),
    ] = False

    # How many times the header line is written; constrained to 0..3.
    header_repeat: Annotated[
        int,
        Arg("header_repeat", default=1, ge=0, le=3, doc="When header=true, write the header line this many times"),
    ] = 1

    # Policy when the destination path already exists.
    on_exists: Annotated[
        str,
        Arg(
            "on_exists",
            default="overwrite",
            choices=["overwrite", "error"],
            doc="Behavior when the destination file already exists",
        ),
    ] = "overwrite"

    # Test hook: a non-empty value makes write() raise when a cell matches it.
    fail_on_value: Annotated[
        str,
        Arg("fail_on_value", default="", doc="If non-empty, fail mid-write when a cell equals this value"),
    ] = ""
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ExampleLinesCopyToFunction(CopyToFunction[ExampleLinesCopyToArgs]):
    """Test-fixture ``COPY ... TO`` writer for a toy delimited-text format."""

    COPY_TO_FORMAT: ClassVar[str] = "example_lines_out"
    COPY_TO_COMMENT: ClassVar[str | None] = "Toy delimited-text writer for tests"

    class Meta:
        name = "example_lines_writer"
        description = "Write the COPY source to a delimited text file"
        categories = ["copy", "test"]
        tags = {"category": "copy_to", "stability": "test"}

    @classmethod
    def write(
        cls,
        *,
        batch: pa.RecordBatch,
        options: ExampleLinesCopyToArgs,
        file_path: str,
        params: TableBufferingParams[ExampleLinesCopyToArgs],
    ) -> None:
        """Serialize ``batch`` as an Arrow IPC stream and append it to storage.

        Nothing is written to ``file_path`` here; the destination file is
        produced later by ``close()``.

        Raises:
            ValueError: ``options.fail_on_value`` is non-empty and some non-NULL
                cell stringifies to it.
        """
        # Mid-sink failure trigger used to exercise the in-flight
        # teardown/recovery path.
        trigger = options.fail_on_value
        if trigger:
            hit = any(
                value is not None and str(value) == trigger
                for col in batch.columns
                for value in col.to_pylist()
            )
            if hit:
                raise ValueError(f"example_lines_out: fail_on_value hit: {options.fail_on_value!r}")

        buffer = pa.BufferOutputStream()
        with pa.ipc.new_stream(buffer, batch.schema) as ipc_writer:
            ipc_writer.write_batch(batch)
        payload = buffer.getvalue().to_pybytes()
        # state_append is atomic + race-safe across parallel sink threads/workers.
        params.storage.state_append(_SHARD_NS, b"", payload)

    @classmethod
    def close(
        cls,
        *,
        options: ExampleLinesCopyToArgs,
        file_path: str,
        params: TableBufferingParams[ExampleLinesCopyToArgs],
    ) -> int:
        """Write every buffered shard to ``file_path`` as delimited text.

        Returns:
            The number of data rows written (header lines are not counted).

        Raises:
            FileExistsError: ``options.on_exists`` is ``"error"`` and
                ``file_path`` already exists.
        """
        import os

        if options.on_exists == "error" and os.path.exists(file_path):
            raise FileExistsError(f"example_lines_out: destination already exists: {file_path}")

        shards = params.storage.state_log_scan(_SHARD_NS, b"", after_id=-1)
        separator = options.delimiter

        def fmt(value: object) -> str:
            # NULL cells are rendered as the configured null token.
            if value is None:
                return options.null_string
            return str(value)

        def write_header(fh: object, names: list[str]) -> None:
            # header=true writes the column-name line `header_repeat` times.
            if not options.header:
                return
            header_line = separator.join(names) + "\n"
            for _ in range(options.header_repeat):
                fh.write(header_line)  # type: ignore[attr-defined]

        rows_written = 0
        header_done = False
        with open(file_path, "w", encoding="utf-8") as fh:
            for _log_id, blob in shards:
                table = pa.ipc.open_stream(blob).read_all()
                column_names = table.schema.names
                if not header_done:
                    write_header(fh, list(column_names))
                    header_done = True
                for record in table.to_pylist():
                    fh.write(separator.join(fmt(record[name]) for name in column_names) + "\n")
                    rows_written += 1

            # Empty COPY with header=true still emits the header row(s). The
            # source column names come from the bind's input_schema.
            if not header_done:
                assert params.init_call is not None
                in_schema = params.init_call.bind_call.input_schema
                if in_schema is not None:
                    write_header(fh, list(in_schema.names))
        return rows_written
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class ExampleLinesOrderedCopyToFunction(ExampleLinesCopyToFunction):
    """Ordered variant of :class:`ExampleLinesCopyToFunction`.

    ``Meta.sink_order_dependent = True`` makes the extension use a
    single-threaded sink, so the worker receives every batch in source order and
    writes the file in order.

    ``write()`` and ``close()`` are inherited unchanged; only the registered
    format name, the comment and the ``Meta`` block differ from the parent.
    """

    COPY_TO_FORMAT: ClassVar[str] = "example_lines_ordered_out"
    COPY_TO_COMMENT: ClassVar[str | None] = "Toy delimited-text writer (ordered, single-thread sink)"

    # This Meta does not subclass the parent's Meta, so every attribute the
    # parent declared is restated here.
    class Meta:
        name = "example_lines_ordered_writer"
        description = "Write the COPY source to a delimited file, preserving source order"
        categories = ["copy", "test"]
        tags = {"category": "copy_to", "stability": "test"}
        sink_order_dependent = True  # ordered COPY TO → single-thread sink
|
|
@@ -78,6 +78,7 @@ from vgi._test_fixtures.table.pairs import (
|
|
|
78
78
|
MakePairsStrFunction,
|
|
79
79
|
RepeatValueIntFunction,
|
|
80
80
|
RepeatValueStrFunction,
|
|
81
|
+
UnionVarargsFunction,
|
|
81
82
|
)
|
|
82
83
|
from vgi._test_fixtures.table.partition_columns import (
|
|
83
84
|
CountryPartitionedSalesFunction,
|
|
@@ -119,6 +120,7 @@ from vgi._test_fixtures.table.sequence import (
|
|
|
119
120
|
TenThousandFunction,
|
|
120
121
|
)
|
|
121
122
|
from vgi._test_fixtures.table.settings import (
|
|
123
|
+
MultiSecretDemoFunction,
|
|
122
124
|
ScopedSecretDemoFunction,
|
|
123
125
|
SecretDemoFunction,
|
|
124
126
|
SettingsAwareFunction,
|
|
@@ -198,6 +200,7 @@ __all__ = [
|
|
|
198
200
|
"RegionYearPartitionedFunction",
|
|
199
201
|
"RepeatValueIntFunction",
|
|
200
202
|
"RepeatValueStrFunction",
|
|
203
|
+
"UnionVarargsFunction",
|
|
201
204
|
"RFF_MULTI_COLUMNS",
|
|
202
205
|
"RFF_NESTED_COLUMNS",
|
|
203
206
|
"RFF_NONE_COLUMNS",
|
|
@@ -212,6 +215,7 @@ __all__ = [
|
|
|
212
215
|
"RffStructScanFunction",
|
|
213
216
|
"RowIdSequenceFunction",
|
|
214
217
|
"SampleEchoFunction",
|
|
218
|
+
"MultiSecretDemoFunction",
|
|
215
219
|
"ScopedSecretDemoFunction",
|
|
216
220
|
"SecretDemoFunction",
|
|
217
221
|
"SequenceFunction",
|
|
@@ -14,7 +14,7 @@ from vgi_rpc.rpc import OutputCollector
|
|
|
14
14
|
from vgi._test_fixtures.table._common import (
|
|
15
15
|
_cardinality_from_count,
|
|
16
16
|
)
|
|
17
|
-
from vgi.arguments import Arg
|
|
17
|
+
from vgi.arguments import Arg, TaggedUnion
|
|
18
18
|
from vgi.invocation import BindResponse
|
|
19
19
|
from vgi.metadata import FunctionExample
|
|
20
20
|
from vgi.schema_utils import schema
|
|
@@ -447,3 +447,109 @@ class RepeatValueStrFunction(TableFunctionGenerator[RepeatValueStrArgs, RepeatVa
|
|
|
447
447
|
data = {f"v{i}": col for i, col in enumerate(state.rows)}
|
|
448
448
|
out_schema = schema({f"v{i}": pa.string() for i in range(len(state.rows))})
|
|
449
449
|
out.emit(pa.RecordBatch.from_pydict(data, schema=out_schema))
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
# ============================================================================
# union_varargs fixtures
# ============================================================================

# Sparse union shared by every union_varargs argument. DuckDB only ever emits
# sparse unions (+us:) over Arrow, so this round-trips end-to-end.
UNION_VARARGS_TYPE = pa.sparse_union(
    [
        pa.field("i", pa.int64()),
        pa.field("s", pa.string()),
    ]
)

# Output schema of union_varargs: one row per argument, carrying its position,
# the active member name, and the member value rendered as a string.
UNION_VARARGS_SCHEMA = schema(
    idx=pa.int64(),
    tag=pa.string(),
    value=pa.string(),
)
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
@dataclass(kw_only=True)
class UnionVarargsArgs:
    """Arguments accepted by ``union_varargs``."""

    # Variadic positional argument starting at position 0; every value is
    # declared with the shared sparse-union Arrow type.
    configs: Annotated[
        tuple[TaggedUnion, ...],
        Arg(
            0,
            varargs=True,
            arrow_type=UNION_VARARGS_TYPE,
            doc="Union values whose active member tag is echoed back",
        ),
    ]
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
@dataclass(kw_only=True)
class UnionVarargsState(ArrowSerializableDataclass):
    """Per-scan state for ``union_varargs``: three parallel lists plus a done flag."""

    # Positional index of each vararg.
    idx: list[int] = field(default_factory=list)
    # Active member name of each vararg.
    tags: list[str | None] = field(default_factory=list)
    # Member value of each vararg, rendered with str().
    values: list[str] = field(default_factory=list)
    # Set once the single output batch has been emitted.
    done: bool = False
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
@init_single_worker
@bind_fixed_schema
class UnionVarargsFunction(TableFunctionGenerator[UnionVarargsArgs, UnionVarargsState]):
    """Report the active member tag and value of every union vararg.

    USE CASE
    --------
    Covers union-typed varargs. Each argument is delivered as a
    [`TaggedUnion`][vgi.arguments.TaggedUnion], which keeps the active member
    discriminator that a plain ``Scalar.as_py()`` would drop. The function
    produces one row per vararg holding its positional index, the active
    member name, and the member value stringified into one fixed column.

    SCHEMA
    ------
    Fixed: ``{"idx": int64, "tag": string, "value": string}``.

    Example:
        SELECT * FROM union_varargs(
            union_value(i := 1)::UNION(i INT, s VARCHAR),
            union_value(s := 'x')::UNION(i INT, s VARCHAR))
        Returns: (0, 'i', '1'), (1, 's', 'x')

    Attributes:
        FIXED_SCHEMA: The fixed Arrow output schema this function always produces.

    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = UNION_VARARGS_SCHEMA

    class Meta:
        """Function metadata."""

        name = "union_varargs"
        description = "Echo the active member tag and value of each union vararg"
        categories = ["generator", "utility"]
        examples = [
            FunctionExample(
                sql=(
                    "SELECT * FROM union_varargs("
                    "union_value(i := 1)::UNION(i INT, s VARCHAR), "
                    "union_value(s := 'x')::UNION(i INT, s VARCHAR))"
                ),
                description="Echo the tag and value of two union arguments",
            ),
        ]

    @classmethod
    def initial_state(cls, params: ProcessParams[UnionVarargsArgs]) -> UnionVarargsState:
        """Split every union vararg into its index, tag and stringified value."""
        positions: list[int] = []
        member_tags: list[str | None] = []
        member_values: list[str] = []
        for position, item in enumerate(params.args.configs):
            positions.append(position)
            member_tags.append(item.tag)
            member_values.append(str(item.value))
        return UnionVarargsState(idx=positions, tags=member_tags, values=member_values)

    @classmethod
    def process(cls, params: ProcessParams[UnionVarargsArgs], state: UnionVarargsState, out: OutputCollector) -> None:
        """Emit all rows in one batch on the first call, then finish on the next."""
        if not state.done:
            state.done = True
            rows = {"idx": state.idx, "tag": state.tags, "value": state.values}
            out.emit(pa.RecordBatch.from_pydict(rows, schema=UNION_VARARGS_SCHEMA))
            return
        out.finish()
|
|
@@ -321,7 +321,7 @@ class SecretDemoFunction(TableFunctionGenerator[None, SecretDemoState]):
|
|
|
321
321
|
@classmethod
|
|
322
322
|
def initial_state(cls, params: ProcessParams[None]) -> SecretDemoState:
|
|
323
323
|
"""Build initial state from secret key-value pairs."""
|
|
324
|
-
secret = params.secrets.
|
|
324
|
+
secret = next(iter(params.secrets.of_type("vgi_example")), {})
|
|
325
325
|
keys = list(secret.keys())
|
|
326
326
|
values = [str(v.as_py()) for v in secret.values()]
|
|
327
327
|
types = [str(v.type) for v in secret.values()]
|
|
@@ -406,7 +406,7 @@ class ScopedSecretDemoFunction(TableFunctionGenerator[ScopedSecretDemoArgs, Scop
|
|
|
406
406
|
@classmethod
|
|
407
407
|
def initial_state(cls, params: ProcessParams[ScopedSecretDemoArgs]) -> ScopedSecretDemoState:
|
|
408
408
|
"""Build state from resolved secrets."""
|
|
409
|
-
secret = params.secrets.
|
|
409
|
+
secret = next(iter(params.secrets.of_type("vgi_example")), {})
|
|
410
410
|
return ScopedSecretDemoState(
|
|
411
411
|
found=bool(secret),
|
|
412
412
|
secret_keys=",".join(secret.keys()) if secret else "",
|
|
@@ -430,3 +430,58 @@ class ScopedSecretDemoFunction(TableFunctionGenerator[ScopedSecretDemoArgs, Scop
|
|
|
430
430
|
)
|
|
431
431
|
out.emit(batch)
|
|
432
432
|
out.finish()
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
@dataclass(kw_only=True)
class MultiSecretDemoState(ArrowSerializableDataclass):
    """Scan state carried by ``MultiSecretDemoFunction``."""

    # api_key of the selected secret; stays "" when none was found.
    api_key: str = ""
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
@init_single_worker
class MultiSecretDemoFunction(TableFunctionGenerator[ScopedSecretDemoArgs, MultiSecretDemoState]):
    """Resolve TWO same-type scoped secrets in a single bind, then pick one by path.

    The bind asks for the ``vgi_example`` secret under both the
    ``s3://bucket-a/`` and ``s3://bucket-b/`` scopes. Because resolved secrets are
    keyed by name, both survive; ``for_scope_of_type`` then selects the one whose
    scope matches the ``path`` argument, and its ``api_key`` is returned.
    """

    class Meta:
        """Metadata for MultiSecretDemoFunction."""

        name = "multi_secret_demo"
        description = "Demo: two same-type scoped secrets resolved in one bind"

    @classmethod
    def on_bind(cls, params: BindParams[ScopedSecretDemoArgs]) -> BindResponse:
        """Request the same secret type for two scopes and declare the output schema."""
        for scope in ("s3://bucket-a/", "s3://bucket-b/"):
            params.secrets.get("vgi_example", scope=scope)
        output_schema = schema({"api_key": pa.string()})
        return BindResponse(output_schema=output_schema)

    @classmethod
    def initial_state(cls, params: ProcessParams[ScopedSecretDemoArgs]) -> MultiSecretDemoState:
        """Pick the secret matching ``params.args.path`` and capture its api_key.

        The state holds ``""`` when no secret matches or it has no ``api_key``.
        """
        resolved = params.secrets.for_scope_of_type(params.args.path, "vgi_example")
        if not resolved:
            resolved = {}
        raw_key = resolved.get("api_key")
        if raw_key is None:
            return MultiSecretDemoState(api_key="")
        # Unwrap values exposing as_py(); anything else is stringified as-is.
        if hasattr(raw_key, "as_py"):
            raw_key = raw_key.as_py()
        if raw_key is None:
            return MultiSecretDemoState(api_key="")
        return MultiSecretDemoState(api_key=str(raw_key))

    @classmethod
    def process(
        cls,
        params: ProcessParams[ScopedSecretDemoArgs],
        state: MultiSecretDemoState,
        out: OutputCollector,
    ) -> None:
        """Emit one single-row batch holding the captured api_key, then finish."""
        row = {"api_key": [state.api_key]}
        out.emit(pa.RecordBatch.from_pydict(row, schema=params.output_schema))
        out.finish()
|
vgi/_test_fixtures/worker.py
CHANGED
|
@@ -59,6 +59,8 @@ from vgi._test_fixtures.cancellable import (
|
|
|
59
59
|
SlowCancellableFunction,
|
|
60
60
|
SlowCancellableInOutFunction,
|
|
61
61
|
)
|
|
62
|
+
from vgi._test_fixtures.copy_from import ExampleLinesCopyFromFunction
|
|
63
|
+
from vgi._test_fixtures.copy_to import ExampleLinesCopyToFunction, ExampleLinesOrderedCopyToFunction
|
|
62
64
|
from vgi._test_fixtures.nest_tensor import NestTensorFunction, UnnestTensorFunction, UnnestTensorRowsFunction
|
|
63
65
|
from vgi._test_fixtures.scalar import (
|
|
64
66
|
AddValuesFunction,
|
|
@@ -143,6 +145,7 @@ from vgi._test_fixtures.table import (
|
|
|
143
145
|
MakeSeriesRangeFunction,
|
|
144
146
|
MakeSeriesStepFunction,
|
|
145
147
|
MissingBatchIndexTagFunction,
|
|
148
|
+
MultiSecretDemoFunction,
|
|
146
149
|
NamedParamsEchoFunction,
|
|
147
150
|
NestedSequenceFunction,
|
|
148
151
|
NonMonotoneBatchIndexFunction,
|
|
@@ -179,6 +182,7 @@ from vgi._test_fixtures.table import (
|
|
|
179
182
|
TenThousandFunction,
|
|
180
183
|
TxCachedValueFunction,
|
|
181
184
|
TypedProbeFunction,
|
|
185
|
+
UnionVarargsFunction,
|
|
182
186
|
ValuePruneFunction,
|
|
183
187
|
VersionedConstraintsScanFunction,
|
|
184
188
|
VersionedDataFunction,
|
|
@@ -349,6 +353,11 @@ _EXAMPLE_CATALOG = Catalog(
|
|
|
349
353
|
EchoBufferingFunction,
|
|
350
354
|
BufferEmitWideFunction,
|
|
351
355
|
SlowCancellableBufferingFunction,
|
|
356
|
+
# CopyFromFunction - custom COPY ... FROM format reader
|
|
357
|
+
ExampleLinesCopyFromFunction,
|
|
358
|
+
# CopyToFunction - custom COPY ... TO format writer
|
|
359
|
+
ExampleLinesCopyToFunction,
|
|
360
|
+
ExampleLinesOrderedCopyToFunction,
|
|
352
361
|
# TableFunctionGenerator - generate output without input
|
|
353
362
|
ConstantColumnsFunction,
|
|
354
363
|
SlowCancellableFunction,
|
|
@@ -374,6 +383,7 @@ _EXAMPLE_CATALOG = Catalog(
|
|
|
374
383
|
MakePairsStrFunction,
|
|
375
384
|
RepeatValueIntFunction,
|
|
376
385
|
RepeatValueStrFunction,
|
|
386
|
+
UnionVarargsFunction,
|
|
377
387
|
NamedParamsEchoFunction,
|
|
378
388
|
NestedSequenceFunction,
|
|
379
389
|
ProfilingDemoFunction,
|
|
@@ -409,6 +419,7 @@ _EXAMPLE_CATALOG = Catalog(
|
|
|
409
419
|
SampleEchoFunction,
|
|
410
420
|
RowIdSequenceFunction,
|
|
411
421
|
SecretDemoFunction,
|
|
422
|
+
MultiSecretDemoFunction,
|
|
412
423
|
ScopedSecretDemoFunction,
|
|
413
424
|
ExpressionFilterTestFunction,
|
|
414
425
|
SequenceFunction,
|