vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Settings/secrets fixtures: settings_aware, struct_settings, secret_demo, scoped_secret_demo."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Annotated, Any, ClassVar
|
|
9
|
+
|
|
10
|
+
import pyarrow as pa
|
|
11
|
+
from vgi_rpc import ArrowSerializableDataclass
|
|
12
|
+
from vgi_rpc.rpc import OutputCollector
|
|
13
|
+
|
|
14
|
+
from vgi._test_fixtures.table._common import (
|
|
15
|
+
_cardinality_from_count,
|
|
16
|
+
)
|
|
17
|
+
from vgi.arguments import Arg, Secret, Setting
|
|
18
|
+
from vgi.invocation import BindResponse
|
|
19
|
+
from vgi.metadata import FunctionExample
|
|
20
|
+
from vgi.schema_utils import schema
|
|
21
|
+
from vgi.table_function import (
|
|
22
|
+
BindParams,
|
|
23
|
+
ProcessParams,
|
|
24
|
+
TableFunctionGenerator,
|
|
25
|
+
init_single_worker,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(slots=True, frozen=True)
class SettingsAwareFunctionArguments:
    """Positional arguments accepted by the ``settings_aware`` table function."""

    # Only argument (position 0): requested row count, declared with ``ge=0``.
    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(kw_only=True)
class SettingsAwareState(ArrowSerializableDataclass):
    """Scan progress plus the setting values captured by ``initial_state()``."""

    # Rows still to be emitted; reduced by the batch size after each emit.
    remaining: int
    # Id assigned to the first row of the next batch.
    current_index: int = 0
    # True when the extra "details" column should be filled in.
    verbose: bool = False
    # Text repeated in the "greeting" column of every row.
    greeting: str = "Hello"
    # Factor applied to every entry of the "value" column.
    multiplier: int = 1
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@init_single_worker
@_cardinality_from_count
class SettingsAwareFunction(TableFunctionGenerator[SettingsAwareFunctionArguments, SettingsAwareState]):
    """Row generator whose output reflects the settings it was invoked with.

    USE CASE
    --------
    Shows a function declaring the settings it needs through ``Setting()``
    annotations on ``on_bind`` and then reading them a single time in
    ``initial_state()``, after which they travel with the state. Each
    emitted row echoes the values that were received.

    Three settings are consulted:
    - vgi_verbose_mode: bool - when true, a "details" column is added
    - greeting: str - text echoed in the "greeting" column
    - multiplier: int - factor applied to the "value" column

    Settings arrive as typed Arrow scalars (bool, int64, string). For
    backward compatibility the string "true" is also treated as enabling
    vgi_verbose_mode.

    SCHEMA
    ------
    Base output: {"id": int64, "greeting": string, "value": float64}
    With vgi_verbose_mode=true: an additional "details": string column

    Example:
    -------
    With settings={vgi_verbose_mode: true, greeting: "Hi", multiplier: 2}:
    Returns: [{"id": 0, "greeting": "Hi", "value": 0.0, "details": "row_0"}, ...]

    """

    class Meta:
        """Registration metadata for ``settings_aware``."""

        name = "settings_aware"
        description = "Generates data demonstrating settings are passed"
        categories = ["generator", "settings"]
        examples = [
            FunctionExample(
                sql="SELECT * FROM settings_aware(5)",
                description="Generate 5 rows showing setting values",
            )
        ]

    # Upper bound on the number of rows placed in one emitted batch.
    BATCH_SIZE: ClassVar[int] = 1000

    @staticmethod
    def _is_verbose(val: object) -> bool:
        """Tell whether ``val`` switches verbose mode on.

        The boolean ``True`` and the exact string ``"true"`` both count.
        """
        if val is True:
            return True
        return val == "true"

    @classmethod
    def on_bind(
        cls,
        params: BindParams[SettingsAwareFunctionArguments],
        *,
        vgi_verbose_mode: Annotated[pa.Scalar[Any] | None, Setting()] = None,
        greeting: Annotated[pa.Scalar[Any] | None, Setting()] = None,
        multiplier: Annotated[pa.Scalar[Any] | None, Setting()] = None,
    ) -> BindResponse:
        """Choose the output schema from the ``vgi_verbose_mode`` setting.

        ``id``, ``greeting`` and ``value`` are always present; a trailing
        ``details`` column is appended only when verbose mode is on.
        ``greeting`` and ``multiplier`` are declared but not read here.
        """
        wants_details = vgi_verbose_mode is not None and cls._is_verbose(vgi_verbose_mode.as_py())

        column_types: dict[str, pa.DataType] = {
            "id": pa.int64(),
            "greeting": pa.string(),
            "value": pa.float64(),
        }
        if wants_details:
            # Accepts both a bool scalar and the legacy "true" string.
            column_types["details"] = pa.string()

        columns = [pa.field(col_name, col_type) for col_name, col_type in column_types.items()]
        return BindResponse(output_schema=pa.schema(columns))

    @classmethod
    def initial_state(cls, params: ProcessParams[SettingsAwareFunctionArguments]) -> SettingsAwareState:
        """Read each setting once and freeze the results into the scan state."""

        def setting_value(setting_name: str, fallback: object) -> Any:
            # A setting that is absent falls back to a scalar of the default.
            return params.settings.get(setting_name, pa.scalar(fallback)).as_py()

        raw_verbose = setting_value("vgi_verbose_mode", False)
        raw_greeting = setting_value("greeting", "Hello")
        raw_multiplier = setting_value("multiplier", 1)

        return SettingsAwareState(
            remaining=params.args.count,
            verbose=cls._is_verbose(raw_verbose),
            greeting=str(raw_greeting),
            multiplier=int(raw_multiplier),
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[SettingsAwareFunctionArguments],
        state: SettingsAwareState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch of rows, or finish when none remain."""
        if state.remaining <= 0:
            out.finish()
            return

        first_id = state.current_index
        batch_len = min(state.remaining, cls.BATCH_SIZE)
        row_ids = list(range(first_id, first_id + batch_len))

        columns: dict[str, list[int] | list[float] | list[str]] = {
            "id": row_ids,
            "greeting": [state.greeting] * batch_len,
            "value": [float(row_id) * 2.5 * state.multiplier for row_id in row_ids],
        }
        if state.verbose:
            columns["details"] = [f"row_{row_id}" for row_id in row_ids]

        out.emit(pa.RecordBatch.from_pydict(columns, schema=params.output_schema))

        # Advance the cursor so the next call continues after this batch.
        state.current_index = first_id + batch_len
        state.remaining -= batch_len
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@dataclass(slots=True, frozen=True)
class StructSettingsFunctionArguments:
    """Positional arguments accepted by the ``struct_settings`` table function."""

    # Only argument (position 0): requested row count, declared with ``ge=0``.
    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@dataclass(kw_only=True)
class StructSettingsState(ArrowSerializableDataclass):
    """Scan progress plus the fields copied out of the ``config`` struct setting."""

    # Rows still to be emitted; reduced by the batch size after each emit.
    remaining: int
    # Zero-based position of the next row within the whole sequence.
    current_index: int = 0
    # Value of "n" for the row at position 0.
    start: int = 0
    # Increment of "n" from one row to the next.
    step: int = 1
    # Prefix placed before "_<position>" in the "label" column.
    label: str = "item"
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@init_single_worker
@_cardinality_from_count
class StructSettingsFunction(TableFunctionGenerator[StructSettingsFunctionArguments, StructSettingsState]):
    """Generates a sequence configured by a struct setting.

    USE CASE
    --------
    Demonstrates how a single struct setting can configure multiple aspects
    of a function's behavior. The config setting is a struct with fields:
    - start: int64 - starting value for the sequence
    - step: int64 - step between values
    - label: string - prefix for label column

    SCHEMA
    ------
    Output: {"n": int64, "label": string}

    Example:
    -------
    With config={'start': 10, 'step': 5, 'label': 'item'} and count=3:
    Returns: [{"n": 10, "label": "item_0"}, {"n": 15, "label": "item_1"}, {"n": 20, "label": "item_2"}]

    """

    class Meta:
        """Metadata for StructSettingsFunction."""

        name = "struct_settings"
        description = "Generate a sequence configured by a struct setting"
        categories = ["generator", "settings"]
        examples = [
            FunctionExample(
                sql="SELECT * FROM struct_settings(5)",
                description="Generate 5 rows configured by the config setting",
            )
        ]

    # The output schema does not depend on arguments or settings.
    FIXED_SCHEMA: ClassVar[pa.Schema] = schema({"n": pa.int64(), "label": pa.string()})

    # Upper bound on rows per emitted batch; named to mirror
    # SettingsAwareFunction.BATCH_SIZE instead of a literal in process().
    BATCH_SIZE: ClassVar[int] = 1000

    @classmethod
    def on_bind(
        cls,
        params: BindParams[StructSettingsFunctionArguments],
        *,
        config: Annotated[pa.Scalar[Any] | None, Setting()] = None,
    ) -> BindResponse:
        """Return the fixed output schema.

        ``config`` is not read here; it is declared so the setting is
        registered as one this function requires.
        """
        return BindResponse(output_schema=cls.FIXED_SCHEMA)

    @classmethod
    def initial_state(cls, params: ProcessParams[StructSettingsFunctionArguments]) -> StructSettingsState:
        """Create initial state with the struct setting's fields resolved once.

        The ``config`` setting is fetched with plain indexing and its
        ``start``, ``step`` and ``label`` entries are all read, so the state
        defaults are never used on this path.
        NOTE(review): presumably a missing setting or field raises here;
        confirm against the settings container's behaviour.
        """
        config = params.settings["config"]  # pa.StructScalar
        cfg = config.as_py()  # dict
        return StructSettingsState(
            remaining=params.args.count,
            start=cfg["start"],
            step=cfg["step"],
            label=cfg["label"],
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[StructSettingsFunctionArguments],
        state: StructSettingsState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch of the sequence, or finish when none remain.

        The row at absolute position ``p`` gets ``n = start + p * step`` and
        ``label = "<label>_<p>"``.
        """
        if state.remaining <= 0:
            out.finish()
            return

        size = min(state.remaining, cls.BATCH_SIZE)
        # Absolute positions covered by this batch.
        positions = range(state.current_index, state.current_index + size)
        data: dict[str, list[int] | list[str]] = {
            "n": [state.start + position * state.step for position in positions],
            "label": [f"{state.label}_{position}" for position in positions],
        }
        out.emit(pa.RecordBatch.from_pydict(data, schema=params.output_schema))

        # Advance the cursor so the next call continues after this batch.
        state.current_index += size
        state.remaining -= size
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# =============================================================================
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
@dataclass(kw_only=True)
class SecretDemoState(ArrowSerializableDataclass):
    """Rows waiting to be emitted by ``secret_demo``, held as three parallel lists."""

    # Secret entry names, one per pending output row.
    keys: list[str] = field(default_factory=list)
    # Stringified entry values, aligned with ``keys``.
    values: list[str] = field(default_factory=list)
    # Stringified Arrow type of each entry, aligned with ``keys``.
    types: list[str] = field(default_factory=list)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@init_single_worker
class SecretDemoFunction(TableFunctionGenerator[None, SecretDemoState]):
    """Table function listing the entries of the ``vgi_example`` secret.

    One row is produced per entry: its key, its value rendered as text, and
    the Arrow type of the value. Serves as the basic example of secret access.
    """

    class Meta:
        """Registration metadata for ``secret_demo``."""

        name = "secret_demo"
        description = "Outputs secret contents as key-value rows"

    @classmethod
    def on_bind(
        cls,
        params: BindParams[None],
    ) -> BindResponse:
        """Request the secret through the accessor and declare the output columns."""
        # The result is deliberately discarded: asking for the secret here
        # triggers the two-phase bind, which makes the resolved secret
        # available later in initial_state().
        params.secrets.get("vgi_example")

        columns = {
            "key": pa.string(),
            "value": pa.string(),
            "arrow_type": pa.string(),
        }
        return BindResponse(output_schema=schema(columns))

    @classmethod
    def initial_state(cls, params: ProcessParams[None]) -> SecretDemoState:
        """Flatten the secret into parallel key / value / type string lists."""
        entries = params.secrets.get("vgi_example", {})
        return SecretDemoState(
            keys=list(entries.keys()),
            values=[str(scalar.as_py()) for scalar in entries.values()],
            types=[str(scalar.type) for scalar in entries.values()],
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[None],
        state: SecretDemoState,
        out: OutputCollector,
    ) -> None:
        """Emit every pending entry in one batch; finish once nothing is pending."""
        if state.keys:
            columns = {"key": state.keys, "value": state.values, "arrow_type": state.types}
            out.emit(pa.RecordBatch.from_pydict(columns, schema=params.output_schema))
            # Clear the pending rows so the following call takes the finish branch.
            state.keys, state.values, state.types = [], [], []
        else:
            out.finish()
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
@dataclass(frozen=True)
class ScopedSecretDemoArgs:
    """Positional arguments accepted by the ``scoped_secret_demo`` table function."""

    # Only argument (position 0): the scope string handed to the secret lookup.
    path: Annotated[str, Arg(0, doc="Scope path for secret lookup")]
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
@dataclass(kw_only=True)
class ScopedSecretDemoState(ArrowSerializableDataclass):
    """Outcome of the secret lookup, carried from ``initial_state()`` to ``process()``."""

    # True when a non-empty secret was resolved.
    found: bool = False
    # Comma-joined names of the resolved secret's entries; empty when not found.
    secret_keys: str = ""
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
@init_single_worker
class ScopedSecretDemoFunction(TableFunctionGenerator[ScopedSecretDemoArgs, ScopedSecretDemoState]):
    """Example of the automatic two-phase bind used for scoped secrets.

    The scope of the requested secret is taken from the function's ``path``
    argument, so it is only known at bind time. The framework performs the
    resulting bind retry on its own.
    """

    class Meta:
        """Registration metadata for ``scoped_secret_demo``."""

        name = "scoped_secret_demo"
        description = "Demo: resolves scoped secret based on argument"

    @classmethod
    def on_bind(
        cls,
        params: BindParams[ScopedSecretDemoArgs],
        *,
        vgi_example: Annotated[dict[str, pa.Scalar[Any]] | None, Secret("vgi_example")] = None,
    ) -> BindResponse:
        """Register the scoped secret lookup and declare the output columns."""
        # Calling get() with a scope records a pending scoped lookup. Its
        # return value is ignored because the framework re-runs the bind:
        #   first pass - the secret is None (pending) and a retry is triggered
        #   retry      - the secret is a dict (found) or None (really absent)
        params.secrets.get("vgi_example", scope=params.args.path)

        columns = {
            "scope": pa.string(),
            "found": pa.bool_(),
            "secret_keys": pa.string(),
        }
        return BindResponse(output_schema=schema(columns))

    @classmethod
    def initial_state(cls, params: ProcessParams[ScopedSecretDemoArgs]) -> ScopedSecretDemoState:
        """Summarise the resolved secret as a found flag and a key list."""
        resolved = params.secrets.get("vgi_example", {})
        if not resolved:
            # Missing and empty secrets are reported the same way.
            return ScopedSecretDemoState(found=False, secret_keys="")
        return ScopedSecretDemoState(found=True, secret_keys=",".join(resolved.keys()))

    @classmethod
    def process(
        cls,
        params: ProcessParams[ScopedSecretDemoArgs],
        state: ScopedSecretDemoState,
        out: OutputCollector,
    ) -> None:
        """Emit one row describing the lookup, then finish in the same call."""
        row = {
            "scope": [params.args.path],
            "found": [state.found],
            "secret_keys": [state.secret_keys],
        }
        out.emit(pa.RecordBatch.from_pydict(row, schema=params.output_schema))
        out.finish()
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Demo of transaction-scoped storage (``BindParams.transaction_storage``).
|
|
4
|
+
|
|
5
|
+
Backs the ``example.main.tx_cached_value(key, seed)`` function exposed by
|
|
6
|
+
``vgi-fixture-worker``. The function uses ``BindParams.transaction_storage``
|
|
7
|
+
to cache its ``seed`` argument per ``(transaction_opaque_data, key)``:
|
|
8
|
+
|
|
9
|
+
* First call within a transaction for a given ``key``: stores ``seed`` and
|
|
10
|
+
emits it.
|
|
11
|
+
* Subsequent calls within the **same** transaction for the **same** ``key``:
|
|
12
|
+
emit the originally-cached value and **ignore** the new ``seed``.
|
|
13
|
+
* New transaction or different ``key``: produces a fresh cached value.
|
|
14
|
+
* Without a transaction (``params.transaction_storage is None``): no caching;
|
|
15
|
+
every call emits its own ``seed``.
|
|
16
|
+
|
|
17
|
+
The resolved value is shipped from ``on_bind`` to ``process`` via
|
|
18
|
+
``BindResponse.opaque_data`` so any worker in the pool can produce the same
|
|
19
|
+
answer — the value lives in shared storage (sqlite/CF DO/Azure SQL), not in
|
|
20
|
+
the bind worker's local memory.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import struct
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from typing import Annotated, ClassVar
|
|
28
|
+
|
|
29
|
+
import pyarrow as pa
|
|
30
|
+
from vgi_rpc import ArrowSerializableDataclass
|
|
31
|
+
from vgi_rpc.rpc import OutputCollector
|
|
32
|
+
|
|
33
|
+
from vgi.arguments import Arg
|
|
34
|
+
from vgi.invocation import BindResponse, GlobalInitResponse
|
|
35
|
+
from vgi.schema_utils import schema
|
|
36
|
+
from vgi.table_function import (
|
|
37
|
+
BindParams,
|
|
38
|
+
InitParams,
|
|
39
|
+
ProcessParams,
|
|
40
|
+
TableCardinality,
|
|
41
|
+
TableFunctionGenerator,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
__all__ = ["TxCachedValueFunction"]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class TxCachedValueArgs:
    """Positional arguments accepted by ``tx_cached_value``."""

    # Position 0: user-chosen cache key, looked up within the current transaction.
    key: Annotated[str, Arg(0, doc="Cache key, scoped to the current transaction")]
    # Position 1: value stored on a cache miss; unused when the key is already cached.
    seed: Annotated[int, Arg(1, doc="Value to cache on first call; ignored on cache hit")]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(kw_only=True)
class _TxCachedValueState(ArrowSerializableDataclass):
    """Mutable state handed to ``process`` for one ``tx_cached_value`` scan."""

    # The value to emit, whether it came from the cache or from the seed.
    # It reaches this state through opaque_data produced at bind time,
    # because transaction_storage is only populated on BindParams and is
    # therefore not available to process().
    value: int
    # Set once the single output row has been emitted.
    emitted: bool = False
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TxCachedValueFunction(TableFunctionGenerator[TxCachedValueArgs, _TxCachedValueState]):
    """Returns a single-row table whose value is cached per (transaction, key).

    The cache lives in ``BindParams.transaction_storage`` — a view over
    ``FunctionStorage.transaction_state_*``. On a cache hit the stored value
    is returned; on a miss, the supplied ``seed`` is written to storage and
    returned.

    Without a transaction (``params.transaction_storage is None``) every
    bind acts as a cache miss and emits the caller's ``seed`` verbatim —
    so the same SQL run inside vs. outside a ``BEGIN``/``COMMIT`` block
    visibly differs.

    The value is encoded everywhere as a big-endian signed 64-bit integer
    (``struct`` format ``">q"``): in storage, and in the ``opaque_data``
    that carries it from ``on_bind`` through ``on_init`` to ``initial_state``.
    """

    FunctionArguments = TxCachedValueArgs
    State = _TxCachedValueState

    class Meta:
        """Metadata for tx_cached_value."""

        name = "tx_cached_value"
        description = "Return a value cached per (transaction_opaque_data, key) via transaction_storage."
        categories = ["test", "transaction-storage"]
        tags = {"category": "test"}

    # Single int64 column holding the resolved value.
    OUTPUT_SCHEMA: ClassVar[pa.Schema] = schema({"v": pa.int64()})

    @staticmethod
    def _storage_key(user_key: str) -> bytes:
        """Storage key — namespaced so unrelated demos can share one transaction."""
        return f"vgi-fixture:tx_cached_value:{user_key}".encode()

    @classmethod
    def on_bind(cls, params: BindParams[TxCachedValueArgs]) -> BindResponse:
        """Resolve the value via transaction_storage, ship it via opaque_data.

        Returns:
            A ``BindResponse`` with the fixed output schema and the resolved
            value packed into 8 bytes of ``opaque_data``.
        """
        # Start from the caller's seed. It is the answer when there is no
        # transaction (no caching possible) and on a cache miss.
        value = params.args.seed

        storage = params.transaction_storage
        if storage is not None:
            key = cls._storage_key(params.args.key)
            cached = storage.get_one(key)
            if cached is not None:
                # Cache hit: the stored value wins and the new seed is ignored.
                value = struct.unpack(">q", cached)[0]
            else:
                # Cache miss: remember the seed for later binds of this key.
                storage.put_one(key, struct.pack(">q", value))

        return BindResponse(
            output_schema=cls.OUTPUT_SCHEMA,
            opaque_data=struct.pack(">q", value),
        )

    @classmethod
    def cardinality(cls, params: BindParams[TxCachedValueArgs]) -> TableCardinality:
        """One row, always."""
        del params
        return TableCardinality(estimate=1, max=1)

    @classmethod
    def on_init(cls, params: InitParams[TxCachedValueArgs]) -> GlobalInitResponse:
        """Pass the resolved value through to process().

        ``max_workers=1`` because this function emits exactly one row and
        does no work-queue coordination — running it in parallel would
        cause every secondary worker to re-emit the same row.
        """
        return GlobalInitResponse(
            max_workers=1,
            opaque_data=params.init_call.bind_opaque_data,
        )

    @classmethod
    def initial_state(cls, params: ProcessParams[TxCachedValueArgs]) -> _TxCachedValueState:
        """Decode the opaque_data shipped from on_bind().

        Raises:
            RuntimeError: if ``params.init_response`` is missing.
            ValueError: if the opaque payload is missing or not 8 bytes long.
        """
        # Explicit raises rather than ``assert`` so the checks still run
        # when Python is started with -O.
        init_response = params.init_response
        if init_response is None:
            raise RuntimeError("tx_cached_value: init_response is required to decode the bound value")

        opaque = init_response.opaque_data
        if opaque is None or len(opaque) != 8:
            raise ValueError("tx_cached_value: bind must populate opaque_data with an 8-byte int")

        return _TxCachedValueState(value=struct.unpack(">q", opaque)[0])

    @classmethod
    def process(
        cls,
        params: ProcessParams[TxCachedValueArgs],
        state: _TxCachedValueState,
        out: OutputCollector,
    ) -> None:
        """Emit the resolved value as a single-row batch, then finish.

        The first call emits the row and marks the state as emitted; the
        next call sees the flag and finishes the stream.
        """
        del params
        if state.emitted:
            out.finish()
            return
        out.emit(pa.RecordBatch.from_pydict({"v": [state.value]}, schema=cls.OUTPUT_SCHEMA))
        state.emitted = True
|