vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,426 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Settings/secrets fixtures: settings_aware, struct_settings, secret_demo, scoped_secret_demo."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Annotated, Any, ClassVar
9
+
10
+ import pyarrow as pa
11
+ from vgi_rpc import ArrowSerializableDataclass
12
+ from vgi_rpc.rpc import OutputCollector
13
+
14
+ from vgi._test_fixtures.table._common import (
15
+ _cardinality_from_count,
16
+ )
17
+ from vgi.arguments import Arg, Secret, Setting
18
+ from vgi.invocation import BindResponse
19
+ from vgi.metadata import FunctionExample
20
+ from vgi.schema_utils import schema
21
+ from vgi.table_function import (
22
+ BindParams,
23
+ ProcessParams,
24
+ TableFunctionGenerator,
25
+ init_single_worker,
26
+ )
27
+
28
+
29
@dataclass(frozen=True, slots=True)
class SettingsAwareFunctionArguments:
    """Positional arguments accepted by the ``settings_aware`` table function."""

    # Declared as positional argument 0 with a ``ge=0`` bound.
    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
34
+
35
+
36
@dataclass(kw_only=True)
class SettingsAwareState(ArrowSerializableDataclass):
    """Scan progress plus the setting values captured for SettingsAwareFunction."""

    # Rows that still have to be emitted.
    remaining: int
    # Id assigned to the next emitted row.
    current_index: int = 0
    # When true, batches carry the extra "details" column.
    verbose: bool = False
    # Text echoed into the "greeting" column.
    greeting: str = "Hello"
    # Factor applied to the "value" column.
    multiplier: int = 1
45
+
46
+
47
@init_single_worker
@_cardinality_from_count
class SettingsAwareFunction(TableFunctionGenerator[SettingsAwareFunctionArguments, SettingsAwareState]):
    """Generates rows whose contents reflect the settings handed to the function.

    USE CASE
    --------
    Shows how a function declares the settings it needs through
    Setting() annotations on on_bind() and then reads them once in
    initial_state(), keeping the resolved values in state. The emitted
    columns expose the setting values that were received.

    Three settings are consulted:
    - vgi_verbose_mode: bool - when true, a details column is added
    - greeting: str - message echoed in every output row
    - multiplier: int - scales the value column

    Settings arrive as typed Arrow scalars (bool, int64, string). For
    backward compatibility the string "true" is also treated as enabling
    vgi_verbose_mode.

    SCHEMA
    ------
    Base output: {"id": int64, "greeting": string, "value": float64}
    With vgi_verbose_mode=true: adds "details": string column

    Example:
    -------
    With settings={vgi_verbose_mode: true, greeting: "Hi", multiplier: 2}:
    Returns: [{"id": 0, "greeting": "Hi", "value": 0.0, "details": "row_0"}, ...]

    """

    class Meta:
        """Registration metadata for SettingsAwareFunction."""

        name = "settings_aware"
        description = "Generates data demonstrating settings are passed"
        categories = ["generator", "settings"]
        examples = [
            FunctionExample(
                sql="SELECT * FROM settings_aware(5)",
                description="Generate 5 rows showing setting values",
            )
        ]

    # Upper bound on the number of rows emitted per process() call.
    BATCH_SIZE: ClassVar[int] = 1000

    @staticmethod
    def _is_verbose(val: object) -> bool:
        """Return whether ``val`` switches verbose mode on (``True`` or the string "true")."""
        if val is True:
            return True
        return val == "true"

    @classmethod
    def on_bind(
        cls,
        params: BindParams[SettingsAwareFunctionArguments],
        *,
        vgi_verbose_mode: Annotated[pa.Scalar[Any] | None, Setting()] = None,
        greeting: Annotated[pa.Scalar[Any] | None, Setting()] = None,
        multiplier: Annotated[pa.Scalar[Any] | None, Setting()] = None,
    ) -> BindResponse:
        """Build the output schema, which depends on vgi_verbose_mode.

        The id, greeting and value columns are always present. A trailing
        "details" column is appended only when vgi_verbose_mode is enabled.
        ``greeting`` and ``multiplier`` are not read here; they are only
        declared as Setting() parameters.
        """
        # A missing setting counts as "not verbose"; otherwise accept bool or "true".
        wants_details = vgi_verbose_mode is not None and cls._is_verbose(vgi_verbose_mode.as_py())

        columns: list[pa.Field[pa.DataType]] = [
            pa.field("id", pa.int64()),
            pa.field("greeting", pa.string()),
            pa.field("value", pa.float64()),
        ]
        if wants_details:
            columns.append(pa.field("details", pa.string()))

        return BindResponse(output_schema=pa.schema(columns))

    @classmethod
    def initial_state(cls, params: ProcessParams[SettingsAwareFunctionArguments]) -> SettingsAwareState:
        """Read each setting once (with a default scalar) and store it in the state."""
        settings = params.settings
        return SettingsAwareState(
            remaining=params.args.count,
            verbose=cls._is_verbose(settings.get("vgi_verbose_mode", pa.scalar(False)).as_py()),
            greeting=str(settings.get("greeting", pa.scalar("Hello")).as_py()),
            multiplier=int(settings.get("multiplier", pa.scalar(1)).as_py()),
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[SettingsAwareFunctionArguments],
        state: SettingsAwareState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch (at most BATCH_SIZE rows), or finish when none remain."""
        if state.remaining <= 0:
            out.finish()
            return

        first = state.current_index
        size = min(state.remaining, cls.BATCH_SIZE)
        ids = [*range(first, first + size)]

        data: dict[str, list[int] | list[float] | list[str]] = {
            "id": ids,
            "greeting": [state.greeting for _ in ids],
            "value": [float(i) * 2.5 * state.multiplier for i in ids],
        }
        if state.verbose:
            data["details"] = [f"row_{i}" for i in ids]

        out.emit(pa.RecordBatch.from_pydict(data, schema=params.output_schema))

        # Advance the cursor so the next call continues after this batch.
        state.remaining -= size
        state.current_index += size
168
+
169
+
170
@dataclass(frozen=True, slots=True)
class StructSettingsFunctionArguments:
    """Positional arguments accepted by the ``struct_settings`` table function."""

    # Declared as positional argument 0 with a ``ge=0`` bound.
    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
175
+
176
+
177
@dataclass(kw_only=True)
class StructSettingsState(ArrowSerializableDataclass):
    """Scan progress plus the values taken from the ``config`` struct setting."""

    # Rows that still have to be emitted.
    remaining: int
    # Zero-based position of the next row in the sequence.
    current_index: int = 0
    # First value of the "n" column.
    start: int = 0
    # Increment between consecutive "n" values.
    step: int = 1
    # Prefix used for the "label" column.
    label: str = "item"
186
+
187
+
188
@init_single_worker
@_cardinality_from_count
class StructSettingsFunction(TableFunctionGenerator[StructSettingsFunctionArguments, StructSettingsState]):
    """Generates a sequence configured by a struct setting.

    USE CASE
    --------
    Demonstrates how a single struct setting can configure multiple aspects
    of a function's behavior. The config setting is a struct with fields:
    - start: int64 - starting value for the sequence
    - step: int64 - step between values
    - label: string - prefix for label column

    SCHEMA
    ------
    Output: {"n": int64, "label": string}

    Example:
    -------
    With config={'start': 10, 'step': 5, 'label': 'item'} and count=3:
    Returns: [{"n": 10, "label": "item_0"}, {"n": 15, "label": "item_1"}, {"n": 20, "label": "item_2"}]

    """

    class Meta:
        """Metadata for StructSettingsFunction."""

        name = "struct_settings"
        description = "Generate a sequence configured by a struct setting"
        categories = ["generator", "settings"]
        examples = [
            FunctionExample(
                sql="SELECT * FROM struct_settings(5)",
                description="Generate 5 rows configured by the config setting",
            )
        ]

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema({"n": pa.int64(), "label": pa.string()})

    # Upper bound on rows emitted per process() call. Named (instead of a bare
    # 1000 inside process) to match SettingsAwareFunction.BATCH_SIZE.
    BATCH_SIZE: ClassVar[int] = 1000

    @classmethod
    def on_bind(
        cls,
        params: BindParams[StructSettingsFunctionArguments],
        *,
        config: Annotated[pa.Scalar[Any] | None, Setting()] = None,
    ) -> BindResponse:
        """Return the fixed output schema.

        ``config`` is not read here; it is declared as a Setting() parameter
        so that it is registered as a required setting.
        """
        return BindResponse(output_schema=cls.FIXED_SCHEMA)

    @classmethod
    def initial_state(cls, params: ProcessParams[StructSettingsFunctionArguments]) -> StructSettingsState:
        """Create the initial state from the ``config`` struct setting.

        Raises:
            KeyError: if the ``config`` setting is absent, or the struct lacks
                one of the ``start`` / ``step`` / ``label`` fields.

        """
        cfg = params.settings["config"].as_py()  # struct scalar -> dict
        return StructSettingsState(
            remaining=params.args.count,
            start=cfg["start"],
            step=cfg["step"],
            label=cfg["label"],
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[StructSettingsFunctionArguments],
        state: StructSettingsState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch (at most BATCH_SIZE rows), or finish when none remain."""
        if state.remaining <= 0:
            out.finish()
            return

        size = min(state.remaining, cls.BATCH_SIZE)
        # Absolute positions in the overall sequence covered by this batch.
        positions = range(state.current_index, state.current_index + size)
        data: dict[str, list[int] | list[str]] = {
            "n": [state.start + pos * state.step for pos in positions],
            "label": [f"{state.label}_{pos}" for pos in positions],
        }
        out.emit(pa.RecordBatch.from_pydict(data, schema=params.output_schema))

        # Advance the cursor so the next call continues after this batch.
        state.current_index += size
        state.remaining -= size
269
+
270
+
271
+ # =============================================================================
272
+
273
+
274
@dataclass(kw_only=True)
class SecretDemoState(ArrowSerializableDataclass):
    """Pending output rows for SecretDemoFunction, stored as parallel lists."""

    # Secret entry names.
    keys: list[str] = field(default_factory=list)
    # Stringified secret entry values, aligned with ``keys``.
    values: list[str] = field(default_factory=list)
    # Stringified Arrow types of the entries, aligned with ``keys``.
    types: list[str] = field(default_factory=list)
281
+
282
+
283
@init_single_worker
class SecretDemoFunction(TableFunctionGenerator[None, SecretDemoState]):
    """Table function that lists the entries of a secret, one row per key.

    Demonstrates basic secret access via the secrets accessor on the params.
    """

    class Meta:
        """Registration metadata for SecretDemoFunction."""

        name = "secret_demo"
        description = "Outputs secret contents as key-value rows"

    @classmethod
    def on_bind(
        cls,
        params: BindParams[None],
    ) -> BindResponse:
        """Request the ``vgi_example`` secret and return the fixed output schema."""
        # The result is deliberately discarded: asking for the secret here
        # triggers the two-phase bind, which makes the resolved secret
        # available to initial_state().
        params.secrets.get("vgi_example")

        output_schema = schema(
            {
                "key": pa.string(),
                "value": pa.string(),
                "arrow_type": pa.string(),
            }
        )
        return BindResponse(output_schema=output_schema)

    @classmethod
    def initial_state(cls, params: ProcessParams[None]) -> SecretDemoState:
        """Flatten the secret into parallel key / value / type lists."""
        secret = params.secrets.get("vgi_example", {})
        entries = list(secret.values())
        return SecretDemoState(
            keys=list(secret.keys()),
            values=[str(entry.as_py()) for entry in entries],
            types=[str(entry.type) for entry in entries],
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[None],
        state: SecretDemoState,
        out: OutputCollector,
    ) -> None:
        """Emit all pending entries as one batch, then finish on the next call."""
        if state.keys:
            columns = {"key": state.keys, "value": state.values, "arrow_type": state.types}
            out.emit(pa.RecordBatch.from_pydict(columns, schema=params.output_schema))
            # Clear the pending rows so the following call takes the finish path.
            state.keys = []
            state.values = []
            state.types = []
            return

        out.finish()
343
+
344
+
345
@dataclass(frozen=True)
class ScopedSecretDemoArgs:
    """Positional arguments accepted by the ``scoped_secret_demo`` table function."""

    # Declared as positional argument 0; passed as the ``scope`` of the secret lookup.
    path: Annotated[str, Arg(0, doc="Scope path for secret lookup")]
350
+
351
+
352
@dataclass(kw_only=True)
class ScopedSecretDemoState(ArrowSerializableDataclass):
    """Outcome of the secret lookup carried into ScopedSecretDemoFunction.process."""

    # True when a non-empty secret was resolved.
    found: bool = False
    # Comma-joined key names of the resolved secret ("" when nothing was found).
    secret_keys: str = ""
358
+
359
+
360
@init_single_worker
class ScopedSecretDemoFunction(TableFunctionGenerator[ScopedSecretDemoArgs, ScopedSecretDemoState]):
    """Demonstrates automatic two-phase bind with scoped secrets.

    The secret is requested with a scope taken from the function argument,
    so it is only known at bind time. The framework handles the two-phase
    bind retry automatically.
    """

    class Meta:
        """Registration metadata for ScopedSecretDemoFunction."""

        name = "scoped_secret_demo"
        description = "Demo: resolves scoped secret based on argument"

    @classmethod
    def on_bind(
        cls,
        params: BindParams[ScopedSecretDemoArgs],
        *,
        vgi_example: Annotated[dict[str, pa.Scalar[Any]] | None, Secret("vgi_example")] = None,
    ) -> BindResponse:
        """Register the scoped secret lookup and return the fixed output schema."""
        # Calling get() registers a pending scoped lookup. Its return value is
        # ignored because the framework reacts with a two-phase bind retry:
        #   first call -> secret is None (pending), retry is triggered
        #   retry      -> secret is a dict (found) or None (genuinely not found)
        params.secrets.get("vgi_example", scope=params.args.path)

        output_schema = schema(
            {
                "scope": pa.string(),
                "found": pa.bool_(),
                "secret_keys": pa.string(),
            }
        )
        return BindResponse(output_schema=output_schema)

    @classmethod
    def initial_state(cls, params: ProcessParams[ScopedSecretDemoArgs]) -> ScopedSecretDemoState:
        """Summarise the resolved secret as a found flag plus its key names."""
        secret = params.secrets.get("vgi_example", {})
        if not secret:
            # Missing or empty secret: report "not found" with no keys.
            return ScopedSecretDemoState(found=False, secret_keys="")
        return ScopedSecretDemoState(found=True, secret_keys=",".join(secret.keys()))

    @classmethod
    def process(
        cls,
        params: ProcessParams[ScopedSecretDemoArgs],
        state: ScopedSecretDemoState,
        out: OutputCollector,
    ) -> None:
        """Emit a single row with the scope and lookup result, then finish."""
        row = {
            "scope": [params.args.path],
            "found": [state.found],
            "secret_keys": [state.secret_keys],
        }
        out.emit(pa.RecordBatch.from_pydict(row, schema=params.output_schema))
        out.finish()
@@ -0,0 +1,162 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Demo of transaction-scoped storage (``BindParams.transaction_storage``).
4
+
5
+ Backs the ``example.main.tx_cached_value(key, seed)`` function exposed by
6
+ ``vgi-fixture-worker``. The function uses ``BindParams.transaction_storage``
7
+ to cache its ``seed`` argument per ``(transaction_opaque_data, key)``:
8
+
9
+ * First call within a transaction for a given ``key``: stores ``seed`` and
10
+ emits it.
11
+ * Subsequent calls within the **same** transaction for the **same** ``key``:
12
+ emit the originally-cached value and **ignore** the new ``seed``.
13
+ * New transaction or different ``key``: produces a fresh cached value.
14
+ * Without a transaction (``params.transaction_storage is None``): no caching;
15
+ every call emits its own ``seed``.
16
+
17
+ The resolved value is shipped from ``on_bind`` to ``process`` via
18
+ ``BindResponse.opaque_data`` so any worker in the pool can produce the same
19
+ answer — the value lives in shared storage (sqlite/CF DO/Azure SQL), not in
20
+ the bind worker's local memory.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import struct
26
+ from dataclasses import dataclass
27
+ from typing import Annotated, ClassVar
28
+
29
+ import pyarrow as pa
30
+ from vgi_rpc import ArrowSerializableDataclass
31
+ from vgi_rpc.rpc import OutputCollector
32
+
33
+ from vgi.arguments import Arg
34
+ from vgi.invocation import BindResponse, GlobalInitResponse
35
+ from vgi.schema_utils import schema
36
+ from vgi.table_function import (
37
+ BindParams,
38
+ InitParams,
39
+ ProcessParams,
40
+ TableCardinality,
41
+ TableFunctionGenerator,
42
+ )
43
+
44
+ __all__ = ["TxCachedValueFunction"]
45
+
46
+
47
@dataclass(frozen=True)
class TxCachedValueArgs:
    """Positional arguments accepted by ``tx_cached_value``."""

    # Argument 0: user-supplied cache key.
    key: Annotated[str, Arg(0, doc="Cache key, scoped to the current transaction")]
    # Argument 1: value stored on a cache miss.
    seed: Annotated[int, Arg(1, doc="Value to cache on first call; ignored on cache hit")]
53
+
54
+
55
@dataclass(kw_only=True)
class _TxCachedValueState(ArrowSerializableDataclass):
    """Mutable state handed to ``process`` for one scan."""

    # The value to emit (cached or freshly seeded). It travels from bind via
    # opaque_data, so process() never needs transaction_storage, which is
    # only populated on BindParams.
    value: int
    # Set once the single output row has been emitted.
    emitted: bool = False
64
+
65
+
66
class TxCachedValueFunction(TableFunctionGenerator[TxCachedValueArgs, _TxCachedValueState]):
    """Single-row table function whose value is cached per (transaction, key).

    ``BindParams.transaction_storage`` — a view over
    ``FunctionStorage.transaction_state_*`` — holds the cache. A hit returns
    the stored value; a miss writes the supplied ``seed`` to storage and
    returns it.

    When there is no transaction (``params.transaction_storage is None``)
    every bind behaves like a miss and emits the caller's ``seed`` verbatim,
    so the same SQL visibly differs inside vs. outside a
    ``BEGIN``/``COMMIT`` block.
    """

    FunctionArguments = TxCachedValueArgs
    State = _TxCachedValueState

    class Meta:
        """Registration metadata for tx_cached_value."""

        name = "tx_cached_value"
        description = "Return a value cached per (transaction_opaque_data, key) via transaction_storage."
        categories = ["test", "transaction-storage"]
        tags = {"category": "test"}

    OUTPUT_SCHEMA: ClassVar[pa.Schema] = schema({"v": pa.int64()})

    @staticmethod
    def _storage_key(user_key: str) -> bytes:
        """Return the namespaced storage key, so unrelated demos can share one transaction."""
        namespaced = f"vgi-fixture:tx_cached_value:{user_key}"
        return namespaced.encode()

    @classmethod
    def on_bind(cls, params: BindParams[TxCachedValueArgs]) -> BindResponse:
        """Resolve the value through transaction_storage and ship it in opaque_data."""
        storage = params.transaction_storage
        if storage is None:
            # No transaction means no cache: each bind uses the caller's seed as-is.
            value = params.args.seed
        else:
            key = cls._storage_key(params.args.key)
            cached = storage.get_one(key)
            if cached is None:
                # Cache miss: remember the seed for later binds in this transaction.
                value = params.args.seed
                storage.put_one(key, struct.pack(">q", value))
            else:
                # Cache hit: the stored bytes are a big-endian signed 64-bit int.
                (value,) = struct.unpack(">q", cached)

        return BindResponse(
            output_schema=cls.OUTPUT_SCHEMA,
            opaque_data=struct.pack(">q", value),
        )

    @classmethod
    def cardinality(cls, params: BindParams[TxCachedValueArgs]) -> TableCardinality:
        """Report exactly one row, regardless of the arguments."""
        del params
        return TableCardinality(estimate=1, max=1)

    @classmethod
    def on_init(cls, params: InitParams[TxCachedValueArgs]) -> GlobalInitResponse:
        """Forward the bind-time opaque_data so process() can read the value.

        ``max_workers=1`` because exactly one row is emitted and there is no
        work-queue coordination — in a parallel run every secondary worker
        would re-emit the same row.
        """
        bind_payload = params.init_call.bind_opaque_data
        return GlobalInitResponse(max_workers=1, opaque_data=bind_payload)

    @classmethod
    def initial_state(cls, params: ProcessParams[TxCachedValueArgs]) -> _TxCachedValueState:
        """Decode the 8-byte value that on_bind() placed in opaque_data."""
        assert params.init_response is not None
        opaque = params.init_response.opaque_data
        assert opaque is not None and len(opaque) == struct.calcsize(">q"), (
            "tx_cached_value: bind must populate opaque_data with an 8-byte int"
        )
        (value,) = struct.unpack(">q", opaque)
        return _TxCachedValueState(value=value)

    @classmethod
    def process(
        cls,
        params: ProcessParams[TxCachedValueArgs],
        state: _TxCachedValueState,
        out: OutputCollector,
    ) -> None:
        """Emit the resolved value as a one-row batch; finish on the following call."""
        del params
        if not state.emitted:
            row = pa.RecordBatch.from_pydict({"v": [state.value]}, schema=cls.OUTPUT_SCHEMA)
            out.emit(row)
            state.emitted = True
            return

        out.finish()