vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,304 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Deliberately-broken PartitionColumns fixtures for v2 contract testing.
4
+
5
+ Each fixture violates one specific clause of the PartitionColumns contract
6
+ documented at ``vgi/_test_fixtures/table/partition_columns.py`` /
7
+ ``vgi/src/vgi_table_function_impl.cpp::InstallBatch``.
8
+
9
+ * :class:`BrokenMissingPartitionValuesFunction` — declares
10
+ ``partition_kind = SINGLE_VALUE_PARTITIONS`` and an annotated bind-
11
+ schema field, but bypasses the framework's wrapper validation by
12
+ reaching the inner OutputCollector directly. The C++ extension's
13
+ ``InstallBatch`` catches the missing ``vgi_partition_values#b64``
14
+ metadata.
15
+
16
+ * :class:`BrokenPartitionMinNeqMaxFunction` — declares
17
+ ``SINGLE_VALUE_PARTITIONS`` but emits a chunk whose partition
18
+ column has multiple distinct values. The framework's auto-extract
19
+ path would catch this client-side, so the fixture supplies an
20
+ explicit ``partition_values={"col": (min, max)}`` with min != max
21
+ to defeat the worker check and reach the C++ defense-in-depth
22
+ validation in ``InstallBatch``. The C++ check is what guarantees
23
+ this fires on release builds where DuckDB's own
24
+ ``BatchedDataCollection::Append`` assertion is compiled out.
25
+
26
+ * :class:`BrokenPartitionValuesNoAnnotationFunction` — no
27
+ ``vgi.partition_column`` annotation on any bind-schema field and
28
+ ``partition_kind = NOT_PARTITIONED``, but the worker passes
29
+ ``partition_values=`` on ``out.emit`` anyway. The framework
30
+ rejects with RuntimeError at the emit site.
31
+
32
+ * :class:`BrokenPartitionColumnAbsentFromBatchFunction` — declares
33
+ ``partition_kind`` and annotates a bind-schema field, but the
34
+ worker emits a batch that DOES NOT include that column AND does
35
+ not supply an explicit ``partition_values=`` override. The
36
+ framework's ``_merge_partition_values`` raises RuntimeError at
37
+ the emit site (auto-extract can't find the column).
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ from dataclasses import dataclass
43
+ from typing import Annotated, Any, ClassVar, cast
44
+
45
+ import pyarrow as pa
46
+ from vgi_rpc import ArrowSerializableDataclass
47
+ from vgi_rpc.rpc import OutputCollector
48
+
49
+ from vgi._test_fixtures.table._common import _cardinality_from_count
50
+ from vgi.arguments import Arg
51
+ from vgi.metadata import PartitionKind
52
+ from vgi.protocol import VgiOutputCollector
53
+ from vgi.schema_utils import partition_field
54
+ from vgi.table_function import (
55
+ ProcessParams,
56
+ TableFunctionGenerator,
57
+ bind_fixed_schema,
58
+ )
59
+
60
+
61
# Arguments shared by every deliberately-broken partition fixture in this
# module: a single ``count`` value that sizes the one batch each fixture
# attempts to emit.
@dataclass(slots=True, frozen=True)
class _BrokenArgs:
    count: Annotated[
        int,
        Arg(0, doc="Rows to attempt to emit", ge=1),
    ]
64
+
65
+
66
# Per-stream state shared by the broken fixtures. Each fixture emits at most
# one batch; ``emitted`` flips to True after that emit so the following
# ``process`` call finishes the stream instead of emitting again.
@dataclass(kw_only=True)
class _BrokenState(ArrowSerializableDataclass):
    emitted: bool = False
69
+
70
+
71
+ # =============================================================================
72
+ # 1. Missing partition_values metadata (C++ side raises)
73
+ # =============================================================================
74
+
75
+
76
@bind_fixed_schema
@_cardinality_from_count
class BrokenMissingPartitionValuesFunction(TableFunctionGenerator[_BrokenArgs, _BrokenState]):
    """Declares partitioning, then emits around the collector wrappers.

    The single data batch is handed straight to the innermost collector
    (found by following ``_inner`` attributes), so nothing the outer
    wrappers would have attached to the batch is applied.
    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = pa.schema(
        [
            partition_field("country", pa.string()),
            pa.field("sales", pa.int64()),
        ]
    )

    class Meta:
        name = "broken_missing_partition_values"
        description = (
            "DELIBERATELY BROKEN: declares partition_kind + partition-annotated "
            "field but emits a data batch without vgi_partition_values#b64 "
            "metadata. C++ extension's contract check raises."
        )
        categories = ["testing", "broken"]
        partition_kind = PartitionKind.SINGLE_VALUE_PARTITIONS

    @classmethod
    def initial_state(cls, params: ProcessParams[_BrokenArgs]) -> _BrokenState:
        """Return a fresh state with nothing emitted yet."""
        return _BrokenState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_BrokenArgs],
        state: _BrokenState,
        out: OutputCollector,
    ) -> None:
        """Emit one batch past the wrapper stack, then finish on the next call."""
        if state.emitted:
            out.finish()
            return

        row_count = params.args.count
        columns = {
            "country": ["US"] * row_count,
            "sales": list(range(row_count)),
        }
        batch = pa.RecordBatch.from_pydict(columns, schema=cls.FIXED_SCHEMA)

        # This is the deliberate breakage: peel off every wrapper by
        # following ``_inner`` until an object without that attribute is
        # reached, and emit on that object directly. Per the module
        # docstring, this skips the framework's partition-values merge so
        # the batch travels without vgi_partition_values#b64 metadata and
        # the C++ extension's InstallBatch check is what rejects it.
        # Same pattern as v1's broken_missing_batch_index_tag fixture.
        no_inner = object()
        innermost = out
        while (wrapped := getattr(innermost, "_inner", no_inner)) is not no_inner:
            innermost = wrapped
        innermost.emit(batch)
        state.emitted = True
127
+
128
+
129
+ # =============================================================================
130
+ # 2. SINGLE_VALUE with min != max (C++ defense-in-depth raises)
131
+ # =============================================================================
132
+
133
+
134
@bind_fixed_schema
@_cardinality_from_count
class BrokenPartitionMinNeqMaxFunction(TableFunctionGenerator[_BrokenArgs, _BrokenState]):
    """Declares SINGLE_VALUE_PARTITIONS, then overrides with min != max.

    The emitted data itself holds one distinct ``country`` value; only the
    explicit ``partition_values`` override passed to ``emit`` is
    inconsistent with the declared partition kind.
    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = pa.schema(
        [
            partition_field("country", pa.string()),
            pa.field("sales", pa.int64()),
        ]
    )

    class Meta:
        name = "broken_partition_min_neq_max"
        description = (
            "DELIBERATELY BROKEN: declares SINGLE_VALUE_PARTITIONS but "
            "supplies an explicit partition_values override with "
            "min != max. The framework's wrapper validation doesn't "
            "compare min vs max for SINGLE_VALUE; the C++ extension's "
            "defense-in-depth check in InstallBatch raises."
        )
        categories = ["testing", "broken"]
        partition_kind = PartitionKind.SINGLE_VALUE_PARTITIONS

    @classmethod
    def initial_state(cls, params: ProcessParams[_BrokenArgs]) -> _BrokenState:
        """Return a fresh state with nothing emitted yet."""
        return _BrokenState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_BrokenArgs],
        state: _BrokenState,
        out: OutputCollector,
    ) -> None:
        """Emit one batch with a contradictory (min, max) pair, then finish."""
        if state.emitted:
            out.finish()
            return

        # The column data is single-valued ("US" in every row), so a check
        # based on the data alone would pass. The contradiction lives only
        # in the explicit override below, which is how this fixture is
        # meant to get past the worker-side check and reach the C++
        # defense-in-depth validation.
        row_count = params.args.count
        columns = {
            "country": ["US"] * row_count,
            "sales": list(range(row_count)),
        }
        batch = pa.RecordBatch.from_pydict(columns, schema=cls.FIXED_SCHEMA)

        lower = pa.scalar("US", type=pa.string())
        upper = pa.scalar("BR", type=pa.string())  # max != min — the bug
        collector = cast(VgiOutputCollector, out)
        collector.emit(batch, partition_values={"country": (lower, upper)})
        state.emitted = True
190
+
191
+
192
+ # =============================================================================
193
+ # 3. partition_values kwarg without any annotated field (worker-side raise)
194
+ # =============================================================================
195
+
196
+
197
@bind_fixed_schema
@_cardinality_from_count
class BrokenPartitionValuesNoAnnotationFunction(TableFunctionGenerator[_BrokenArgs, _BrokenState]):
    """Passes ``partition_values=`` although no field is partition-annotated.

    Neither schema field is built with ``partition_field`` and ``Meta``
    sets no ``partition_kind``; the worker still supplies an explicit
    ``partition_values`` mapping on ``emit``.
    """

    # Plain fields only — no partition_field(), so the bind schema has no
    # partition columns.
    # cast: mypy joins Field[StringType] + Field[Int64Type] to Field[object];
    # at runtime this is just a list of pa.Field.
    FIXED_SCHEMA: ClassVar[pa.Schema] = pa.schema(
        cast(
            "list[pa.Field[Any]]",
            [pa.field("country", pa.string()), pa.field("sales", pa.int64())],
        )
    )

    class Meta:
        name = "broken_partition_values_no_annotation"
        description = (
            "DELIBERATELY BROKEN: no field carries vgi.partition_column "
            "metadata (and partition_kind defaults to NOT_PARTITIONED), "
            "but the worker passes partition_values= on out.emit. The "
            "framework rejects with RuntimeError before the wire."
        )
        categories = ["testing", "broken"]
        # partition_kind intentionally left unset — defaults to NOT_PARTITIONED.

    @classmethod
    def initial_state(cls, params: ProcessParams[_BrokenArgs]) -> _BrokenState:
        """Return a fresh state with nothing emitted yet."""
        return _BrokenState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_BrokenArgs],
        state: _BrokenState,
        out: OutputCollector,
    ) -> None:
        """Emit one batch with an unsolicited partition_values override."""
        if state.emitted:
            out.finish()
            return

        row_count = params.args.count
        columns = {
            "country": ["US"] * row_count,
            "sales": list(range(row_count)),
        }
        batch = pa.RecordBatch.from_pydict(columns, schema=cls.FIXED_SCHEMA)

        # The (min, max) pair is self-consistent; the violation is that any
        # partition_values are supplied at all for an unpartitioned schema.
        lower = pa.scalar("US", type=pa.string())
        upper = pa.scalar("US", type=pa.string())
        collector = cast(VgiOutputCollector, out)
        collector.emit(batch, partition_values={"country": (lower, upper)})
        state.emitted = True
251
+
252
+
253
+ # =============================================================================
254
+ # 4. Annotated column missing from batch, no explicit override (worker-side raise)
255
+ # =============================================================================
256
+
257
+
258
@bind_fixed_schema
@_cardinality_from_count
class BrokenPartitionColumnAbsentFromBatchFunction(TableFunctionGenerator[_BrokenArgs, _BrokenState]):
    """Annotates ``category`` as a partition column but never emits it.

    The emitted batch carries only ``revenue`` and no explicit
    ``partition_values`` override is supplied.
    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = pa.schema(
        [
            partition_field("category", pa.string()),
            pa.field("revenue", pa.int64()),
        ]
    )

    class Meta:
        name = "broken_partition_column_absent_from_batch"
        description = (
            "DELIBERATELY BROKEN: declares partition_kind on "
            "'category' but emits a batch without 'category' AND "
            "doesn't supply an explicit partition_values override. The "
            "framework's auto-extract fails with RuntimeError before "
            "the wire."
        )
        categories = ["testing", "broken"]
        partition_kind = PartitionKind.SINGLE_VALUE_PARTITIONS

    @classmethod
    def initial_state(cls, params: ProcessParams[_BrokenArgs]) -> _BrokenState:
        """Return a fresh state with nothing emitted yet."""
        return _BrokenState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_BrokenArgs],
        state: _BrokenState,
        out: OutputCollector,
    ) -> None:
        """Emit one batch that lacks the annotated partition column."""
        if state.emitted:
            out.finish()
            return

        # Build the batch against a schema that omits 'category' entirely.
        # Per the module docstring, the framework's auto-extract then has
        # no column to read the partition value from and raises.
        revenue_only = pa.schema([pa.field("revenue", pa.int64())])
        revenue = list(range(params.args.count))
        batch = pa.RecordBatch.from_pydict({"revenue": revenue}, schema=revenue_only)
        out.emit(batch)
        state.emitted = True
@@ -0,0 +1,195 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Example function exercising the dynamic_to_string callback.
4
+
5
+ ``ProfilingDemoFunction`` demonstrates the recommended persistence
6
+ pattern for diagnostics that should surface under ``EXPLAIN ANALYZE``:
7
+
8
+ 1. ``process()`` keeps per-stream counters in user state (rows,
9
+ batches, start time), and after every tick writes a serialized
10
+ snapshot via ``params.storage.put(bytes)``.
11
+ 2. ``dynamic_to_string()`` constructs a ``BoundStorage`` for the
12
+ given ``execution_id``, calls ``collect()`` to gather every
13
+ worker's last snapshot, and sums them.
14
+
15
+ ``BoundStorage`` defaults to the sqlite-backed shared storage (see
16
+ CLAUDE.md → ``VGI_WORKER_SHARED_STORAGE``), so the pattern works across
17
+ worker processes — both subprocess transport and HTTP transport with
18
+ ``max_workers > 1``. No in-memory class state is involved.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import struct
24
+ import time
25
+ from collections.abc import Mapping
26
+ from dataclasses import dataclass
27
+ from typing import Annotated, ClassVar
28
+
29
+ import numpy as np
30
+ import pyarrow as pa
31
+ from vgi_rpc import ArrowSerializableDataclass
32
+ from vgi_rpc.rpc import OutputCollector
33
+
34
+ from vgi._test_fixtures.table._common import CountBatchArgs
35
+ from vgi.arguments import Arg
36
+ from vgi.function_storage import BoundStorage
37
+ from vgi.metadata import FunctionExample
38
+ from vgi.schema_utils import schema
39
+ from vgi.table_function import (
40
+ BindParams,
41
+ ProcessParams,
42
+ TableCardinality,
43
+ TableFunctionGenerator,
44
+ bind_fixed_schema,
45
+ init_single_worker,
46
+ )
47
+
48
+
49
@dataclass(frozen=True)
class ProfilingDemoArgs(CountBatchArgs):
    """Call arguments for ProfilingDemoFunction.

    Adds an ``increment`` field on top of whatever ``CountBatchArgs``
    declares.
    """

    increment: Annotated[
        int,
        Arg("increment", default=1, doc="Step between values", ge=1),
    ]
54
+
55
+
56
@dataclass(kw_only=True)
class ProfilingState(ArrowSerializableDataclass):
    """Progress and diagnostic counters carried by one stream."""

    # Rows still to be produced; the only field without a default.
    remaining: int
    # Number of values generated so far (offset of the next value).
    current_index: int = 0
    # Running totals that end up in the published snapshot.
    rows_emitted: int = 0
    batches_emitted: int = 0
    # Monotonic-clock reading (nanoseconds) taken when the state was created.
    started_at_ns: int = 0
65
+
66
+
67
+ # Serialized snapshot wire format: three little-endian uint64s
68
+ # (rows, batches, elapsed_us). Compact; survives multi-worker collect().
69
+ _SNAPSHOT = struct.Struct("<QQQ")
70
+
71
+
72
+ def _pack_snapshot(rows: int, batches: int, elapsed_us: int) -> bytes:
73
+ return _SNAPSHOT.pack(rows, batches, elapsed_us)
74
+
75
+
76
+ def _unpack_snapshot(data: bytes) -> tuple[int, int, int]:
77
+ return _SNAPSHOT.unpack(data)
78
+
79
+
80
@init_single_worker
@bind_fixed_schema
class ProfilingDemoFunction(TableFunctionGenerator[ProfilingDemoArgs, ProfilingState]):
    """Sequence generator that publishes per-execution metrics under EXPLAIN ANALYZE.

    Output is identical to ``sequence(count, batch_size, increment)``.
    Additionally tracks ``rows_produced``, ``batches_emitted``, and
    ``elapsed_ms`` and surfaces them via ``dynamic_to_string``.

    Every ``process`` call writes a packed snapshot of the stream counters
    into the ``b"profile"`` storage namespace, keyed by the OS pid;
    ``dynamic_to_string`` drains that namespace and aggregates whatever it
    finds.
    """

    FunctionArguments = ProfilingDemoArgs

    class Meta:
        """Metadata for ProfilingDemoFunction."""

        name = "profiling_demo"
        description = "Sequence generator publishing diagnostics under EXPLAIN ANALYZE"
        categories = ["generator", "utility"]
        examples = [
            FunctionExample(
                sql="EXPLAIN ANALYZE SELECT count(*) FROM profiling_demo(500)",
                description="Run with diagnostics surfaced as Extra Info",
            ),
        ]

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(n=pa.int64())
    NUMPY_DTYPE: ClassVar[type[np.generic]] = np.int64

    @classmethod
    def cardinality(cls, params: BindParams[ProfilingDemoArgs]) -> TableCardinality:
        """Report ``count`` as both the estimated and the maximum row count."""
        count = params.args.count
        return TableCardinality(estimate=count, max=count)

    @classmethod
    def initial_state(cls, params: ProcessParams[ProfilingDemoArgs]) -> ProfilingState:
        """Create the stream state and record the start time."""
        return ProfilingState(
            remaining=params.args.count,
            started_at_ns=time.monotonic_ns(),
        )

    @classmethod
    def _publish_snapshot(
        cls,
        params: ProcessParams[ProfilingDemoArgs],
        state: ProfilingState,
    ) -> None:
        """Overwrite this process's ``b"profile"`` slot with the current counters."""
        import os as _os

        # The reference point of time.monotonic_ns() is undefined, so the
        # difference is only guaranteed meaningful within one process. If
        # the serialized state was started elsewhere the difference could
        # be negative, and "<Q" packing would raise; clamp so a diagnostic
        # write can never abort the data stream.
        elapsed_us = max(0, (time.monotonic_ns() - state.started_at_ns) // 1000)
        params.storage.state_put(
            b"profile",
            BoundStorage.pack_int_key(_os.getpid()),
            _pack_snapshot(state.rows_emitted, state.batches_emitted, elapsed_us),
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[ProfilingDemoArgs],
        state: ProfilingState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch of the sequence and publish a snapshot.

        When nothing remains, a final snapshot is written and the stream
        is finished without emitting.
        """
        if state.remaining <= 0:
            # Final write so dynamic_to_string sees the totals even after
            # the stream finishes.
            cls._publish_snapshot(params, state)
            out.finish()
            return

        batch_size = params.args.batch_size
        size = min(state.remaining, batch_size)
        increment = params.args.increment
        values = np.arange(
            state.current_index * increment,
            (state.current_index + size) * increment,
            increment,
            dtype=cls.NUMPY_DTYPE,
        )
        out.emit(pa.RecordBatch.from_arrays([pa.array(values)], schema=params.output_schema))
        state.current_index += size
        state.remaining -= size
        state.rows_emitted += size
        state.batches_emitted += 1

        # Per-tick snapshot — overwrites this worker's slot, so the drain
        # in dynamic_to_string sees one snapshot per worker pid.
        cls._publish_snapshot(params, state)

    @classmethod
    def dynamic_to_string(
        cls,
        params: BindParams[ProfilingDemoArgs],
        execution_id: bytes,
    ) -> Mapping[str, str]:
        """Aggregate the published snapshots for ``execution_id``.

        Returns a mapping with ``rows_produced`` and ``batches_emitted``
        (sums over all snapshots) and ``elapsed_ms`` (the largest elapsed
        value, formatted with two decimals). Returns an empty mapping when
        storage cannot be read or no snapshot can be decoded.
        """
        # BindParams doesn't carry a BoundStorage (no execution_id at bind
        # time). Construct one with the execution_id we received.
        storage = BoundStorage(cls.storage, execution_id, request=params.bind_call)
        try:
            # state_drain returns (key, value) pairs; we only want the values.
            snapshots = [v for _, v in storage.state_drain(b"profile")]
        except Exception:
            # Deliberately best-effort: diagnostics must not fail the query.
            return {}

        rows = 0
        batches = 0
        elapsed_us = 0
        decoded = 0
        for blob in snapshots:
            try:
                r, b, e = _unpack_snapshot(blob)
            except struct.error:
                # A blob of the wrong size is not one of our snapshots;
                # skip it rather than failing the whole report.
                continue
            rows += r
            batches += b
            elapsed_us = max(elapsed_us, e)
            decoded += 1
        if not decoded:
            return {}
        return {
            "rows_produced": str(rows),
            "batches_emitted": str(batches),
            "elapsed_ms": f"{elapsed_us / 1000.0:.2f}",
        }