vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,249 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Late-materialization fixture.
4
+
5
+ Exercises DuckDB's late-materialization optimizer end-to-end against a VGI
6
+ worker. When ``Meta.late_materialization`` is advertised and the table has a
7
+ rowid virtual column, a ``TOP_N`` / ``LIMIT`` / ``SAMPLE`` over the scan is
8
+ rewritten by DuckDB into a SEMI join on the rowid: a narrow ordering scan
9
+ selects survivors, then the wide scan re-fetches their columns with the
10
+ surviving rowids pushed down as a filter.
11
+
12
+ Schema ``(row_id int64 [is_row_id], ord int64, payload utf8, pushed utf8)``:
13
+
14
+ * ``row_id == row index`` — unique, deterministic, and snapshot-stable, so a
15
+ rowid emitted by the ordering scan resolves to the same logical row in the
16
+ (independent) wide scan, even across worker processes. This satisfies the
17
+ late-materialization worker contract.
18
+ * ``ord`` is a *scrambled* function of the index so a Top-N on ``ord`` yields
19
+ scattered survivor rowids — that drives the exact ``IN``-list pushdown path
20
+ (DuckDB only builds an ``IN`` list for ``2..dynamic_or_filter_threshold``
21
+ survivors; above that it pushes a rowid min/max range instead).
22
+ * ``payload`` is the wide column whose materialization the rewrite avoids.
23
+ * ``pushed`` is the **witness**: it echoes, per row, the rowid filter the
24
+ worker received (``in=<n>`` join keys, ``rng=<lo>..<hi>`` bounds). Because
25
+ the rewrite's output columns come from the *wide* scan, selecting ``pushed``
26
+ unambiguously reports what was pushed to that scan. This works over both
27
+ subprocess and HTTP transports (unlike in-band ``client_log``).
28
+
29
+ ``dup_row_id=True`` deliberately violates the uniqueness invariant (row_id =
30
+ index // 2) to back the negative gating test. ``null_ord_stride>0`` injects
31
+ NULLs into ``ord`` for the NULL-ordering test.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ from dataclasses import dataclass
37
+ from typing import Annotated, Any
38
+
39
+ import pyarrow as pa
40
+ from vgi_rpc import ArrowSerializableDataclass
41
+ from vgi_rpc.rpc import OutputCollector
42
+
43
+ from vgi._test_fixtures.table._common import _cardinality_from_count
44
+ from vgi.arguments import Arg
45
+ from vgi.invocation import BindResponse
46
+ from vgi.metadata import FunctionExample
47
+ from vgi.table_filter_pushdown import PushdownFilters
48
+ from vgi.table_function import (
49
+ BindParams,
50
+ ProcessParams,
51
+ TableFunctionGenerator,
52
+ init_single_worker,
53
+ )
54
+
55
+ # Field name of the rowid column; the C++ extension resolves a pushed rowid
56
+ # filter to this name on the wire, so the worker matches it by name.
57
+ _ROWID_NAME = "row_id"
58
+
59
+ # Scramble multiplier (odd, and coprime with the 1_000_000_007 modulus applied in
60
+ # _scramble_ord) used to turn the monotonic index into a scattered ordering key.
61
+ _SCRAMBLE = 2654435761
62
+
63
+
64
def _scramble_ord(index: int) -> int:
    """Map a row index to its deterministic, scattered ``ord`` value.

    The index is multiplied by the module-level ``_SCRAMBLE`` constant and
    reduced modulo 1_000_000_007, so the same index always yields the same key.
    """
    modulus = 1_000_000_007
    product = index * _SCRAMBLE
    return product % modulus
67
+
68
+
69
def _rowid_pushdown_witness(filters: PushdownFilters | None) -> str:
    """Render the rowid-related part of a pushed filter set as a stable string.

    The result always has the shape ``rid:in=<n>;rng=<range>`` where

    * ``<n>`` is the summed length of every ``InFilter`` on the rowid column;
    * ``<range>`` is ``<lo>..<hi>`` built from ``ConstantFilter`` comparisons
      on the rowid column, or ``none`` when no bound was seen at all.

    ``None`` (no filters supplied) yields ``rid:in=0;rng=none``.
    """
    if filters is None:
        return "rid:in=0;rng=none"

    from vgi.table_filter_pushdown import AndFilter, ConstantFilter, InFilter, OrFilter

    def leaves(node: object) -> Any:
        # Depth-first, left-to-right expansion of AND/OR nodes so leaf filters
        # are visited in the same order a recursive walk would see them.
        if isinstance(node, (AndFilter, OrFilter)):
            for child in node.children:
                yield from leaves(child)
        else:
            yield node

    in_total = 0
    lower: Any = None
    upper: Any = None

    for top_level in filters.filters:
        for leaf in leaves(top_level):
            if isinstance(leaf, InFilter) and leaf.column_name == _ROWID_NAME:
                in_total += len(leaf.values)
            elif isinstance(leaf, ConstantFilter) and leaf.column_name == _ROWID_NAME:
                symbol = leaf.op.symbol
                if symbol.startswith(">"):
                    # Several lower bounds collapse to the smallest one seen.
                    lower = leaf.value if lower is None else min(lower, leaf.value)
                elif symbol.startswith("<"):
                    # Several upper bounds collapse to the largest one seen.
                    upper = leaf.value if upper is None else max(upper, leaf.value)
                elif symbol == "=":
                    # An equality pins both ends of the range to the same value.
                    lower = upper = leaf.value

    if lower is None and upper is None:
        span = "none"
    else:
        span = f"{lower}..{upper}"
    return f"rid:in={in_total};rng={span}"
105
+
106
+
107
+ @dataclass(slots=True, frozen=True)
108
+ class LateMaterializationFunctionArgs:
109
+ """Arguments for LateMaterializationFunction."""
110
+
111
+ count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
112
+ batch_size: Annotated[int, Arg("batch_size", default=2048, doc="Batch size for output", ge=1)]
113
+ dup_row_id: Annotated[
114
+ bool,
115
+ Arg("dup_row_id", default=False, doc="Emit a deliberately non-unique row_id (index // 2)"),
116
+ ]
117
+ null_ord_stride: Annotated[
118
+ int,
119
+ Arg("null_ord_stride", default=0, doc="Emit NULL ord every Nth row (0 = never)", ge=0),
120
+ ]
121
+
122
+
123
@dataclass(kw_only=True)
class LateMaterializationState(ArrowSerializableDataclass):
    """Per-scan cursor plus the latched rowid-filter summary.

    ``witness`` is an ordinary serialized field (not Transient) so the HTTP
    rehydrate path — which deserializes user state without re-invoking
    ``initial_state`` — preserves the observed pushdown filters across
    state-token round-trips.
    """

    # Rows that still have to be emitted.
    remaining: int

    # Index of the next row to generate.
    current_index: int = 0

    # Last rowid-filter summary that was recorded; the default is the
    # "nothing pushed" form.
    witness: str = "rid:in=0;rng=none"
135
+
136
+
137
@init_single_worker
@_cardinality_from_count
class LateMaterializationFunction(TableFunctionGenerator[LateMaterializationFunctionArgs, LateMaterializationState]):
    """Rowid-bearing generator used to exercise late materialization.

    SCHEMA
    ------
    Output: {"row_id": int64 [is_row_id], "ord": int64, "payload": utf8,
             "pushed": utf8}

    Example:
    -------
        SELECT row_id, payload FROM late_materialization(100000) ORDER BY ord LIMIT 10

    """

    FunctionArguments = LateMaterializationFunctionArgs

    class Meta:
        """Registration metadata and capability flags for this function."""

        name = "late_materialization"
        description = "Rowid generator that participates in late materialization"
        categories = ["generator", "diagnostic"]
        projection_pushdown = True
        filter_pushdown = True
        auto_apply_filters = True
        late_materialization = True
        examples = [
            FunctionExample(
                sql="SELECT row_id, payload FROM late_materialization(100000) ORDER BY ord LIMIT 10",
                description="Top-N is late-materialized: payload fetched only for survivors",
            ),
        ]

    @classmethod
    def on_bind(cls, params: BindParams[LateMaterializationFunctionArgs]) -> BindResponse:
        """Return the fixed four-column schema.

        The first field is tagged with ``is_row_id`` field metadata and is
        named by the module-level ``_ROWID_NAME`` constant.
        """
        schema_fields: list[pa.Field[Any]] = [
            pa.field(_ROWID_NAME, pa.int64(), metadata={b"is_row_id": b""}),
            pa.field("ord", pa.int64()),
            pa.field("payload", pa.utf8()),
            pa.field("pushed", pa.utf8()),
        ]
        return BindResponse(output_schema=pa.schema(schema_fields))

    @classmethod
    def initial_state(cls, params: ProcessParams[LateMaterializationFunctionArgs]) -> LateMaterializationState:
        """Create the scan state, seeding the witness from init-time filters.

        When the init call carries ``pushdown_filters`` they are decoded
        (together with its ``join_keys``) and summarized into the witness
        string. Otherwise the witness starts as the "nothing pushed" value and
        ``process()`` may still replace it from per-tick filters.
        """
        init_call = params.init_call
        witness = "rid:in=0;rng=none"
        if init_call is not None and init_call.pushdown_filters is not None:
            decoded = cls.pushdown_filters(init_call.pushdown_filters, join_keys=init_call.join_keys)
            witness = _rowid_pushdown_witness(decoded)
        return LateMaterializationState(remaining=params.args.count, witness=witness)

    @classmethod
    def process(
        cls,
        params: ProcessParams[LateMaterializationFunctionArgs],
        state: LateMaterializationState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch, restricted to the columns in ``output_schema``.

        On every tick the rowid filter visible on
        ``params.current_pushdown_filters`` is summarized. A non-empty summary
        always replaces the stored witness. An empty one is ignored once a
        non-empty witness has been recorded, so a tick without filters cannot
        erase it.
        """
        nothing_pushed = "rid:in=0;rng=none"
        observed = _rowid_pushdown_witness(params.current_pushdown_filters)
        # Skip the update only when this tick saw nothing AND something was
        # already latched earlier.
        if not (observed == nothing_pushed and state.witness != nothing_pushed):
            state.witness = observed

        if state.remaining <= 0:
            out.finish()
            return

        args = params.args
        first = state.current_index
        size = min(state.remaining, args.batch_size)
        indices = range(first, first + size)
        stride = args.null_ord_stride

        # Build only the columns that are present in the output schema.
        columns: dict[str, list[Any]] = {}
        for column in params.output_schema:
            name = column.name
            if name == _ROWID_NAME:
                if args.dup_row_id:
                    # Deliberately non-unique ids: each value appears twice.
                    columns[_ROWID_NAME] = [i // 2 for i in indices]
                else:
                    columns[_ROWID_NAME] = list(indices)
            elif name == "ord":
                if stride > 0:
                    columns["ord"] = [None if i % stride == 0 else _scramble_ord(i) for i in indices]
                else:
                    columns["ord"] = [_scramble_ord(i) for i in indices]
            elif name == "payload":
                columns["payload"] = [f"payload_{i}" for i in indices]
            elif name == "pushed":
                columns["pushed"] = [state.witness] * size

        out.emit(pa.RecordBatch.from_pydict(columns, schema=params.output_schema))

        state.remaining -= size
        state.current_index = first + size
@@ -0,0 +1,273 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """make_series_* generators (count/range/step/csv/float)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Annotated, ClassVar
9
+
10
+ import pyarrow as pa
11
+ from vgi_rpc import ArrowSerializableDataclass
12
+ from vgi_rpc.rpc import OutputCollector
13
+
14
+ from vgi.arguments import Arg
15
+ from vgi.metadata import FunctionExample
16
+ from vgi.schema_utils import schema
17
+ from vgi.table_function import (
18
+ ProcessParams,
19
+ TableFunctionGenerator,
20
+ bind_fixed_schema,
21
+ init_single_worker,
22
+ )
23
+
24
+ # ============================================================================
25
+
26
+ MAKE_SERIES_SCHEMA = schema(value=pa.int64())
27
+
28
+
29
+ @dataclass(kw_only=True)
30
+ class MakeSeriesCountArgs:
31
+ """Arguments for MakeSeriesCountFunction."""
32
+
33
+ count: Annotated[int, Arg(0, doc="Number of values to generate", ge=0)]
34
+
35
+
36
+ @dataclass(kw_only=True)
37
+ class MakeSeriesRangeArgs:
38
+ """Arguments for MakeSeriesRangeFunction."""
39
+
40
+ start: Annotated[int, Arg(0, doc="Start value (inclusive)")]
41
+ stop: Annotated[int, Arg(1, doc="Stop value (exclusive)")]
42
+
43
+
44
+ @dataclass(kw_only=True)
45
+ class MakeSeriesStepArgs:
46
+ """Arguments for MakeSeriesStepFunction."""
47
+
48
+ start: Annotated[int, Arg(0, doc="Start value (inclusive)")]
49
+ stop: Annotated[int, Arg(1, doc="Stop value (exclusive)")]
50
+ step: Annotated[int, Arg(2, doc="Step between values", ge=1)]
51
+
52
+
53
@dataclass(kw_only=True)
class MakeSeriesState(ArrowSerializableDataclass):
    """Cursor over a fully materialized integer series."""

    # Every value of the series, computed up front by ``initial_state``.
    values: list[int]

    # Count of leading entries of ``values`` that have already been emitted.
    offset: int = 0
59
+
60
+
61
def _make_series_emit(state: MakeSeriesState, out: OutputCollector) -> None:
    """Emit the next slice of ``state.values`` or finish the stream.

    At most 1024 values are sent per call as a single ``value`` column using
    ``MAKE_SERIES_SCHEMA``. Once ``state.offset`` has reached the end of the
    list, ``out.finish()`` is called instead and the state is left untouched.
    """
    start = state.offset
    pending = state.values
    if start >= len(pending):
        out.finish()
        return

    window = pending[start : start + 1024]
    out.emit(pa.RecordBatch.from_pydict({"value": window}, schema=MAKE_SERIES_SCHEMA))
    # Advance by what was actually sent; the last window may be short.
    state.offset += len(window)
69
+
70
+
71
@init_single_worker
@bind_fixed_schema
class MakeSeriesCountFunction(TableFunctionGenerator[MakeSeriesCountArgs, MakeSeriesState]):
    """Generate a series of integers from 0 to count-1 (``make_series(count)``).

    Example:
        SELECT * FROM make_series(5)
        Returns: 0, 1, 2, 3, 4

    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA

    class Meta:
        """Registration metadata for the count overload."""

        name = "make_series"
        description = "Generate integers from 0 to count-1"
        examples = [
            FunctionExample(sql="SELECT * FROM make_series(5)", description="Generate 0..4"),
        ]

    @classmethod
    def initial_state(cls, params: ProcessParams[MakeSeriesCountArgs]) -> MakeSeriesState:
        """Materialize ``0 .. count-1`` into a fresh state."""
        upper = params.args.count
        return MakeSeriesState(values=[*range(upper)])

    @classmethod
    def process(cls, params: ProcessParams[MakeSeriesCountArgs], state: MakeSeriesState, out: OutputCollector) -> None:
        """Delegate to the shared batch emitter."""
        _make_series_emit(state, out)
105
+
106
+
107
@init_single_worker
@bind_fixed_schema
class MakeSeriesRangeFunction(TableFunctionGenerator[MakeSeriesRangeArgs, MakeSeriesState]):
    """Generate a series of integers from start to stop-1 (``make_series(start, stop)``).

    Example:
        SELECT * FROM make_series(3, 7)
        Returns: 3, 4, 5, 6

    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA

    class Meta:
        """Registration metadata for the start/stop overload."""

        name = "make_series"
        description = "Generate integers from start to stop-1"
        examples = [
            FunctionExample(sql="SELECT * FROM make_series(3, 7)", description="Generate 3..6"),
        ]

    @classmethod
    def initial_state(cls, params: ProcessParams[MakeSeriesRangeArgs]) -> MakeSeriesState:
        """Materialize ``start .. stop-1`` into a fresh state."""
        args = params.args
        return MakeSeriesState(values=[*range(args.start, args.stop)])

    @classmethod
    def process(cls, params: ProcessParams[MakeSeriesRangeArgs], state: MakeSeriesState, out: OutputCollector) -> None:
        """Delegate to the shared batch emitter."""
        _make_series_emit(state, out)
141
+
142
+
143
@init_single_worker
@bind_fixed_schema
class MakeSeriesStepFunction(TableFunctionGenerator[MakeSeriesStepArgs, MakeSeriesState]):
    """Generate a series of integers from start to stop-1 with step (``make_series(start, stop, step)``).

    Example:
        SELECT * FROM make_series(0, 10, 3)
        Returns: 0, 3, 6, 9

    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA

    class Meta:
        """Registration metadata for the start/stop/step overload."""

        name = "make_series"
        description = "Generate integers from start to stop-1 with step"
        examples = [
            FunctionExample(sql="SELECT * FROM make_series(0, 10, 3)", description="Generate 0, 3, 6, 9"),
        ]

    @classmethod
    def initial_state(cls, params: ProcessParams[MakeSeriesStepArgs]) -> MakeSeriesState:
        """Materialize ``range(start, stop, step)`` into a fresh state."""
        args = params.args
        return MakeSeriesState(values=[*range(args.start, args.stop, args.step)])

    @classmethod
    def process(cls, params: ProcessParams[MakeSeriesStepArgs], state: MakeSeriesState, out: OutputCollector) -> None:
        """Delegate to the shared batch emitter."""
        _make_series_emit(state, out)
177
+
178
+
179
+ # ============================================================================
180
+
181
+
182
+ @dataclass(kw_only=True)
183
+ class MakeSeriesCsvArgs:
184
+ """Arguments for MakeSeriesCsvFunction."""
185
+
186
+ values: Annotated[str, Arg(0, doc="Comma-separated integers")]
187
+
188
+
189
@init_single_worker
@bind_fixed_schema
class MakeSeriesCsvFunction(TableFunctionGenerator[MakeSeriesCsvArgs, MakeSeriesState]):
    """Parse a CSV string of integers into rows (``make_series('a,b,c')``).

    Example:
        SELECT * FROM make_series('10,20,30')
        Returns: 10, 20, 30

    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_SCHEMA

    class Meta:
        """Registration metadata for the CSV overload."""

        name = "make_series"
        description = "Parse comma-separated integers into rows"

    @classmethod
    def initial_state(cls, params: ProcessParams[MakeSeriesCsvArgs]) -> MakeSeriesState:
        """Split the argument on commas and convert every piece with ``int``.

        Each piece is stripped of surrounding whitespace first. A piece that is
        not a valid integer literal (including an empty one) makes ``int``
        raise ``ValueError``.
        """
        pieces = params.args.values.split(",")
        numbers = [int(piece.strip()) for piece in pieces]
        return MakeSeriesState(values=numbers)

    @classmethod
    def process(cls, params: ProcessParams[MakeSeriesCsvArgs], state: MakeSeriesState, out: OutputCollector) -> None:
        """Delegate to the shared batch emitter."""
        _make_series_emit(state, out)
217
+
218
+
219
+ MAKE_SERIES_FLOAT_SCHEMA = schema(value=pa.float64())
220
+
221
+
222
+ @dataclass(kw_only=True)
223
+ class MakeSeriesFloatArgs:
224
+ """Arguments for MakeSeriesFloatFunction."""
225
+
226
+ step: Annotated[float, Arg(0, doc="Step size between values")]
227
+
228
+
229
@dataclass(kw_only=True)
class MakeSeriesFloatState(ArrowSerializableDataclass):
    """Cursor over a fully materialized float series."""

    # Every value of the series; each instance gets its own empty list by default.
    values: list[float] = field(default_factory=list)

    # Count of leading entries of ``values`` that have already been emitted.
    offset: int = 0
235
+
236
+
237
@init_single_worker
@bind_fixed_schema
class MakeSeriesFloatFunction(TableFunctionGenerator[MakeSeriesFloatArgs, MakeSeriesFloatState]):
    """Generate 10 float values: 0.0, step, 2*step, ..., 9*step (``make_series(step)``).

    Example:
        SELECT * FROM make_series(0.5)
        Returns: 0.0, 0.5, 1.0, ..., 4.5

    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = MAKE_SERIES_FLOAT_SCHEMA

    class Meta:
        """Registration metadata for the float overload."""

        name = "make_series"
        description = "Generate 10 float values with given step size"

    @classmethod
    def initial_state(cls, params: ProcessParams[MakeSeriesFloatArgs]) -> MakeSeriesFloatState:
        """Materialize the ten multiples ``0*step .. 9*step`` into a fresh state."""
        step = params.args.step
        multiples = [k * step for k in range(10)]
        return MakeSeriesFloatState(values=multiples)

    @classmethod
    def process(
        cls, params: ProcessParams[MakeSeriesFloatArgs], state: MakeSeriesFloatState, out: OutputCollector
    ) -> None:
        """Emit the next slice of at most 1024 values, or finish when none remain."""
        total = len(state.values)
        if state.offset >= total:
            out.finish()
            return

        begin = state.offset
        stop = min(begin + 1024, total)
        window = state.values[begin:stop]
        # The cursor is advanced before the batch is handed to the collector.
        state.offset = stop
        out.emit(pa.RecordBatch.from_pydict({"value": window}, schema=MAKE_SERIES_FLOAT_SCHEMA))