vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,499 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Misc fixtures: GeneratorException, LoggingGenerator, ProjectedData, OrderEcho, SampleEcho."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+ from typing import Annotated, ClassVar
9
+
10
+ import pyarrow as pa
11
+ from vgi_rpc import ArrowSerializableDataclass, Transient
12
+ from vgi_rpc.log import Level
13
+ from vgi_rpc.rpc import OutputCollector
14
+
15
+ from vgi._test_fixtures.table._common import (
16
+ CountdownState,
17
+ _cardinality_from_count,
18
+ )
19
+ from vgi.arguments import Arg
20
+ from vgi.metadata import FunctionExample
21
+ from vgi.schema_utils import schema
22
+ from vgi.table_function import (
23
+ ProcessParams,
24
+ TableFunctionGenerator,
25
+ bind_fixed_schema,
26
+ init_single_worker,
27
+ )
28
+
29
+
30
@dataclass(slots=True, frozen=True)
class GeneratorExceptionFunctionArguments:
    """Call arguments accepted by the ``generator_exception`` fixture."""

    # Number of batches emitted successfully before ``process`` raises.
    fail_after: Annotated[int, Arg(0, doc="Number of batches before failure", ge=0)]
35
+
36
+
37
@dataclass(kw_only=True)
class GeneratorExceptionState(ArrowSerializableDataclass):
    """Per-scan progress tracked by GeneratorExceptionFunction."""

    # Count of batches emitted so far; compared against ``fail_after``.
    batch_count: int = 0
42
+
43
+
44
@init_single_worker
@bind_fixed_schema
class GeneratorExceptionFunction(TableFunctionGenerator[GeneratorExceptionFunctionArguments, GeneratorExceptionState]):
    """Table function that fails on purpose once enough batches were emitted.

    USE CASE
    --------
    Exercising how an exception raised from ``process`` is handled by the
    generator protocol.

    SCHEMA
    ------
    Output: {"n": int64}

    """

    class Meta:
        """Registration metadata for GeneratorExceptionFunction."""

        name = "generator_exception"
        description = "Raises an exception after N batches for testing"
        categories = ["testing"]
        tags = {"category": "testing", "type": "error-handling"}

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(n=pa.int64())

    @classmethod
    def initial_state(cls, params: ProcessParams[GeneratorExceptionFunctionArguments]) -> GeneratorExceptionState:
        """Return a fresh state whose batch counter starts at zero."""
        return GeneratorExceptionState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[GeneratorExceptionFunctionArguments],
        state: GeneratorExceptionState,
        out: OutputCollector,
    ) -> None:
        """Emit one single-row batch per call until ``fail_after`` is reached, then raise.

        Raises:
            ValueError: Once ``state.batch_count`` has reached ``fail_after``.
                This function never calls ``out.finish()``; the error is the
                only way the scan ends.

        """
        limit = params.args.fail_after
        emitted = state.batch_count

        if emitted >= limit:
            raise ValueError(f"Intentional failure after {limit} batches")

        # The row value is simply the zero-based index of the batch.
        batch = pa.RecordBatch.from_pydict({"n": [emitted]}, schema=params.output_schema)
        out.emit(batch)
        state.batch_count = emitted + 1
87
+
88
+
89
@dataclass(slots=True, frozen=True)
class LoggingGeneratorFunctionArguments:
    """Call arguments accepted by the ``logging_generator`` fixture."""

    # How many single-row batches the function emits before finishing.
    count: Annotated[int, Arg(0, doc="Number of values to generate", ge=0)]
94
+
95
+
96
@dataclass(kw_only=True)
class LoggingGeneratorState(ArrowSerializableDataclass):
    """Per-scan progress tracked by LoggingGeneratorFunction."""

    # Next value to emit; also the number of values emitted so far.
    index: int = 0
101
+
102
+
103
@init_single_worker
@bind_fixed_schema
class LoggingGeneratorFunction(TableFunctionGenerator[LoggingGeneratorFunctionArguments, LoggingGeneratorState]):
    """Table function that sends client log messages while it generates rows.

    USE CASE
    --------
    Exercising delivery of log messages through the generator protocol.

    SCHEMA
    ------
    Output: {"n": int64}

    """

    class Meta:
        """Registration metadata for LoggingGeneratorFunction."""

        name = "logging_generator"
        description = "Emits log messages during generation"
        categories = ["testing"]

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(n=pa.int64())

    @classmethod
    def initial_state(cls, params: ProcessParams[LoggingGeneratorFunctionArguments]) -> LoggingGeneratorState:
        """Return a fresh state positioned at index zero."""
        return LoggingGeneratorState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[LoggingGeneratorFunctionArguments],
        state: LoggingGeneratorState,
        out: OutputCollector,
    ) -> None:
        """Emit one value per call, logging at the start and at the end of the scan."""
        position = state.index
        total = params.args.count

        # The very first call announces the scan (this also fires when count is 0).
        if position == 0:
            out.client_log(Level.INFO, f"Starting generation of {total} values")

        if position < total:
            out.emit(pa.RecordBatch.from_pydict({"n": [position]}, schema=params.output_schema))
            state.index = position + 1
            return

        # All values were produced: log completion and end the stream.
        out.client_log(Level.INFO, "Generation complete")
        out.finish()
150
+
151
+
152
@dataclass(slots=True, frozen=True)
class ProjectedDataFunctionArguments:
    """Call arguments accepted by the ``projected_data`` fixture."""

    # Total number of rows produced across all batches.
    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
157
+
158
+
159
@init_single_worker
@bind_fixed_schema
@_cardinality_from_count
class ProjectedDataFunction(TableFunctionGenerator[ProjectedDataFunctionArguments, CountdownState]):
    """Four-column generator that builds only the columns a query asks for.

    USE CASE
    --------
    Shows projection pushdown: columns that were not requested are never
    computed, which matters when a column is expensive to produce.

    SCHEMA
    ------
    Full output: {"id": int64, "name": string, "value": float64, "extra": int64}
    Under a projection only the projected columns are built.

    Example:
    -------
        SELECT id, value FROM projected_data(10)  -- Only computes id and value
        Returns: 10 rows with id and value columns only

    """

    class Meta:
        """Registration metadata for ProjectedDataFunction."""

        name = "projected_data"
        description = "Generates data with 4 columns, supporting projection pushdown"
        categories = ["generator", "utility"]
        projection_pushdown = True
        examples = [
            FunctionExample(
                sql="SELECT * FROM projected_data(10)",
                description="Generate 10 rows with all 4 columns",
            ),
            FunctionExample(
                sql="SELECT id, value FROM projected_data(10)",
                description="Generate 10 rows with only id and value columns",
            ),
        ]

    # Unprojected schema; projection ids index into this field order.
    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(
        {
            "id": pa.int64(),
            "name": pa.string(),
            "value": pa.float64(),
            "extra": pa.int64(),
        }
    )

    # Upper bound on rows emitted by a single ``process`` call.
    BATCH_SIZE: ClassVar[int] = 1000

    @classmethod
    def _get_projected_column_indices(cls, projection_ids: list[int] | None) -> list[int]:
        """Resolve which ``FIXED_SCHEMA`` field indices must be generated.

        ``None`` means no projection was supplied, so every column is
        selected; otherwise the supplied list is returned unchanged.
        """
        return list(range(len(cls.FIXED_SCHEMA))) if projection_ids is None else projection_ids

    @classmethod
    def initial_state(cls, params: ProcessParams[ProjectedDataFunctionArguments]) -> CountdownState:
        """Start the countdown at the requested row count."""
        return CountdownState(remaining=params.args.count)

    @classmethod
    def process(
        cls,
        params: ProcessParams[ProjectedDataFunctionArguments],
        state: CountdownState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch, materialising only the projected columns."""
        if state.remaining <= 0:
            out.finish()
            return

        assert params.init_call is not None
        wanted = cls._get_projected_column_indices(params.init_call.projection_ids)
        batch_size = min(state.remaining, cls.BATCH_SIZE)

        # Row ids covered by this batch; a range can be iterated repeatedly.
        first = state.current_index
        row_ids = range(first, first + batch_size)

        # One lazy builder per known column, so unrequested columns cost nothing.
        builders = {
            "id": lambda: list(row_ids),
            "name": lambda: [f"item_{i}" for i in row_ids],
            "value": lambda: [float(i) * 1.5 for i in row_ids],
            "extra": lambda: [i * i for i in row_ids],
        }

        columns: dict[str, list[int] | list[str] | list[float]] = {}
        for column_index in wanted:
            column_name = cls.FIXED_SCHEMA.field(column_index).name
            build = builders.get(column_name)
            # Field names without a builder are skipped silently.
            if build is not None:
                columns[column_name] = build()

        out.emit(pa.RecordBatch.from_pydict(columns, schema=params.output_schema))

        state.current_index += batch_size
        state.remaining -= batch_size
264
+
265
+
266
+ # ============================================================================
267
+
268
+
269
@dataclass(slots=True, frozen=True)
class _OrderEchoArgs:
    """Call arguments accepted by the ``order_echo`` fixture."""

    # Total number of rows produced across all batches.
    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
    # Maximum rows per emitted batch (named argument, defaults to 2048).
    batch_size: Annotated[int, Arg("batch_size", default=2048, doc="Batch size for output", ge=1)]
275
+
276
+
277
@dataclass(kw_only=True)
class _OrderEchoState(ArrowSerializableDataclass):
    """Per-scan progress plus the ORDER BY / LIMIT hints echoed by OrderEchoFunction."""

    # Rows still to be emitted.
    remaining: int
    # Value of ``n`` for the first row of the next batch.
    current_index: int = 0

    # Hints captured once in ``initial_state``. "(none)" / -1 stand for
    # "no hint received".
    # NOTE(review): these are marked Transient(); presumably that keeps them out
    # of the serialized state -- confirm against vgi_rpc.
    order_column: Annotated[str, Transient()] = "(none)"
    order_direction: Annotated[str, Transient()] = "(none)"
    order_null_order: Annotated[str, Transient()] = "(none)"
    order_limit: Annotated[int, Transient()] = -1
287
+
288
+
289
# Output columns of ``order_echo``: the generated ``n`` / ``s`` pair followed by
# one column per echoed ORDER BY / LIMIT hint.
_ORDER_ECHO_SCHEMA = schema(
    {
        "n": pa.int64(),
        "s": pa.utf8(),
        "order_column": pa.utf8(),
        "order_direction": pa.utf8(),
        "order_null_order": pa.utf8(),
        "order_limit": pa.int64(),
    }
)
299
+
300
+
301
@init_single_worker
@bind_fixed_schema
@_cardinality_from_count
class OrderEchoFunction(TableFunctionGenerator[_OrderEchoArgs, _OrderEchoState]):
    """Diagnostic generator that reports the ORDER BY + LIMIT hints it received.

    USE CASE
    --------
    Verify that DuckDB's RowGroupPruner optimizer pushes ORDER BY + LIMIT
    hints to VGI table functions via the ``set_scan_order`` callback.
    Every row repeats the received hints in the ``order_*`` columns. The
    function does NOT sort or truncate its output -- DuckDB's operators
    remain responsible for that.

    SCHEMA
    ------
    Output: {"n": int64, "s": string, "order_column": string,
             "order_direction": string, "order_null_order": string,
             "order_limit": int64}

    """

    class Meta:
        """Registration metadata for OrderEchoFunction."""

        name = "order_echo"
        description = "Echoes ORDER BY + LIMIT pushdown hints in output"
        categories = ["generator", "diagnostic"]
        filter_pushdown = True
        auto_apply_filters = True
        projection_pushdown = True
        examples = [
            FunctionExample(
                sql="SELECT * FROM order_echo(100) ORDER BY n LIMIT 5",
                description="See which ORDER BY hint was pushed down",
            ),
        ]

    FIXED_SCHEMA: ClassVar[pa.Schema] = _ORDER_ECHO_SCHEMA

    @classmethod
    def initial_state(cls, params: ProcessParams[_OrderEchoArgs]) -> _OrderEchoState:
        """Capture the order hints from the init call so every batch can echo them."""
        init = params.init_call
        assert init is not None

        direction = init.order_by_direction
        null_order = init.order_by_null_order
        limit = init.order_by_limit

        # NOTE(review): direction / null order are tested by truthiness while the
        # limit is tested against None; a zero-valued enum member would be echoed
        # as "(none)" -- confirm the enums have no falsy members.
        return _OrderEchoState(
            remaining=params.args.count,
            order_column=init.order_by_column_name or "(none)",
            order_direction=direction.name if direction else "(none)",
            order_null_order=null_order.name if null_order else "(none)",
            order_limit=-1 if limit is None else limit,
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[_OrderEchoArgs],
        state: _OrderEchoState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch of rows, each carrying the cached order hints."""
        if state.remaining <= 0:
            out.finish()
            return

        rows = min(state.remaining, params.args.batch_size)
        first = state.current_index

        numbers = list(range(first, first + rows))
        payload: dict[str, list[int] | list[str]] = {
            "n": numbers,
            "s": [f"row_{i}" for i in numbers],
        }
        # Hint columns are constant within a scan: repeat the cached value per row.
        for hint in ("order_column", "order_direction", "order_null_order", "order_limit"):
            payload[hint] = [getattr(state, hint)] * rows

        out.emit(pa.RecordBatch.from_pydict(payload, schema=params.output_schema))

        state.current_index += rows
        state.remaining -= rows
387
+
388
+
389
+ # ============================================================================
390
+
391
+
392
@dataclass(slots=True, frozen=True)
class _SampleEchoArgs:
    """Call arguments accepted by the ``sample_echo`` fixture."""

    # Total number of rows produced across all batches.
    count: Annotated[int, Arg(0, doc="Number of rows to generate", ge=0)]
    # Maximum rows per emitted batch (named argument, defaults to 2048).
    batch_size: Annotated[int, Arg("batch_size", default=2048, doc="Batch size for output", ge=1)]
398
+
399
+
400
@dataclass(kw_only=True)
class _SampleEchoState(ArrowSerializableDataclass):
    """Per-scan progress plus the TABLESAMPLE hints echoed by SampleEchoFunction."""

    # Rows still to be emitted.
    remaining: int
    # Value of ``n`` for the first row of the next batch.
    current_index: int = 0

    # Hints captured once in ``initial_state``; -1.0 / -1 stand for
    # "no hint received".
    # NOTE(review): these are marked Transient(); presumably that keeps them out
    # of the serialized state -- confirm against vgi_rpc.
    sample_percentage: Annotated[float, Transient()] = -1.0
    sample_seed: Annotated[int, Transient()] = -1
408
+
409
+
410
# Output columns of ``sample_echo``: the generated ``n`` / ``s`` pair followed by
# one column per echoed TABLESAMPLE hint.
_SAMPLE_ECHO_SCHEMA = schema(
    {
        "n": pa.int64(),
        "s": pa.utf8(),
        "sample_percentage": pa.float64(),
        "sample_seed": pa.int64(),
    }
)
418
+
419
+
420
@init_single_worker
@bind_fixed_schema
@_cardinality_from_count
class SampleEchoFunction(TableFunctionGenerator[_SampleEchoArgs, _SampleEchoState]):
    """Diagnostic generator that reports the TABLESAMPLE hints it received.

    USE CASE
    --------
    Verify that DuckDB's SamplingPushdown optimizer pushes TABLESAMPLE SYSTEM
    hints to VGI table functions. Every row repeats the received hints in the
    ``sample_*`` columns. The function does NOT sample -- all rows are
    returned so tests can check the echoed values.

    SCHEMA
    ------
    Output: {"n": int64, "s": string, "sample_percentage": float64,
             "sample_seed": int64}

    """

    class Meta:
        """Registration metadata for SampleEchoFunction."""

        name = "sample_echo"
        description = "Echoes TABLESAMPLE pushdown hints in output"
        categories = ["generator", "diagnostic"]
        projection_pushdown = True
        sampling_pushdown = True
        examples = [
            FunctionExample(
                sql="SELECT * FROM sample_echo(100) TABLESAMPLE SYSTEM(10%)",
                description="See which TABLESAMPLE hint was pushed down",
            ),
        ]

    FIXED_SCHEMA: ClassVar[pa.Schema] = _SAMPLE_ECHO_SCHEMA

    @classmethod
    def initial_state(cls, params: ProcessParams[_SampleEchoArgs]) -> _SampleEchoState:
        """Capture the sampling hints from the init call so every batch can echo them."""
        init = params.init_call
        assert init is not None

        percentage = init.tablesample_percentage
        seed = init.tablesample_seed

        # A missing hint is reported with the sentinel -1.0 / -1.
        return _SampleEchoState(
            remaining=params.args.count,
            sample_percentage=-1.0 if percentage is None else percentage,
            sample_seed=-1 if seed is None else seed,
        )

    @classmethod
    def process(
        cls,
        params: ProcessParams[_SampleEchoArgs],
        state: _SampleEchoState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch of rows, each carrying the cached sampling hints."""
        if state.remaining <= 0:
            out.finish()
            return

        rows = min(state.remaining, params.args.batch_size)
        first = state.current_index

        numbers = list(range(first, first + rows))
        payload: dict[str, list[int] | list[str] | list[float]] = {
            "n": numbers,
            "s": [f"row_{i}" for i in numbers],
        }
        # Hint columns are constant within a scan: repeat the cached value per row.
        for hint in ("sample_percentage", "sample_seed"):
            payload[hint] = [getattr(state, hint)] * rows

        out.emit(pa.RecordBatch.from_pydict(payload, schema=params.output_schema))

        state.current_index += rows
        state.remaining -= rows
@@ -0,0 +1,164 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Multi-worker partitioned sequence fixtures, one per ``OrderPreservation`` mode.
4
+
5
+ These three fixtures are clones of :class:`PartitionedSequenceFunction` (see
6
+ ``sequence.py``); the only difference is ``Meta.preserves_order``. They exist
7
+ so SQL integration tests can verify that DuckDB's planner honors each mode
8
+ end-to-end:
9
+
10
+ * ``partitioned_preserves_order`` — ``PRESERVES_ORDER`` → DuckDB ``INSERTION_ORDER``
11
+ * ``partitioned_no_order_guarantee`` — ``NO_ORDER_GUARANTEE`` → DuckDB ``NO_ORDER``
12
+ * ``partitioned_fixed_order`` — ``FIXED_ORDER`` → DuckDB ``FIXED_ORDER``
13
+
14
+ DuckDB serializes the pipeline (single worker) for ``FIXED_ORDER`` and
15
+ parallelizes for the other two. Tests grep ``conn=`` from ``duckdb_logs()``
16
+ to count distinct workers — the same pattern used by
17
+ ``partitioned_sequence.test``.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import struct
23
+ from dataclasses import dataclass
24
+ from typing import Annotated, ClassVar
25
+
26
+ import pyarrow as pa
27
+ from vgi_rpc import ArrowSerializableDataclass
28
+ from vgi_rpc.rpc import OutputCollector
29
+
30
+ from vgi._test_fixtures.table._common import _cardinality_from_count
31
+ from vgi.arguments import Arg
32
+ from vgi.invocation import GlobalInitResponse
33
+ from vgi.metadata import FunctionExample, OrderPreservation
34
+ from vgi.schema_utils import schema
35
+ from vgi.table_function import (
36
+ InitParams,
37
+ ProcessParams,
38
+ TableFunctionGenerator,
39
+ bind_fixed_schema,
40
+ )
41
+
42
+
43
@dataclass(slots=True, frozen=True)
class _OrderModeArgs:
    """Call arguments shared by the three order-preservation-mode fixtures."""

    # Size of the generated sequence; values run from 0 up to count - 1.
    count: Annotated[int, Arg(0, doc="Total number of integers to generate", ge=0)]
48
+
49
+
50
@dataclass(kw_only=True)
class _OrderModeState(ArrowSerializableDataclass):
    """Per-worker cursor over the chunk currently being emitted."""

    # Bounds of the chunk taken from the work queue (start inclusive, end
    # exclusive); both are None until the first chunk has been popped.
    current_start: int | None = None
    current_end: int | None = None
    # Next value to emit inside the current chunk.
    current_idx: int = 0
57
+
58
+
59
class _BasePartitionedOrderMode(TableFunctionGenerator[_OrderModeArgs, _OrderModeState]):
    """Common work-queue implementation; each subclass only supplies ``Meta``.

    Sizing follows ``PartitionedSequenceFunction``: chunks of 1k values and
    output batches of up to 1k rows. ``on_init`` pushes one work item per
    chunk onto ``params.storage``; ``process`` pulls chunks with
    ``params.storage.queue_pop`` and emits their values.
    """

    CHUNK_SIZE: ClassVar[int] = 1000
    BATCH_SIZE: ClassVar[int] = 1000

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(n=pa.int64())

    @classmethod
    def on_init(cls, params: InitParams[_OrderModeArgs]) -> GlobalInitResponse:
        """Split ``[0, count)`` into chunks and enqueue them as packed work items."""
        total = params.args.count
        step = cls.CHUNK_SIZE
        # Each item is a big-endian (start, end) pair of unsigned 64-bit ints;
        # the last chunk is clipped to ``count``.
        # NOTE(review): with count == 0 this pushes an empty list -- confirm
        # queue_push accepts that.
        work_items: list[bytes] = [
            struct.pack(">QQ", chunk_start, min(chunk_start + step, total)) for chunk_start in range(0, total, step)
        ]
        params.storage.queue_push(work_items)
        return GlobalInitResponse()

    @classmethod
    def initial_state(cls, params: ProcessParams[_OrderModeArgs]) -> _OrderModeState:
        """Return a state with no chunk assigned yet."""
        return _OrderModeState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_OrderModeArgs],
        state: _OrderModeState,
        out: OutputCollector,
    ) -> None:
        """Emit the next batch of the current chunk, fetching a new chunk when needed."""
        needs_chunk = state.current_start is None or state.current_idx >= (state.current_end or 0)
        if needs_chunk:
            packed = params.storage.queue_pop()
            if packed is None:
                # Queue drained: nothing left for this worker.
                out.finish()
                return
            chunk_start, chunk_end = struct.unpack(">QQ", packed)
            state.current_start = chunk_start
            state.current_end = chunk_end
            assert state.current_start is not None
            state.current_idx = chunk_start

        stop = min(state.current_idx + cls.BATCH_SIZE, state.current_end or 0)
        values = list(range(state.current_idx, stop))
        out.emit(pa.RecordBatch.from_pydict({"n": values}, schema=params.output_schema))
        state.current_idx = stop
106
+
107
+
108
@bind_fixed_schema
@_cardinality_from_count
class PartitionedPreservesOrderFunction(_BasePartitionedOrderMode):
    """Partitioned sequence fixture declaring ``PRESERVES_ORDER``."""

    class Meta:
        """Registration metadata pinning ``preserves_order`` to ``PRESERVES_ORDER``."""

        name = "partitioned_preserves_order"
        description = (
            "Multi-worker partitioned sequence; "
            "preserves_order=PRESERVES_ORDER (maps to DuckDB INSERTION_ORDER)."
        )
        categories = ["generator", "utility"]
        preserves_order = OrderPreservation.PRESERVES_ORDER
        examples = [
            FunctionExample(
                sql="SELECT * FROM partitioned_preserves_order(100)",
                description="Generate 0-99 in parallel; preserves_order=PRESERVES_ORDER",
            ),
        ]
126
+
127
+
128
@bind_fixed_schema
@_cardinality_from_count
class PartitionedNoOrderGuaranteeFunction(_BasePartitionedOrderMode):
    """Partitioned sequence fixture declaring ``NO_ORDER_GUARANTEE``."""

    class Meta:
        """Registration metadata pinning ``preserves_order`` to ``NO_ORDER_GUARANTEE``."""

        name = "partitioned_no_order_guarantee"
        description = (
            "Multi-worker partitioned sequence; "
            "preserves_order=NO_ORDER_GUARANTEE (maps to DuckDB NO_ORDER)."
        )
        categories = ["generator", "utility"]
        preserves_order = OrderPreservation.NO_ORDER_GUARANTEE
        examples = [
            FunctionExample(
                sql="SELECT * FROM partitioned_no_order_guarantee(100)",
                description="Generate 0-99 in parallel; preserves_order=NO_ORDER_GUARANTEE",
            ),
        ]
144
+
145
+
146
@bind_fixed_schema
@_cardinality_from_count
class PartitionedFixedOrderFunction(_BasePartitionedOrderMode):
    """Partitioned sequence fixture declaring ``FIXED_ORDER``."""

    class Meta:
        """Registration metadata pinning ``preserves_order`` to ``FIXED_ORDER``."""

        name = "partitioned_fixed_order"
        description = (
            "Multi-worker partitioned sequence; "
            "preserves_order=FIXED_ORDER "
            "(DuckDB serializes the pipeline so a single worker produces all rows)."
        )
        categories = ["generator", "utility"]
        preserves_order = OrderPreservation.FIXED_ORDER
        examples = [
            FunctionExample(
                sql="SELECT * FROM partitioned_fixed_order(100)",
                description="Generate 0-99; FIXED_ORDER forces single-worker execution",
            ),
        ]