vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,200 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Deliberately-broken batch_index fixtures for contract-enforcement testing.
4
+
5
+ These fixtures violate the ``Meta.supports_batch_index = True`` contract in
6
+ three different ways so SQL integration tests can assert that the C++
7
+ extension's contract checks (in ``InstallBatch``) and the worker library's
8
+ ``_merge_batch_index`` validator (in ``vgi/protocol.py``) raise typed
9
+ errors. None of these is intended for production use.
10
+
11
+ The shape of the contract is documented at
12
+ ``vgi-python/vgi/_test_fixtures/table/batch_index.py`` and
13
+ ``vgi/src/vgi_table_function_impl.cpp::InstallBatch``.
14
+
15
+ * ``broken_missing_batch_index_tag`` — emits a data batch with NO
16
+ ``vgi_batch_index`` metadata, bypassing the framework wrapper's
17
+ validation by reaching into the inner collector directly. The C++
18
+ extension's ``InstallBatch`` raises IOException "without
19
+ vgi_batch_index metadata" when the function opts in.
20
+
21
+ * ``broken_non_monotone_batch_index`` — emits batches with strictly
22
+ decreasing partition_ids on the same stream. The C++ extension's
23
+ ``InstallBatch`` raises IOException "decreased from N to M on the
24
+ same stream" — DuckDB's per-thread monotonicity assertion is debug-
25
+ only, so VGI must enforce in release builds.
26
+
27
+ * ``broken_batch_index_overflow`` — emits a partition_id at 2^60, well
28
+ above DuckDB's ``BATCH_INCREMENT = 10^13`` per-pipeline cap. The
29
+ C++ extension's ``InstallBatch`` raises IOException "exceeds
30
+ DuckDB's per-pipeline cap" — without this, the worker would surface
31
+ an opaque DuckDB InternalException from the pipeline executor.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ from dataclasses import dataclass
37
+ from typing import Annotated, ClassVar, cast
38
+
39
+ import pyarrow as pa
40
+ from vgi_rpc import ArrowSerializableDataclass
41
+ from vgi_rpc.rpc import OutputCollector
42
+
43
+ from vgi._test_fixtures.table._common import _cardinality_from_count
44
+ from vgi.arguments import Arg
45
+ from vgi.metadata import OrderPreservation
46
+ from vgi.protocol import VgiOutputCollector
47
+ from vgi.schema_utils import schema
48
+ from vgi.table_function import (
49
+ ProcessParams,
50
+ TableFunctionGenerator,
51
+ bind_fixed_schema,
52
+ )
53
+
54
+
55
@dataclass(frozen=True, slots=True)
class _BrokenArgs:
    """Arguments accepted by each deliberately-broken batch_index fixture."""

    # Positional argument 0. ``ge=1`` is forwarded to ``Arg``; presumably it
    # enforces a lower bound of 1 — confirm against ``vgi.arguments.Arg``.
    count: Annotated[
        int,
        Arg(0, doc="Total rows to attempt to generate", ge=1),
    ]
58
+
59
+
60
@dataclass(kw_only=True)
class _BrokenState(ArrowSerializableDataclass):
    """Progress flag the broken fixtures consult on every ``process`` call."""

    # False until a fixture's ``process`` has emitted its first batch; the
    # fixtures set it to True right after that emit.
    emitted: bool = False
63
+
64
+
65
@bind_fixed_schema
@_cardinality_from_count
class MissingBatchIndexTagFunction(TableFunctionGenerator[_BrokenArgs, _BrokenState]):
    """Broken fixture: claims batch_index support, emits an untagged batch.

    The C++ extension is expected to raise. Test-only; not for production.
    """

    # Output has one column, ``n`` (int64) — the only key ``process`` fills.
    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(n=pa.int64())

    class Meta:
        name = "broken_missing_batch_index_tag"
        description = (
            "DELIBERATELY BROKEN: declares supports_batch_index=True but "
            "emits a data batch with no vgi_batch_index metadata. C++ "
            "extension's contract check raises."
        )
        categories = ["testing", "broken"]
        preserves_order = OrderPreservation.FIXED_ORDER
        supports_batch_index = True

    @classmethod
    def initial_state(cls, params: ProcessParams[_BrokenArgs]) -> _BrokenState:
        """Return a fresh state with nothing emitted; ``params`` is unused."""
        return _BrokenState(emitted=False)

    @classmethod
    def process(
        cls,
        params: ProcessParams[_BrokenArgs],
        state: _BrokenState,
        out: OutputCollector,
    ) -> None:
        """Emit one untagged batch on the first call, then finish.

        The first call builds ``range(count)`` in column ``n`` and hands it
        to the innermost collector without any batch_index. Every later call
        only signals end-of-stream.
        """
        if state.emitted:
            out.finish()
            return

        values = list(range(params.args.count))
        untagged = pa.RecordBatch.from_pydict(
            {"n": values},
            schema=params.output_schema,
        )

        # Unwrap every ``_inner`` layer and emit on the bottom-most collector.
        # Skipping the wrappers is the deliberate breakage: the framework's
        # _merge_batch_index validator never sees the batch, so a data batch
        # lacking vgi_batch_index metadata reaches the C++ extension. It also
        # exercises the rule that the wire format, not the wrapper layer, is
        # the authoritative check.
        sink = out
        while hasattr(sink, "_inner"):
            sink = sink._inner
        sink.emit(untagged)

        state.emitted = True
113
+
114
+
115
@bind_fixed_schema
@_cardinality_from_count
class NonMonotoneBatchIndexFunction(TableFunctionGenerator[_BrokenArgs, _BrokenState]):
    """Broken fixture: two batches whose batch_index goes down. C++ raises."""

    # Output has one column, ``n`` (int64) — the only key ``process`` fills.
    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(n=pa.int64())

    class Meta:
        name = "broken_non_monotone_batch_index"
        description = (
            "DELIBERATELY BROKEN: emits batches with strictly decreasing "
            "partition_id on one stream. C++ extension's monotonicity check "
            "raises (DuckDB's debug-only assertion is not relied upon)."
        )
        categories = ["testing", "broken"]
        preserves_order = OrderPreservation.FIXED_ORDER
        supports_batch_index = True

    @classmethod
    def initial_state(cls, params: ProcessParams[_BrokenArgs]) -> _BrokenState:
        """Return a fresh state with nothing emitted; ``params`` is unused."""
        return _BrokenState(emitted=False)

    @classmethod
    def process(
        cls,
        params: ProcessParams[_BrokenArgs],
        state: _BrokenState,
        out: OutputCollector,
    ) -> None:
        """Emit batch_index 10 first, then batch_index 3, then finish.

        ``state.emitted`` doubles as the marker for which of the two batches
        is due: False means the first one, True means the second.
        """
        # ``cast`` only informs the type checker; it returns ``out`` itself.
        tagged_out = cast(VgiOutputCollector, out)

        if not state.emitted:
            # First call: ``count`` rows tagged with the higher index.
            opening = pa.RecordBatch.from_pydict(
                {"n": list(range(params.args.count))},
                schema=params.output_schema,
            )
            tagged_out.emit(opening, batch_index=10)
            state.emitted = True
            return

        # Second call: one row tagged with a LOWER index than the first
        # batch, then end the stream within the same call.
        follow_up = pa.RecordBatch.from_pydict(
            {"n": [42]},
            schema=params.output_schema,
        )
        tagged_out.emit(follow_up, batch_index=3)
        out.finish()
160
+
161
+
162
@bind_fixed_schema
@_cardinality_from_count
class BatchIndexOverflowFunction(TableFunctionGenerator[_BrokenArgs, _BrokenState]):
    """Broken fixture: tags a batch with an index beyond the C++ cap. C++ raises."""

    # Output has one column, ``n`` (int64) — the only key ``process`` fills.
    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(n=pa.int64())

    class Meta:
        name = "broken_batch_index_overflow"
        description = (
            "DELIBERATELY BROKEN: emits a batch tagged with a partition_id "
            "well above DuckDB's BATCH_INCREMENT=10^13 per-pipeline cap. "
            "C++ extension rejects at parse time."
        )
        categories = ["testing", "broken"]
        preserves_order = OrderPreservation.FIXED_ORDER
        supports_batch_index = True

    @classmethod
    def initial_state(cls, params: ProcessParams[_BrokenArgs]) -> _BrokenState:
        """Return a fresh state with nothing emitted; ``params`` is unused."""
        return _BrokenState(emitted=False)

    @classmethod
    def process(
        cls,
        params: ProcessParams[_BrokenArgs],
        state: _BrokenState,
        out: OutputCollector,
    ) -> None:
        """Emit one batch tagged with batch_index 2**60, then finish.

        The first call emits ``range(count)`` in column ``n`` with the
        oversized index; every later call only signals end-of-stream.
        """
        if state.emitted:
            out.finish()
            return

        # 2^60 (about 1.15e18) — far above the 10^13 cap.
        oversized_index = 2**60

        rows = pa.RecordBatch.from_pydict(
            {"n": list(range(params.args.count))},
            schema=params.output_schema,
        )
        # ``cast`` only informs the type checker; it returns ``out`` itself.
        tagged_out = cast(VgiOutputCollector, out)
        tagged_out.emit(rows, batch_index=oversized_index)
        state.emitted = True
@@ -0,0 +1,162 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Static catalog scan functions (colors, departments, employees, products, projects)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any
8
+
9
+ import pyarrow as pa
10
+ from vgi_rpc.rpc import OutputCollector
11
+
12
+ from vgi._test_fixtures.table._common import _EmptyArgs, _OneShotState
13
+ from vgi.invocation import BindResponse
14
+ from vgi.table_function import (
15
+ BindParams,
16
+ ProcessParams,
17
+ TableFunctionGenerator,
18
+ init_single_worker,
19
+ )
20
+
21
+
22
def _static_scan_function(
    func_name: str,
    func_description: str,
    output_schema: pa.Schema,
    data: dict[str, list[Any]],
) -> type[TableFunctionGenerator[_EmptyArgs, _OneShotState]]:
    """Build a table-function class that serves a fixed dataset in one batch.

    Removes the boilerplate otherwise repeated by every trivial scan fixture.

    Args:
        func_name: Value stored as ``Meta.name``; also the source of the
            generated class name (``"foo_bar"`` becomes ``FooBarFunction``).
        func_description: Value stored as ``Meta.description``.
        output_schema: Schema reported from ``on_bind``.
        data: Column name to list of values, emitted as a single batch.

    Returns:
        The generated class, wrapped by ``@init_single_worker``.
    """

    @init_single_worker
    class StaticScanFunction(TableFunctionGenerator[_EmptyArgs, _OneShotState]):
        """Returns static data."""

        class Meta:
            """Function metadata."""

            name = func_name
            description = func_description

        @classmethod
        def on_bind(cls, params: BindParams[_EmptyArgs]) -> BindResponse:
            """Report the schema supplied to the factory."""
            response = BindResponse(output_schema=output_schema)
            return response

        @classmethod
        def initial_state(cls, params: ProcessParams[_EmptyArgs]) -> _OneShotState:
            """Start from a fresh one-shot state."""
            fresh = _OneShotState()
            return fresh

        @classmethod
        def process(
            cls,
            params: ProcessParams[_EmptyArgs],
            state: _OneShotState,
            out: OutputCollector,
        ) -> None:
            """Emit the whole dataset on the first call, finish on later calls."""
            if not state.done:
                # Mark completion before emitting, as the original ordering does.
                state.done = True
                batch = pa.RecordBatch.from_pydict(data, schema=params.output_schema)
                out.emit(batch)
                return
            out.finish()

    # Give each generated class a distinct name, e.g. "colors_scan" ->
    # "ColorsScanFunction", instead of the shared "StaticScanFunction".
    generated_name = func_name.title().replace("_", "") + "Function"
    StaticScanFunction.__name__ = generated_name
    StaticScanFunction.__qualname__ = generated_name

    return StaticScanFunction
73
+
74
+
75
# Static "departments" fixture: three rows of (id, name, budget).
DepartmentsScanFunction = _static_scan_function(
    func_name="departments_scan",
    func_description="Scan departments table",
    output_schema=pa.schema(
        [  # type: ignore[arg-type]  # pyarrow stubs: mixed-type fields
            pa.field("id", pa.int64()),
            pa.field("name", pa.string()),
            pa.field("budget", pa.float64()),
        ]
    ),
    # Column-oriented: position i across the lists forms row i.
    data={
        "id": [1, 2, 3],
        "name": ["Engineering", "Sales", "HR"],
        "budget": [500_000.0, 300_000.0, 200_000.0],
    },
)
91
+
92
# Static "employees" fixture: five rows of (id, name, email, department_id).
EmployeesScanFunction = _static_scan_function(
    func_name="employees_scan",
    func_description="Scan employees table",
    output_schema=pa.schema(
        [  # type: ignore[arg-type]  # pyarrow stubs: mixed-type fields
            pa.field("id", pa.int64()),
            pa.field("name", pa.string()),
            pa.field("email", pa.string()),
            pa.field("department_id", pa.int64()),
        ]
    ),
    # Column-oriented: position i across the lists forms row i.
    data={
        "id": [1, 2, 3, 4, 5],
        "name": ["Alice", "Bob", "Carol", "Dave", "Eve"],
        "email": [
            "alice@co.com",
            "bob@co.com",
            "carol@co.com",
            "dave@co.com",
            "eve@co.com",
        ],
        # Every value falls within the ids served by departments_scan (1-3).
        "department_id": [1, 1, 2, 2, 3],
    },
)
110
+
111
# Static "projects" fixture: three rows of (department_id, project_code, title).
ProjectsScanFunction = _static_scan_function(
    func_name="projects_scan",
    func_description="Scan projects table",
    output_schema=pa.schema(
        [  # type: ignore[arg-type]  # pyarrow stubs: mixed-type fields
            pa.field("department_id", pa.int64()),
            pa.field("project_code", pa.string()),
            pa.field("title", pa.string()),
        ]
    ),
    # Column-oriented: position i across the lists forms row i.
    data={
        # Two projects under department 1, one under department 2.
        "department_id": [1, 1, 2],
        "project_code": ["P001", "P002", "P003"],
        "title": ["Backend API", "Frontend UI", "Sales Portal"],
    },
)
127
+
128
# Static "products" fixture: three rows of (id, name, quantity, price).
ProductsScanFunction = _static_scan_function(
    func_name="products_scan",
    func_description="Scan products table",
    output_schema=pa.schema(
        [  # type: ignore[arg-type]  # pyarrow stubs: mixed-type fields
            pa.field("id", pa.int64()),
            pa.field("name", pa.string()),
            pa.field("quantity", pa.int64()),
            pa.field("price", pa.float64()),
        ]
    ),
    # Column-oriented: position i across the lists forms row i.
    data={
        "id": [1, 2, 3],
        "name": ["Widget", "Gadget", "Doohickey"],
        "quantity": [100, 50, 200],
        "price": [9.99, 24.99, 4.99],
    },
)
146
+
147
# Static "colors" fixture: three rows of (id, color, hex_code).
# NOTE(review): the description mentions an ENUM column, yet ``color`` is a
# plain Arrow string here — presumably the ENUM typing is declared elsewhere
# (e.g. in the catalog); confirm.
ColorsScanFunction = _static_scan_function(
    func_name="colors_scan",
    func_description="Scan colors table (ENUM column)",
    output_schema=pa.schema(
        [  # type: ignore[arg-type]  # pyarrow stubs: mixed-type fields
            pa.field("id", pa.int64()),
            pa.field("color", pa.string()),
            pa.field("hex_code", pa.string()),
        ]
    ),
    # Column-oriented: position i across the lists forms row i.
    data={
        "id": [1, 2, 3],
        "color": ["blue", "green", "red"],
        "hex_code": ["#0000FF", "#00FF00", "#FF0000"],
    },
)