vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,234 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Scan functions backing the ``rff_*`` required-filter sqllogictest Tables.
4
+
5
+ Used by the ``vgi_required_filters_*.test`` matrix. These fixtures exercise
6
+ the ``Table.required_field_filter_paths`` field +
7
+ the C++ optimizer extension that enforces it. Five of the six tables form a small
8
+ matrix:
9
+
10
+ * ``rff_simple`` — flat columns, single top-level required path.
11
+ * ``rff_struct`` — struct column with two required subfield paths.
12
+ * ``rff_nested`` — nested struct with a 3-deep required path.
13
+ * ``rff_multi`` — mixed top-level + struct subfield requirements.
14
+ * ``rff_none`` — no requirement (control / regression for the fast path).
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from typing import Any
20
+
21
+ import pyarrow as pa
22
+ from vgi_rpc.rpc import OutputCollector
23
+
24
+ from vgi._test_fixtures.table._common import _EmptyArgs, _OneShotState
25
+ from vgi._test_fixtures.table.catalog_scans import _static_scan_function
26
+ from vgi.invocation import BindResponse
27
+ from vgi.table_function import (
28
+ BindParams,
29
+ ProcessParams,
30
+ TableFunctionGenerator,
31
+ init_single_worker,
32
+ )
33
+
34
+ # The fixture schemas. These are referenced both by the scan functions below and
35
+ # by the Table descriptors registered on the worker.
36
+
37
# rff_simple: two flat int64 columns, ``a`` and ``b``.
RFF_SIMPLE_COLUMNS = pa.schema(
    [
        ("a", pa.int64()),
        ("b", pa.int64()),
    ]
)
43
+
44
# rff_struct: a struct column ``s`` with int64 subfields ``a`` and ``b``,
# next to a plain int64 column ``other``.
RFF_STRUCT_COLUMNS = pa.schema(
    [
        ("s", pa.struct([("a", pa.int64()), ("b", pa.int64())])),
        ("other", pa.int64()),
    ]
)
58
+
59
# rff_nested: a single column ``wrapper`` whose int64 leaf sits three levels
# deep (``wrapper.mid.leaf``).
RFF_NESTED_COLUMNS = pa.schema(
    [
        (
            "wrapper",
            pa.struct(
                [
                    ("mid", pa.struct([("leaf", pa.int64())])),
                ]
            ),
        ),
    ]
)
78
+
79
# rff_multi: a struct column ``s`` (int64 subfields ``a`` and ``b``) combined
# with a top-level int64 column ``top``.
RFF_MULTI_COLUMNS = pa.schema(
    [
        ("s", pa.struct([("a", pa.int64()), ("b", pa.int64())])),
        ("top", pa.int64()),
    ]
)
93
+
94
# rff_none: two flat int64 columns, ``a`` and ``b``, for the control table.
RFF_NONE_COLUMNS = pa.schema(
    [
        ("a", pa.int64()),
        ("b", pa.int64()),
    ]
)
100
+
101
# rff_rowid pairs a row-id column (virtual, hidden from SELECT *) with a bbox
# struct that has required_field_filter_paths. A `WHERE rowid = N` predicate
# pushes a table_filter keyed by the COLUMN_IDENTIFIER_ROW_ID sentinel, which
# is far larger than the column count; the optimizer's required-filter check
# has to skip that entry instead of indexing out of bounds.
# See required_field_filter_paths_native.test.
RFF_ROWID_COLUMNS = pa.schema(
    [
        # The ``is_row_id`` metadata key (empty value) marks this as the row-id column.
        pa.field("row_id", pa.int64(), metadata={b"is_row_id": b""}),
        pa.field(
            "bbox",
            # Four float32 bounds, in this exact order.
            pa.struct([(bound, pa.float32()) for bound in ("xmin", "ymin", "xmax", "ymax")]),
        ),
        pa.field("other", pa.int64()),
    ]
)
123
+
124
+
125
# Scan function for rff_simple: three rows with a = 1..3 and b = 10, 20, 30.
RffSimpleScanFunction = _static_scan_function(
    func_name="rff_simple_scan",
    func_description="rff_simple — flat columns (a, b) for required_field_filter_paths tests",
    output_schema=RFF_SIMPLE_COLUMNS,
    data={
        "a": list(range(1, 4)),
        "b": list(range(10, 40, 10)),
    },
)
134
+
135
# Scan function for rff_struct: three rows where s.b == 10 * s.a and
# other == 100 * s.a.
RffStructScanFunction = _static_scan_function(
    func_name="rff_struct_scan",
    func_description="rff_struct — STRUCT(s.a, s.b) + other for required_field_filter_paths tests",
    output_schema=RFF_STRUCT_COLUMNS,
    data={
        "s": [{"a": n, "b": 10 * n} for n in (1, 2, 3)],
        "other": [100 * n for n in (1, 2, 3)],
    },
)
148
+
149
# Scan function for rff_nested: three rows with wrapper.mid.leaf = 1, 2, 3.
RffNestedScanFunction = _static_scan_function(
    func_name="rff_nested_scan",
    func_description="rff_nested — nested STRUCT(wrapper.mid.leaf) for required_field_filter_paths tests",
    output_schema=RFF_NESTED_COLUMNS,
    data={
        "wrapper": [{"mid": {"leaf": n}} for n in (1, 2, 3)],
    },
)
161
+
162
# Scan function for rff_multi: two rows where s.b == 10 * s.a and
# top == 100 * s.a.
RffMultiScanFunction = _static_scan_function(
    func_name="rff_multi_scan",
    func_description="rff_multi — top-level + struct subfield required paths",
    output_schema=RFF_MULTI_COLUMNS,
    data={
        "s": [{"a": n, "b": 10 * n} for n in (1, 2)],
        "top": [100 * n for n in (1, 2)],
    },
)
174
+
175
# Scan function for rff_none (the control table): three rows with a = 1..3
# and b == 10 * a.
RffNoneScanFunction = _static_scan_function(
    func_name="rff_none_scan",
    func_description="rff_none — control table with no required_field_filter_paths",
    output_schema=RFF_NONE_COLUMNS,
    data={
        "a": [1, 2, 3],
        "b": [10 * a for a in (1, 2, 3)],
    },
)
184
+
185
+
186
# rff_rowid requires projection_pushdown, because virtual row-id columns depend
# on it. That rules out the one-shot static factory: with projection active the
# emitted batch has to match the *projected* output schema, so this scan builds
# only the columns that were requested.
@init_single_worker
class RffRowidScanFunction(TableFunctionGenerator[_EmptyArgs, _OneShotState]):
    """rff_rowid — row_id virtual column + bbox.* required filters.

    Emits a single 10-row batch restricted to the columns present in the
    scan's output schema, then finishes.
    """

    class Meta:
        """Registration metadata for ``rff_rowid_scan``."""

        name = "rff_rowid_scan"
        description = "rff_rowid — row_id virtual column + bbox.* required filters"
        projection_pushdown = True
        # With filter_pushdown the WHERE predicates, including the rowid
        # filter keyed by the COLUMN_IDENTIFIER_ROW_ID sentinel, are routed
        # into the scan's table_filters. auto_apply_filters has the framework
        # apply them, so results stay correct without a hand-written filter
        # loop in process().
        filter_pushdown = True
        auto_apply_filters = True

    @classmethod
    def on_bind(cls, params: BindParams[_EmptyArgs]) -> BindResponse:
        """Bind with the unprojected schema: row_id, bbox and other."""
        return BindResponse(output_schema=RFF_ROWID_COLUMNS)

    @classmethod
    def initial_state(cls, params: ProcessParams[_EmptyArgs]) -> _OneShotState:
        """Start every scan with a fresh one-shot state."""
        return _OneShotState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_EmptyArgs],
        state: _OneShotState,
        out: OutputCollector,
    ) -> None:
        """Emit the 10-row batch on the first call and finish on later calls.

        Only the columns named in ``params.output_schema`` are emitted, so the
        batch matches the projected schema.
        """
        if state.done:
            out.finish()
            return

        # Flip the flag before building the batch so the next call finishes.
        state.done = True

        row_ids = list(range(10))
        available: dict[str, Any] = {
            "row_id": row_ids,
            "bbox": [{"xmin": float(i), "ymin": 2.0, "xmax": 3.0, "ymax": 4.0} for i in row_ids],
            "other": [i * 10 for i in row_ids],
        }

        # Select the requested columns, in the order the output schema lists them.
        projected: dict[str, Any] = {}
        for field in params.output_schema:
            projected[field.name] = available[field.name]

        out.emit(pa.RecordBatch.from_pydict(projected, schema=params.output_schema))