vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Time-travel + filter-pushdown fixtures.
|
|
4
|
+
|
|
5
|
+
These back ``test/sql/integration/table/time_travel_pushdown.test`` in the C++
|
|
6
|
+
repo, which asserts that a table can be partition-pruned (filter pushdown) AND
|
|
7
|
+
time-travelled (``AT (VERSION|TIMESTAMP ...)``) in the *same* query — for tables
|
|
8
|
+
declared **both** ways:
|
|
9
|
+
|
|
10
|
+
- ``tt_pushdown_fn`` — **function-backed** (``Table(function=...)``). It reads the
|
|
11
|
+
AT clause from the init request (``params.at_value`` →
|
|
12
|
+
``init_call.bind_call.at_value``), which only works once the framework threads
|
|
13
|
+
AT onto the bind request. Before that fix this table cannot see AT at all, so
|
|
14
|
+
``seen_version`` collapses to the current version — the regression guard.
|
|
15
|
+
- ``tt_pushdown_cols`` — **columns-based** (``Table(columns=...)`` routed via
|
|
16
|
+
``table_scan_function_get``). It gets the resolved version as a scan-function
|
|
17
|
+
**argument** (the ``versioned_data`` mechanism) — the native columns-based AT
|
|
18
|
+
path, here to prove the bind-side change didn't regress it.
|
|
19
|
+
|
|
20
|
+
Both echo ``seen_version`` (the version they actually scanned) and
|
|
21
|
+
``pushed_filters`` (the SQL-like predicate DuckDB pushed down), so one query can
|
|
22
|
+
assert both signals at once. ``auto_apply_filters`` keeps the result set correct.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
from typing import Annotated, Any, ClassVar
|
|
29
|
+
|
|
30
|
+
import pyarrow as pa
|
|
31
|
+
from vgi_rpc import ArrowSerializableDataclass
|
|
32
|
+
from vgi_rpc.rpc import OutputCollector
|
|
33
|
+
|
|
34
|
+
from vgi._test_fixtures.table._common import _EmptyArgs
|
|
35
|
+
from vgi._test_fixtures.table.filters import _format_pushed_filters
|
|
36
|
+
from vgi.arguments import Arg
|
|
37
|
+
from vgi.schema_utils import schema
|
|
38
|
+
from vgi.table_function import (
|
|
39
|
+
ProcessParams,
|
|
40
|
+
TableFunctionGenerator,
|
|
41
|
+
bind_fixed_schema,
|
|
42
|
+
init_single_worker,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
# Schema evolution is out of scope for these fixtures, so a single output
# schema serves every version: versions differ only in their row *data*, which
# lets the function-backed table stay inline-bound.
_TT_SCHEMA: pa.Schema = schema(
    id=pa.int64(),  # row id
    val=pa.int64(),  # id * 10
    seen_version=pa.int64(),  # echo of the version that was scanned
    pushed_filters=pa.string(),  # echo of the pushed-down predicate
)
|
|
53
|
+
|
|
54
|
+
# Row ids available in each version (every row's val is id * 10). Version 2
# contains all of version 1's rows plus more, so the row count alone shows
# which version a scan read.
_TT_VERSION_IDS: dict[int, list[int]] = {
    1: [1, 2, 3, 4, 5],
    2: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
}
_TT_CURRENT_VERSION = 2  # default when there is no AT clause


def resolve_tt_version(at_unit: str | None, at_value: str | None) -> int:
    """Resolve an AT clause to one of this fixture's versions (1 or 2).

    - no ``at_unit`` (``None`` or empty) → current version (2)
    - ``VERSION => n`` → ``int(n)`` (must be 1 or 2)
    - ``TIMESTAMP`` → year (first four characters) <= 2020 → 1, else 2

    ``at_unit`` is matched case-insensitively.

    Raises:
        ValueError: if ``at_unit`` is not ``VERSION``/``TIMESTAMP``, if a unit
            is given without a value, if the value cannot be parsed as an
            integer, or if the requested version does not exist.
    """
    if not at_unit:
        return _TT_CURRENT_VERSION

    unit = at_unit.upper()
    if unit not in ("VERSION", "TIMESTAMP"):
        raise ValueError(f"Unsupported at_unit: {at_unit!r}")

    # Without this guard ``int(None)`` raises TypeError for VERSION, and
    # TIMESTAMP would try to parse the literal text "None" as a year.
    if at_value is None:
        raise ValueError(f"AT {unit} requires a value, got None")

    if unit == "VERSION":
        version = int(at_value)
        if version not in _TT_VERSION_IDS:
            raise ValueError(f"Unknown version {version}; valid: {sorted(_TT_VERSION_IDS)}")
        return version

    # TIMESTAMP: only the leading four-digit year matters.
    year = int(str(at_value)[:4])
    return 1 if year <= 2020 else 2
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass(kw_only=True)
class _TtState(ArrowSerializableDataclass):
    """Scan state shared by both time-travel + pushdown fixtures.

    All fields are ordinary serialized fields (none is transient). The HTTP
    state-token rehydrate path rebuilds state by deserializing it rather than
    by calling ``initial_state`` again, so ``seen_version`` and
    ``pushed_filters`` have to survive that round-trip for the echoed columns
    to stay correct.
    """

    # Version whose rows are emitted; echoed in the ``seen_version`` column.
    seen_version: int = 0
    # Rendered pushed-down predicate; echoed in the ``pushed_filters`` column.
    pushed_filters: str = "(none)"
    # Set once the single output batch has been produced.
    done: bool = False
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _emit_version(params: ProcessParams[Any], state: _TtState, out: OutputCollector) -> None:
    """Produce the single batch for ``state.seen_version``, then finish.

    The first call marks the state as done and emits every row of the version,
    restricted to the columns named in ``params.output_schema``. Any later call
    only signals completion through ``out.finish()``.
    """
    if not state.done:
        state.done = True
        scanned = state.seen_version
        row_ids = _TT_VERSION_IDS[scanned]
        row_count = len(row_ids)
        available: dict[str, list[Any]] = {
            "id": row_ids,
            "val": [row_id * 10 for row_id in row_ids],
            "seen_version": [scanned] * row_count,
            "pushed_filters": [state.pushed_filters] * row_count,
        }
        # projection_pushdown=True: the output schema lists only the requested
        # columns, so pick exactly those out of the full column set.
        projected: dict[str, list[Any]] = {}
        for field in params.output_schema:
            projected[field.name] = available[field.name]
        out.emit(pa.RecordBatch.from_pydict(projected, schema=params.output_schema))
        return
    out.finish()
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _pushed_filters_str(params: ProcessParams[Any]) -> str:
    """Render the filters pushed down on the init request as a string.

    Reads ``pushdown_filters`` and ``join_keys`` from ``params.init_call``
    (which must be present). When filters were pushed they are decoded with
    ``TableFunctionGenerator.pushdown_filters``; either way the result (or
    ``None``) is handed to ``_format_pushed_filters`` for formatting.
    """
    assert params.init_call is not None
    init_call = params.init_call
    raw_filters = init_call.pushdown_filters
    join_keys = init_call.join_keys
    if raw_filters is None:
        decoded = None
    else:
        decoded = TableFunctionGenerator.pushdown_filters(raw_filters, join_keys=join_keys)
    return _format_pushed_filters(decoded)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@init_single_worker
@bind_fixed_schema
class TimeTravelPushdownFunction(TableFunctionGenerator[_EmptyArgs, _TtState]):
    """Function-backed scan combining time travel with filter pushdown.

    The version is taken from the AT clause carried on the **init** request
    (``params.at_unit`` / ``params.at_value``), which demonstrates that the
    framework threads AT onto the bind request embedded in init. The function
    takes no arguments: the version never arrives as a scan-function argument.
    """

    class Meta:
        # Registration metadata; all three pushdown-related flags are enabled.
        name = "tt_pushdown_scan"
        description = "Function-backed time-travel + filter-pushdown scan (reads AT at init)."
        categories = ["generator", "diagnostic", "testing"]
        filter_pushdown = True
        auto_apply_filters = True
        projection_pushdown = True

    FIXED_SCHEMA: ClassVar[pa.Schema] = _TT_SCHEMA

    @classmethod
    def initial_state(cls, params: ProcessParams[_EmptyArgs]) -> _TtState:
        """Capture the AT-resolved version and the pushed-filter text."""
        scanned_version = resolve_tt_version(params.at_unit, params.at_value)
        filter_text = _pushed_filters_str(params)
        return _TtState(seen_version=scanned_version, pushed_filters=filter_text)

    @classmethod
    def process(cls, params: ProcessParams[_EmptyArgs], state: _TtState, out: OutputCollector) -> None:
        """Delegate to ``_emit_version`` (one batch, then finish)."""
        _emit_version(params, state, out)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@dataclass(slots=True, frozen=True)
class _TtColsArgs:
    """Single positional argument accepted by the columns-based scan.

    ``version`` is the already-resolved data version; the worker's
    ``table_scan_function_get`` derives it from the AT clause and injects it.
    """

    version: Annotated[int, Arg(0, doc="Resolved data version")]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@init_single_worker
@bind_fixed_schema
class TtPushdownColsScanFunction(TableFunctionGenerator[_TtColsArgs, _TtState]):
    """Columns-based time-travel + pushdown scan.

    Receives the version as a scan-function **argument** (the native
    columns-based AT mechanism: the worker resolves AT → version in
    ``table_scan_function_get``). Backs ``example.data.tt_pushdown_cols``.
    """

    class Meta:
        # Registration metadata; all three pushdown-related flags are enabled.
        name = "tt_pushdown_cols_scan"
        description = "Columns-based time-travel + filter-pushdown scan (version via arg)."
        categories = ["generator", "diagnostic", "testing"]
        filter_pushdown = True
        auto_apply_filters = True
        projection_pushdown = True

    FIXED_SCHEMA: ClassVar[pa.Schema] = _TT_SCHEMA

    @classmethod
    def initial_state(cls, params: ProcessParams[_TtColsArgs]) -> _TtState:
        """Build the scan state from the ``version`` argument.

        Raises:
            ValueError: if ``version`` is not one of this fixture's versions.
                Without this check an unknown version would only surface later
                as a bare ``KeyError`` while emitting rows.
        """
        version = params.args.version
        if version not in _TT_VERSION_IDS:
            raise ValueError(f"Unknown version {version}; valid: {sorted(_TT_VERSION_IDS)}")
        return _TtState(seen_version=version, pushed_filters=_pushed_filters_str(params))

    @classmethod
    def process(cls, params: ProcessParams[_TtColsArgs], state: _TtState, out: OutputCollector) -> None:
        """Emit the single batch for the state's version, then finish."""
        _emit_version(params, state, out)
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Versioned data fixtures (versioned_data, versioned_constraints) used by time-travel tests."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Annotated, Any
|
|
9
|
+
|
|
10
|
+
import pyarrow as pa
|
|
11
|
+
from vgi_rpc import ArrowSerializableDataclass
|
|
12
|
+
from vgi_rpc.rpc import OutputCollector
|
|
13
|
+
|
|
14
|
+
from vgi._test_fixtures.table._common import _OneShotState
|
|
15
|
+
from vgi.arguments import Arg
|
|
16
|
+
from vgi.invocation import BindResponse
|
|
17
|
+
from vgi.schema_utils import schema
|
|
18
|
+
from vgi.table_function import (
|
|
19
|
+
BindParams,
|
|
20
|
+
ProcessParams,
|
|
21
|
+
TableFunctionGenerator,
|
|
22
|
+
init_single_worker,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# ============================================================================
# versioned_data: per-version schema and rows
# ============================================================================

# Output schema of each version, keyed by version number.
_VERSIONED_SCHEMAS: dict[int, pa.Schema] = {
    1: schema(id=pa.int64()),
    2: schema(
        id=pa.int64(),
        name=pa.string(),
        score=pa.float64(),
        active=pa.bool_(),
    ),
    3: schema(id=pa.int64(), score=pa.float64()),
}

# Column data of each version; the keys line up with _VERSIONED_SCHEMAS.
_VERSIONED_DATA: dict[int, dict[str, list[Any]]] = {
    1: {
        "id": [1, 2, 3],
    },
    2: {
        "id": [1, 2, 3, 4, 5],
        "name": ["alice", "bob", "carol", "dave", "eve"],
        "score": [10.0, 20.0, 30.0, 40.0, 50.0],
        "active": [True, False, True, False, True],
    },
    3: {
        "id": [1, 2, 3, 4],
        "score": [15.0, 25.0, 35.0, 45.0],
    },
}

# Version served when the query carries no AT clause.
_CURRENT_VERSION = 3
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def resolve_version(at_unit: str | None, at_value: str | None) -> int:
    """Resolve AT clause to a version number.

    - no ``at_unit`` (``None`` or empty): current version (3)
    - ``VERSION``: direct integer version (must exist in ``_VERSIONED_SCHEMAS``)
    - ``TIMESTAMP``: mapping on the leading four-digit year
      (2020→1, 2021→2, >=2022→3); years before 2020 are rejected because
      the table did not exist yet

    ``at_unit`` is matched case-insensitively.

    Raises:
        ValueError: for unknown versions, unsupported AT units, timestamps
            before 2020, a unit given without a value, or a value that cannot
            be parsed as an integer.
    """
    if not at_unit:
        return _CURRENT_VERSION

    unit = at_unit.upper()
    if unit not in ("VERSION", "TIMESTAMP"):
        raise ValueError(f"Unsupported at_unit: {at_unit!r}")

    # Without this guard ``int(None)`` raises TypeError for VERSION, and
    # TIMESTAMP would try to parse the literal text "None" as a year.
    if at_value is None:
        raise ValueError(f"AT {unit} requires a value, got None")

    if unit == "VERSION":
        version = int(at_value)
        if version not in _VERSIONED_SCHEMAS:
            raise ValueError(f"Unknown version: {version}. Valid versions: {sorted(_VERSIONED_SCHEMAS)}")
        return version

    # TIMESTAMP: parse year from timestamp string (e.g. "2020-06-15 00:00:00")
    year = int(str(at_value)[:4])
    if year < 2020:
        raise ValueError(f"No version exists at timestamp {at_value!r}: table did not exist before 2020")
    if year == 2020:
        return 1
    if year == 2021:
        return 2
    return 3
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass(slots=True, frozen=True)
class VersionedDataFunctionArgs:
    """Positional arguments accepted by ``VersionedDataFunction``."""

    # Selects which version's schema and rows the function serves.
    version: Annotated[int, Arg(0, doc="Data version to return")]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass(kw_only=True)
class VersionedDataState(ArrowSerializableDataclass):
    """Scan progress for ``VersionedDataFunction``."""

    # Flipped to True once the single output batch has been emitted.
    done: bool = False
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@init_single_worker
class VersionedDataFunction(TableFunctionGenerator[VersionedDataFunctionArgs, VersionedDataState]):
    """Time-travel fixture whose schema and rows both depend on the version.

    The versions it serves:

    - **Version 1**: ``(id int64)`` — 3 rows
    - **Version 2**: ``(id int64, name string, score double, active bool)`` — 5 rows
    - **Version 3** (current): ``(id int64, score double)`` — 4 rows

    """

    class Meta:
        """Metadata for VersionedDataFunction."""

        name = "versioned_data_scan"
        description = "Returns versioned data with schema evolution"
        categories = ["generator", "testing"]

    @classmethod
    def on_bind(cls, params: BindParams[VersionedDataFunctionArgs]) -> BindResponse:
        """Bind to the schema of the requested version.

        Raises ``ValueError`` when the version has no schema defined.
        """
        requested = params.args.version
        bound_schema = _VERSIONED_SCHEMAS.get(requested)
        if bound_schema is None:
            raise ValueError(f"Unknown version: {requested}. Valid versions: {sorted(_VERSIONED_SCHEMAS)}")
        return BindResponse(output_schema=bound_schema)

    @classmethod
    def initial_state(cls, params: ProcessParams[VersionedDataFunctionArgs]) -> VersionedDataState:
        """Start with a fresh, not-yet-done state."""
        return VersionedDataState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[VersionedDataFunctionArgs],
        state: VersionedDataState,
        out: OutputCollector,
    ) -> None:
        """Emit the requested version's rows as one batch, then finish."""
        if not state.done:
            state.done = True
            rows = _VERSIONED_DATA[params.args.version]
            out.emit(pa.RecordBatch.from_pydict(rows, schema=params.output_schema))
            return
        out.finish()
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ============================================================================
# versioned_constraints: per-version schema and rows
#
# Version 1: simple users table (id, name) — NOT NULL on id only
# Version 2: adds email column, PK on id, UNIQUE on email
# Version 3: adds department_id column, FK to departments
# ============================================================================

# Output schema of each version, keyed by version number.
_VERSIONED_CONSTRAINTS_SCHEMAS: dict[int, pa.Schema] = {
    1: schema(id=pa.int64(), name=pa.string()),
    2: schema(id=pa.int64(), name=pa.string(), email=pa.string()),
    3: schema(
        id=pa.int64(),
        name=pa.string(),
        email=pa.string(),
        department_id=pa.int64(),
    ),
}

# Column data of each version; keys line up with the schemas above.
_VERSIONED_CONSTRAINTS_DATA: dict[int, dict[str, list[Any]]] = {
    1: {
        "id": [1, 2],
        "name": ["Alice", "Bob"],
    },
    2: {
        "id": [1, 2, 3],
        "name": ["Alice", "Bob", "Carol"],
        "email": ["a@co", "b@co", "c@co"],
    },
    3: {
        "id": [1, 2, 3],
        "name": ["Alice", "Bob", "Carol"],
        "email": ["a@co", "b@co", "c@co"],
        "department_id": [1, 2, 1],
    },
}

# Version served when the query carries no AT clause.
_VERSIONED_CONSTRAINTS_CURRENT = 3
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def resolve_versioned_constraints_version(at_unit: str | None, at_value: str | None) -> int:
    """Resolve AT clause for versioned_constraints table.

    - no ``at_unit`` (``None`` or empty): current version (3)
    - ``VERSION``: direct integer version (must exist in
      ``_VERSIONED_CONSTRAINTS_SCHEMAS``); matched case-insensitively

    Any other unit (including ``TIMESTAMP``) is unsupported for this table.

    Raises:
        ValueError: for unsupported AT units, unknown versions, ``VERSION``
            given without a value, or a value that cannot be parsed as an
            integer.
    """
    if not at_unit:
        return _VERSIONED_CONSTRAINTS_CURRENT

    if at_unit.upper() != "VERSION":
        raise ValueError(f"Unsupported at_unit: {at_unit!r}")

    # Without this guard ``int(None)`` would raise TypeError instead of the
    # ValueError used for every other bad AT clause.
    if at_value is None:
        raise ValueError("AT VERSION requires a value, got None")

    version = int(at_value)
    if version not in _VERSIONED_CONSTRAINTS_SCHEMAS:
        raise ValueError(f"Unknown version: {version}. Valid versions: {sorted(_VERSIONED_CONSTRAINTS_SCHEMAS)}")
    return version
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@dataclass(slots=True, frozen=True)
class _VersionedConstraintsArgs:
    """Positional arguments accepted by ``VersionedConstraintsScanFunction``."""

    # Selects which version's schema and rows the scan serves.
    version: Annotated[int, Arg(0, doc="Data version")]
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@init_single_worker
class VersionedConstraintsScanFunction(
    TableFunctionGenerator[_VersionedConstraintsArgs, _OneShotState],
):
    """Scan fixture serving per-version rows for constraint-evolution tests."""

    class Meta:
        """Metadata for VersionedConstraintsScanFunction."""

        name = "versioned_constraints_scan"
        description = "Scan versioned constraints table"

    @classmethod
    def on_bind(cls, params: BindParams[_VersionedConstraintsArgs]) -> BindResponse:
        """Bind to the schema of the requested version.

        Raises ``ValueError`` when the version has no schema defined.
        """
        requested = params.args.version
        bound_schema = _VERSIONED_CONSTRAINTS_SCHEMAS.get(requested)
        if bound_schema is None:
            raise ValueError(f"Unknown version: {requested}")
        return BindResponse(output_schema=bound_schema)

    @classmethod
    def initial_state(cls, params: ProcessParams[_VersionedConstraintsArgs]) -> _OneShotState:
        """Start with a fresh one-shot state."""
        return _OneShotState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_VersionedConstraintsArgs],
        state: _OneShotState,
        out: OutputCollector,
    ) -> None:
        """Emit the requested version's rows as one batch, then finish."""
        if not state.done:
            state.done = True
            rows = _VERSIONED_CONSTRAINTS_DATA[params.args.version]
            out.emit(pa.RecordBatch.from_pydict(rows, schema=params.output_schema))
            return
        out.finish()
|