vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Deliberately-broken batch_index fixtures for contract-enforcement testing.
|
|
4
|
+
|
|
5
|
+
These fixtures violate the ``Meta.supports_batch_index = True`` contract in
|
|
6
|
+
three different ways so SQL integration tests can assert that the C++
|
|
7
|
+
extension's contract checks (in ``InstallBatch``) and the worker library's
|
|
8
|
+
``_merge_batch_index`` validator (in ``vgi/protocol.py``) raise typed
|
|
9
|
+
errors. None of these is intended for production use.
|
|
10
|
+
|
|
11
|
+
The shape of the contract is documented at
|
|
12
|
+
``vgi-python/vgi/_test_fixtures/table/batch_index.py`` and
|
|
13
|
+
``vgi/src/vgi_table_function_impl.cpp::InstallBatch``.
|
|
14
|
+
|
|
15
|
+
* ``broken_missing_batch_index_tag`` — emits a data batch with NO
|
|
16
|
+
``vgi_batch_index`` metadata, bypassing the framework wrapper's
|
|
17
|
+
validation by reaching into the inner collector directly. The C++
|
|
18
|
+
extension's ``InstallBatch`` raises IOException "without
|
|
19
|
+
vgi_batch_index metadata" when the function opts in.
|
|
20
|
+
|
|
21
|
+
* ``broken_non_monotone_batch_index`` — emits batches with strictly
|
|
22
|
+
decreasing partition_ids on the same stream. The C++ extension's
|
|
23
|
+
``InstallBatch`` raises IOException "decreased from N to M on the
|
|
24
|
+
same stream" — DuckDB's per-thread monotonicity assertion is debug-
|
|
25
|
+
only, so VGI must enforce in release builds.
|
|
26
|
+
|
|
27
|
+
* ``broken_batch_index_overflow`` — emits a partition_id at 2^60, well
|
|
28
|
+
above DuckDB's ``BATCH_INCREMENT = 10^13`` per-pipeline cap. The
|
|
29
|
+
C++ extension's ``InstallBatch`` raises IOException "exceeds
|
|
30
|
+
DuckDB's per-pipeline cap" — without this, the worker would surface
|
|
31
|
+
an opaque DuckDB InternalException from the pipeline executor.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
from dataclasses import dataclass
|
|
37
|
+
from typing import Annotated, ClassVar, cast
|
|
38
|
+
|
|
39
|
+
import pyarrow as pa
|
|
40
|
+
from vgi_rpc import ArrowSerializableDataclass
|
|
41
|
+
from vgi_rpc.rpc import OutputCollector
|
|
42
|
+
|
|
43
|
+
from vgi._test_fixtures.table._common import _cardinality_from_count
|
|
44
|
+
from vgi.arguments import Arg
|
|
45
|
+
from vgi.metadata import OrderPreservation
|
|
46
|
+
from vgi.protocol import VgiOutputCollector
|
|
47
|
+
from vgi.schema_utils import schema
|
|
48
|
+
from vgi.table_function import (
|
|
49
|
+
ProcessParams,
|
|
50
|
+
TableFunctionGenerator,
|
|
51
|
+
bind_fixed_schema,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(frozen=True, slots=True)
class _BrokenArgs:
    """Arguments accepted by each deliberately-broken batch_index fixture."""

    # Declared as positional argument 0 with a lower bound of 1 (``ge=1``).
    count: Annotated[int, Arg(0, doc="Total rows to attempt to generate", ge=1)]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass(kw_only=True)
class _BrokenState(ArrowSerializableDataclass):
    """Per-stream progress flag shared by the broken batch_index fixtures."""

    # False until the fixture's first ``process`` call has emitted its batch.
    emitted: bool = False
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@bind_fixed_schema
@_cardinality_from_count
class MissingBatchIndexTagFunction(TableFunctionGenerator[_BrokenArgs, _BrokenState]):
    """Declares batch_index support, then emits one untagged batch.

    The batch is handed to the innermost collector directly, so it carries
    no ``vgi_batch_index`` tag when it leaves the worker.
    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(n=pa.int64())

    class Meta:
        # Registration metadata for this fixture.
        name = "broken_missing_batch_index_tag"
        description = (
            "DELIBERATELY BROKEN: declares supports_batch_index=True but "
            "emits a data batch with no vgi_batch_index metadata. C++ "
            "extension's contract check raises."
        )
        categories = ["testing", "broken"]
        preserves_order = OrderPreservation.FIXED_ORDER
        supports_batch_index = True

    @classmethod
    def initial_state(cls, params: ProcessParams[_BrokenArgs]) -> _BrokenState:
        """Start with nothing emitted yet."""
        return _BrokenState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_BrokenArgs],
        state: _BrokenState,
        out: OutputCollector,
    ) -> None:
        """Emit ``count`` rows untagged on the first call; finish on the next."""
        if not state.emitted:
            row_values = list(range(params.args.count))
            untagged = pa.RecordBatch.from_pydict(
                {"n": row_values},
                schema=params.output_schema,
            )
            # Peel every wrapper layer (anything exposing ``_inner``) until the
            # bottom-most collector is reached. Emitting there is the "broken"
            # part: the framework's _merge_batch_index validator is skipped, so
            # the batch goes out with no vgi_batch_index metadata. It also
            # exercises the rule that the wire format, not the wrapper layer,
            # is the authoritative check.
            raw_collector = out
            while True:
                try:
                    raw_collector = raw_collector._inner
                except AttributeError:
                    break
            raw_collector.emit(untagged)
            state.emitted = True
            return

        out.finish()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
@bind_fixed_schema
@_cardinality_from_count
class NonMonotoneBatchIndexFunction(TableFunctionGenerator[_BrokenArgs, _BrokenState]):
    """Emits two batches whose batch_index goes down (10, then 3)."""

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(n=pa.int64())

    class Meta:
        # Registration metadata for this fixture.
        name = "broken_non_monotone_batch_index"
        description = (
            "DELIBERATELY BROKEN: emits batches with strictly decreasing "
            "partition_id on one stream. C++ extension's monotonicity check "
            "raises (DuckDB's debug-only assertion is not relied upon)."
        )
        categories = ["testing", "broken"]
        preserves_order = OrderPreservation.FIXED_ORDER
        supports_batch_index = True

    @classmethod
    def initial_state(cls, params: ProcessParams[_BrokenArgs]) -> _BrokenState:
        """Start with neither batch sent."""
        return _BrokenState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_BrokenArgs],
        state: _BrokenState,
        out: OutputCollector,
    ) -> None:
        """Send the high-index batch first, then a lower-index one and finish.

        ``state.emitted`` doubles as the "first batch already sent" marker.
        """
        # The cast only informs the type checker; it is the same object as ``out``.
        tagged_out = cast(VgiOutputCollector, out)

        if not state.emitted:
            # First call: ``count`` rows tagged with batch_index 10.
            first = pa.RecordBatch.from_pydict(
                {"n": list(range(params.args.count))},
                schema=params.output_schema,
            )
            tagged_out.emit(first, batch_index=10)
            state.emitted = True
            return

        # Second call: a single row tagged LOWER than the first batch.
        second = pa.RecordBatch.from_pydict(
            {"n": [42]},
            schema=params.output_schema,
        )
        tagged_out.emit(second, batch_index=3)
        out.finish()
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@bind_fixed_schema
@_cardinality_from_count
class BatchIndexOverflowFunction(TableFunctionGenerator[_BrokenArgs, _BrokenState]):
    """Tags its only batch with a batch_index of 2^60."""

    FIXED_SCHEMA: ClassVar[pa.Schema] = schema(n=pa.int64())

    class Meta:
        # Registration metadata for this fixture.
        name = "broken_batch_index_overflow"
        description = (
            "DELIBERATELY BROKEN: emits a batch tagged with a partition_id "
            "well above DuckDB's BATCH_INCREMENT=10^13 per-pipeline cap. "
            "C++ extension rejects at parse time."
        )
        categories = ["testing", "broken"]
        preserves_order = OrderPreservation.FIXED_ORDER
        supports_batch_index = True

    @classmethod
    def initial_state(cls, params: ProcessParams[_BrokenArgs]) -> _BrokenState:
        """Start with nothing emitted yet."""
        return _BrokenState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_BrokenArgs],
        state: _BrokenState,
        out: OutputCollector,
    ) -> None:
        """Emit ``count`` rows with an oversized index, then finish next call."""
        if not state.emitted:
            rows = pa.RecordBatch.from_pydict(
                {"n": list(range(params.args.count))},
                schema=params.output_schema,
            )
            # 2^60 — far above the 10^13 cap.
            oversized_index = 1 << 60
            cast(VgiOutputCollector, out).emit(rows, batch_index=oversized_index)
            state.emitted = True
            return

        out.finish()
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Static catalog scan functions (colors, departments, employees, products, projects)."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import pyarrow as pa
|
|
10
|
+
from vgi_rpc.rpc import OutputCollector
|
|
11
|
+
|
|
12
|
+
from vgi._test_fixtures.table._common import _EmptyArgs, _OneShotState
|
|
13
|
+
from vgi.invocation import BindResponse
|
|
14
|
+
from vgi.table_function import (
|
|
15
|
+
BindParams,
|
|
16
|
+
ProcessParams,
|
|
17
|
+
TableFunctionGenerator,
|
|
18
|
+
init_single_worker,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _static_scan_function(
    func_name: str,
    func_description: str,
    output_schema: pa.Schema,
    data: dict[str, list[Any]],
) -> type[TableFunctionGenerator[_EmptyArgs, _OneShotState]]:
    """Build a table-function class that serves a fixed dataset in one batch.

    Removes the per-table boilerplate for simple scans. The returned class is
    wrapped with ``@init_single_worker``, binds to ``output_schema``, and takes
    its ``Meta.name`` / ``Meta.description`` from the arguments.

    Args:
        func_name: Registered function name; also the basis for the generated
            class name (title-cased, underscores removed, ``Function`` appended).
        func_description: Text stored as ``Meta.description``.
        output_schema: Schema returned from ``on_bind``.
        data: Column name to list of values, emitted as a single record batch.

    Returns:
        The newly created ``TableFunctionGenerator`` subclass.
    """
    # e.g. "departments_scan" -> "DepartmentsScanFunction"
    generated_name = func_name.title().replace("_", "") + "Function"

    @init_single_worker
    class StaticScanFunction(TableFunctionGenerator[_EmptyArgs, _OneShotState]):
        """Serves the captured static rows."""

        class Meta:
            """Function metadata."""

            name = func_name
            description = func_description

        @classmethod
        def on_bind(cls, params: BindParams[_EmptyArgs]) -> BindResponse:
            """Report the fixed output schema."""
            return BindResponse(output_schema=output_schema)

        @classmethod
        def initial_state(cls, params: ProcessParams[_EmptyArgs]) -> _OneShotState:
            """Produce a fresh one-shot state."""
            return _OneShotState()

        @classmethod
        def process(
            cls,
            params: ProcessParams[_EmptyArgs],
            state: _OneShotState,
            out: OutputCollector,
        ) -> None:
            """Emit every row on the first call and finish on the following one."""
            if not state.done:
                # Flip the flag before building the batch, as the original does.
                state.done = True
                rows = pa.RecordBatch.from_pydict(data, schema=params.output_schema)
                out.emit(rows)
                return
            out.finish()

    # Give each generated class a distinct name; this overwrites the shared
    # "StaticScanFunction" name.
    StaticScanFunction.__name__ = generated_name
    StaticScanFunction.__qualname__ = generated_name

    return StaticScanFunction
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# departments: id, name and budget for three departments.
_DEPARTMENTS_SCHEMA = pa.schema(
    [  # type: ignore[arg-type]  # pyarrow stubs: mixed-type fields
        pa.field("id", pa.int64()),
        pa.field("name", pa.string()),
        pa.field("budget", pa.float64()),
    ]
)

DepartmentsScanFunction = _static_scan_function(
    func_name="departments_scan",
    func_description="Scan departments table",
    output_schema=_DEPARTMENTS_SCHEMA,
    data={
        "id": [1, 2, 3],
        "name": ["Engineering", "Sales", "HR"],
        "budget": [500000.0, 300000.0, 200000.0],
    },
)
|
|
91
|
+
|
|
92
|
+
# employees: five people, each carrying a department_id.
_EMPLOYEES_SCHEMA = pa.schema(
    [  # type: ignore[arg-type]  # pyarrow stubs: mixed-type fields
        pa.field("id", pa.int64()),
        pa.field("name", pa.string()),
        pa.field("email", pa.string()),
        pa.field("department_id", pa.int64()),
    ]
)

EmployeesScanFunction = _static_scan_function(
    func_name="employees_scan",
    func_description="Scan employees table",
    output_schema=_EMPLOYEES_SCHEMA,
    data={
        "id": [1, 2, 3, 4, 5],
        "name": ["Alice", "Bob", "Carol", "Dave", "Eve"],
        "email": ["alice@co.com", "bob@co.com", "carol@co.com", "dave@co.com", "eve@co.com"],
        "department_id": [1, 1, 2, 2, 3],
    },
)
|
|
110
|
+
|
|
111
|
+
# projects: three projects, each with a department_id, a code and a title.
_PROJECTS_SCHEMA = pa.schema(
    [  # type: ignore[arg-type]  # pyarrow stubs: mixed-type fields
        pa.field("department_id", pa.int64()),
        pa.field("project_code", pa.string()),
        pa.field("title", pa.string()),
    ]
)

ProjectsScanFunction = _static_scan_function(
    func_name="projects_scan",
    func_description="Scan projects table",
    output_schema=_PROJECTS_SCHEMA,
    data={
        "department_id": [1, 1, 2],
        "project_code": ["P001", "P002", "P003"],
        "title": ["Backend API", "Frontend UI", "Sales Portal"],
    },
)
|
|
127
|
+
|
|
128
|
+
# products: three items with an integer quantity and a float price.
_PRODUCTS_SCHEMA = pa.schema(
    [  # type: ignore[arg-type]  # pyarrow stubs: mixed-type fields
        pa.field("id", pa.int64()),
        pa.field("name", pa.string()),
        pa.field("quantity", pa.int64()),
        pa.field("price", pa.float64()),
    ]
)

ProductsScanFunction = _static_scan_function(
    func_name="products_scan",
    func_description="Scan products table",
    output_schema=_PRODUCTS_SCHEMA,
    data={
        "id": [1, 2, 3],
        "name": ["Widget", "Gadget", "Doohickey"],
        "quantity": [100, 50, 200],
        "price": [9.99, 24.99, 4.99],
    },
)
|
|
146
|
+
|
|
147
|
+
# colors: three named colors with their hex codes; the scan itself emits
# ``color`` as a plain string column.
_COLORS_SCHEMA = pa.schema(
    [  # type: ignore[arg-type]  # pyarrow stubs: mixed-type fields
        pa.field("id", pa.int64()),
        pa.field("color", pa.string()),
        pa.field("hex_code", pa.string()),
    ]
)

ColorsScanFunction = _static_scan_function(
    func_name="colors_scan",
    func_description="Scan colors table (ENUM column)",
    output_schema=_COLORS_SCHEMA,
    data={
        "id": [1, 2, 3],
        "color": ["blue", "green", "red"],
        "hex_code": ["#0000FF", "#00FF00", "#FF0000"],
    },
)
|