vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/client/cli.py
ADDED
|
@@ -0,0 +1,582 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
r"""Command-line interface for the VGI client.
|
|
4
|
+
|
|
5
|
+
This module provides the CLI entry point for invoking VGI functions and
|
|
6
|
+
managing catalogs.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
# Table-in-out functions (with input):
|
|
10
|
+
vgi-client --input data.parquet --function echo
|
|
11
|
+
vgi-client --input data.parquet --function sum_all_columns
|
|
12
|
+
vgi-client --input data.parquet --function repeat_inputs --args '[3]'
|
|
13
|
+
|
|
14
|
+
# Table functions (no input):
|
|
15
|
+
vgi-client --function sequence --args '[100]'
|
|
16
|
+
vgi-client --function sequence --args '[5]' --named-arg increment=10
|
|
17
|
+
vgi-client --function range --args '[0, 10]'
|
|
18
|
+
|
|
19
|
+
# Scalar functions (with input, single-column output):
|
|
20
|
+
vgi-client --input data.parquet --function double \
|
|
21
|
+
--args '["x"]' --type scalar
|
|
22
|
+
|
|
23
|
+
# Specify table input position (for functions where TableInput isn't first):
|
|
24
|
+
vgi-client --input data.parquet --function transform --args '["prefix"]' \
|
|
25
|
+
--table-input-position 1
|
|
26
|
+
|
|
27
|
+
# Output in Arrow IPC format (useful for debugging):
|
|
28
|
+
vgi-client --function sequence --args '[10]' --format arrow-ipc -o out.arrow
|
|
29
|
+
vgi-client --function echo --input data.parquet --format arrow-ipc -o -
|
|
30
|
+
|
|
31
|
+
# Catalog operations (all nested under 'catalog'):
|
|
32
|
+
vgi-client catalog list --worker vgi-fixture-worker
|
|
33
|
+
vgi-client catalog attach example --worker vgi-fixture-worker
|
|
34
|
+
vgi-client catalog schema list $ATTACH_ID --worker vgi-fixture-worker
|
|
35
|
+
vgi-client catalog schema contents $ATTACH_ID main --worker vgi-fixture-worker
|
|
36
|
+
vgi-client catalog table get $ATTACH_ID main users --worker vgi-fixture-worker
|
|
37
|
+
vgi-client catalog transaction begin $ATTACH_ID --worker vgi-fixture-worker
|
|
38
|
+
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
import io
|
|
42
|
+
import json
|
|
43
|
+
import logging
|
|
44
|
+
import sys
|
|
45
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
46
|
+
|
|
47
|
+
import pyarrow as pa
|
|
48
|
+
from pyarrow import ipc
|
|
49
|
+
|
|
50
|
+
from vgi.arguments import Arguments
|
|
51
|
+
from vgi.client.client import Client, ClientError
|
|
52
|
+
|
|
53
|
+
_logger = logging.getLogger("vgi.client.cli")
|
|
54
|
+
|
|
55
|
+
if TYPE_CHECKING:
|
|
56
|
+
import pyarrow.parquet as pq
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class OutputWriter:
    """Handles writing output batches in various formats.

    Supported formats:
    - json: JSON Lines format (one JSON object per row)
    - csv: CSV with header
    - parquet: Apache Parquet columnar format
    - arrow-ipc: Apache Arrow IPC streaming format (useful for debugging)

    The arrow-ipc format writes batches in the standard Arrow IPC streaming
    format, which can be read by any Arrow implementation. This is useful for:
    - Debugging VGI protocol issues
    - Inspecting raw output data with tools like pyarrow or arrow CLI
    - Piping data to other Arrow-aware tools

    Parquet and arrow-ipc keep one writer open across batches (created lazily
    from the first batch's schema). csv and json write each batch on its own:
    the first write creates/truncates the target file and later writes append.
    Call :meth:`close` when done so the writer and any file handle opened by
    this object are closed.
    """

    def __init__(self, output_file: str | None, format: str, schema: pa.Schema | None = None):
        """Initialize the output writer.

        Args:
            output_file: Path to output file, "-" for stdout, or None for logging.
            format: Output format ("parquet", "csv", "json", or "arrow-ipc").
            schema: Optional schema for the output data.

        """
        self.output_file = output_file
        self.format = format
        self.schema = schema
        # Lazily created on the first parquet / arrow-ipc batch.
        self._writer: pq.ParquetWriter | ipc.RecordBatchStreamWriter | None = None
        self._is_stdout = output_file == "-"
        # True until the first csv/json batch has been written (controls the
        # csv header and truncate-vs-append for file targets).
        self._first_write = True
        # Only set for arrow-ipc file output; owned (and closed) by this object.
        self._output_file_handle: io.IOBase | None = None

    def _get_output_stream(self) -> Any:
        """Return the write target: a stdout stream, or the output path.

        For stdout the binary buffer is returned for the binary formats
        (parquet, arrow-ipc) and the text stream otherwise.
        """
        if self._is_stdout:
            if self.format in ("parquet", "arrow-ipc"):
                return sys.stdout.buffer
            return sys.stdout
        return self.output_file

    @staticmethod
    def _json_default(value: Any) -> Any:
        """Encode values that ``json.dumps`` cannot serialize natively.

        Used as the ``default=`` hook for the json format so that rows holding
        temporal, decimal or binary values are written instead of aborting the
        whole run with ``TypeError``:

        - datetime / date / time -> ISO 8601 string
        - timedelta -> total seconds (float)
        - Decimal -> string (avoids float rounding)
        - bytes-like -> hex string (same encoding the CLI accepts for bytes)

        Raises:
            TypeError: For any other type, mirroring ``json.dumps``.
        """
        import datetime
        import decimal

        if isinstance(value, (datetime.datetime, datetime.date, datetime.time)):
            return value.isoformat()
        if isinstance(value, datetime.timedelta):
            return value.total_seconds()
        if isinstance(value, decimal.Decimal):
            return str(value)
        if isinstance(value, (bytes, bytearray, memoryview)):
            return bytes(value).hex()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    def write_batch(self, batch: pa.RecordBatch) -> None:
        """Write a batch to the output destination in the configured format.

        When no output file was configured the batch is only logged.
        """
        import pyarrow.csv as csv
        import pyarrow.parquet as pq

        if self.output_file is None:
            _logger.info("output_batch num_rows=%s batch=%s", batch.num_rows, batch)
            return

        if self.format == "parquet":
            if self._writer is None:
                if self._is_stdout:
                    self._writer = pq.ParquetWriter(
                        pa.PythonFile(cast(io.IOBase, sys.stdout.buffer), mode="w"),
                        batch.schema,
                    )
                else:
                    self._writer = pq.ParquetWriter(self.output_file, batch.schema)
            self._writer.write_batch(batch)

        elif self.format == "arrow-ipc":
            if self._writer is None:
                if self._is_stdout:
                    sink = pa.PythonFile(cast(io.IOBase, sys.stdout.buffer), mode="w")
                else:
                    # Open file and keep handle for closing in close()
                    self._output_file_handle = open(  # noqa: SIM115
                        self.output_file, "wb"
                    )
                    sink = pa.PythonFile(self._output_file_handle, mode="w")
                self._writer = ipc.new_stream(sink, batch.schema)
            # Type narrowing for mypy
            assert isinstance(self._writer, ipc.RecordBatchStreamWriter)
            self._writer.write_batch(batch)

        elif self.format == "csv":
            table = pa.Table.from_batches([batch])
            # Only the very first batch carries the header row.
            write_options = csv.WriteOptions(include_header=self._first_write)
            if self._is_stdout:
                csv.write_csv(table, sys.stdout.buffer, write_options)
            elif self._first_write:
                # Writing to the path creates/overwrites the file.
                csv.write_csv(table, self.output_file, write_options)
            else:
                with open(self.output_file, "ab") as f:
                    csv.write_csv(table, f, write_options)
            self._first_write = False

        elif self.format == "json":
            rows = pa.Table.from_batches([batch]).to_pylist()
            if self._is_stdout:
                for row in rows:
                    print(json.dumps(row, default=self._json_default))
            else:
                mode = "w" if self._first_write else "a"
                with open(self.output_file, mode, encoding="utf-8") as f:
                    for row in rows:
                        f.write(json.dumps(row, default=self._json_default) + "\n")
            self._first_write = False

    def close(self) -> None:
        """Close the underlying writer and owned file handle, if any.

        Safe to call more than once. The file handle is closed even when
        closing the writer raises, so a failing writer cannot leak it.
        """
        # Detach first so a repeated close() is a no-op.
        writer, self._writer = self._writer, None
        handle, self._output_file_handle = self._output_file_handle, None
        try:
            if writer is not None:
                writer.close()
        finally:
            if handle is not None:
                handle.close()
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# Help epilog for the root command; passed as ``epilog=`` to ``click.group``
# in ``_create_cli``. Each line holding only ``\b`` is click's marker that the
# paragraph following it must keep its line breaks instead of being re-wrapped.
# This is deliberately NOT a raw string: ``\b`` has to be the backspace
# character for click to recognise it.
_CLI_EPILOG = """
|
|
174
|
+
\b
|
|
175
|
+
EXAMPLES:
|
|
176
|
+
# Table function (generates data, no input)
|
|
177
|
+
vgi-client --function sequence --args '[10]'
|
|
178
|
+
vgi-client --function sequence --args '[5]' --named-arg increment=10
|
|
179
|
+
\b
|
|
180
|
+
# Table-in-out function (transforms input)
|
|
181
|
+
vgi-client --input data.parquet --function echo
|
|
182
|
+
\b
|
|
183
|
+
# Scalar function (per-row, single output column)
|
|
184
|
+
vgi-client --input in.parquet --function upper_case -t scalar
|
|
185
|
+
\b
|
|
186
|
+
# Output to file with format
|
|
187
|
+
vgi-client --function sequence --args '[5]' -o out.json
|
|
188
|
+
vgi-client --function sequence --args '[5]' -o out.parquet -f parquet
|
|
189
|
+
vgi-client --function sequence --args '[5]' -o - -f arrow-ipc
|
|
190
|
+
\b
|
|
191
|
+
# Catalog operations
|
|
192
|
+
vgi-client catalog list -w vgi-fixture-worker
|
|
193
|
+
vgi-client catalog attach mydb -w vgi-fixture-worker
|
|
194
|
+
|
|
195
|
+
\b
|
|
196
|
+
FUNCTION TYPES:
|
|
197
|
+
table No input, generates data (sequence, range)
|
|
198
|
+
table-in-out Transforms input (echo, filter) - default with --input
|
|
199
|
+
scalar Per-row transform, single column output (upper_case)
|
|
200
|
+
auto Default: table-in-out if --input, else table
|
|
201
|
+
|
|
202
|
+
\b
|
|
203
|
+
ARGUMENT FORMAT (--args as JSON array):
|
|
204
|
+
'[]' No arguments
|
|
205
|
+
'[10]' Single integer
|
|
206
|
+
'["name"]' Single string (column name)
|
|
207
|
+
'[0, 100, 5]' Multiple integers
|
|
208
|
+
'[true, 3.14]' Mixed types
|
|
209
|
+
|
|
210
|
+
\b
|
|
211
|
+
NAMED ARGUMENTS (--named-arg key=value):
|
|
212
|
+
--named-arg increment=2 Integer value
|
|
213
|
+
--named-arg name="test" String value (use JSON quotes)
|
|
214
|
+
--named-arg flag=true Boolean value
|
|
215
|
+
|
|
216
|
+
\b
|
|
217
|
+
SETTINGS (-s/--setting key=value):
|
|
218
|
+
-s vgi_verbose_mode=true Enable verbose mode
|
|
219
|
+
-s greeting=Hello String setting
|
|
220
|
+
-s multiplier=2 Integer setting (passed as string)
|
|
221
|
+
|
|
222
|
+
\b
|
|
223
|
+
OUTPUT FORMATS (-f/--format):
|
|
224
|
+
json JSON Lines, one object per row (default)
|
|
225
|
+
csv CSV with header row
|
|
226
|
+
parquet Apache Parquet columnar format
|
|
227
|
+
arrow-ipc Arrow IPC stream (for debugging/piping)
|
|
228
|
+
|
|
229
|
+
\b
|
|
230
|
+
ENVIRONMENT VARIABLES:
|
|
231
|
+
VGI_WORKER_DEBUG=1 Enable DEBUG logging on worker and stderr passthrough on client
|
|
232
|
+
VGI_QUIET=1 Suppress worker startup logging
|
|
233
|
+
"""
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _create_cli() -> Any:
    """Create the CLI command group. Separated for testability.

    Builds the root click group (which doubles as the legacy
    ``--function`` invocation command), registers the ``catalog``
    sub-group on it and returns it. ``click`` and the heavier pyarrow
    submodules are imported here rather than at module import time.

    Returns:
        The configured click group.
    """
    import click
    import pyarrow.parquet as pq

    from vgi.client.cli_catalog import catalog

    def _split_key_value(option: str, raw: str) -> tuple[str, str]:
        """Split a ``key=value`` option on its first ``=``; reject a missing ``=`` or empty key."""
        key, sep, value = raw.partition("=")
        if not sep or not key:
            raise click.ClickException(f"Invalid {option} format: '{raw}'. Expected key=value.")
        return key, value

    def _decode_hex(option: str, raw: str | None) -> bytes | None:
        """Decode an optional hex-string option to bytes, reporting bad hex as a usage error."""
        if raw is None:
            return None
        try:
            return bytes.fromhex(raw)
        except ValueError as e:
            raise click.ClickException(f"Invalid {option}: must be a valid hex string: {e}") from e

    def _to_scalar(option: str, value: Any) -> Any:
        """Convert a decoded JSON value to a PyArrow scalar, reporting failures as a usage error."""
        try:
            return pa.scalar(value)
        except (pa.ArrowException, TypeError, ValueError, OverflowError) as e:
            # e.g. a heterogeneous nested list such as [1, "a"] has no Arrow type.
            raise click.ClickException(
                f"Invalid value for {option}: cannot convert {value!r} to an Arrow scalar: {e}"
            ) from e

    @click.group(invoke_without_command=True, epilog=_CLI_EPILOG)
    @click.option(
        "--input",
        "input_file",
        required=False,
        type=click.Path(exists=True),
        help=(
            "Input parquet file path. Required for table-in-out and scalar functions. "
            "Omit for table functions (generators)."
        ),
    )
    @click.option(
        "--output",
        "-o",
        "output_file",
        type=str,
        help="Output file path. Use '-' for stdout. If omitted, outputs to log.",
    )
    @click.option(
        "--format",
        "-f",
        "output_format",
        type=click.Choice(["json", "csv", "parquet", "arrow-ipc"]),
        default="json",
        help="Output format: json (default), csv, parquet, or arrow-ipc.",
    )
    @click.option(
        "--function",
        "function_name",
        required=False,
        type=str,
        help="Function name to invoke (e.g., sequence, echo, upper_case).",
    )
    @click.option(
        "--args",
        "arguments",
        default="[]",
        type=str,
        help="JSON array of positional arguments. Example: '[10]' or '[\"col\"]'.",
    )
    @click.option(
        "--worker",
        "-w",
        "worker_path",
        default="vgi-fixture-worker",
        type=str,
        help="VGI worker command or path. Default: vgi-fixture-worker.",
    )
    @click.option(
        "--type",
        "-t",
        "function_type",
        type=click.Choice(["auto", "table", "table-in-out", "scalar"]),
        default="auto",
        help=(
            "Function type: auto (default), table, table-in-out, or scalar. "
            "'auto' uses table-in-out if --input provided, otherwise table."
        ),
    )
    @click.option(
        "--worker-stderr",
        is_flag=True,
        default=False,
        help="Pass worker stderr through to CLI stderr (for debugging).",
    )
    @click.option(
        "--max-workers",
        "max_workers",
        type=int,
        default=None,
        help="Max worker processes. Clamps function's max_processes setting.",
    )
    @click.option(
        "--projection-id",
        "projection_ids",
        multiple=True,
        type=int,
        help="Column ID for projection pushdown. Can be repeated.",
    )
    @click.option(
        "--pushdown-filters",
        "pushdown_filters",
        type=str,
        default=None,
        help="Filter predicates as hex-encoded bytes for filter pushdown.",
    )
    @click.option(
        "--table-input-position",
        "table_input_position",
        type=int,
        default=None,
        help=(
            "Position (0-indexed) to insert table input in positional args. "
            "Example: --args '[\"prefix\"]' --table-input-position 1"
        ),
    )
    @click.option(
        "--attach-opaque-data",
        "attach_opaque_data",
        type=str,
        default=None,
        help="DuckDB attachment ID (hex string) for catalog context.",
    )
    @click.option(
        "--transaction-opaque-data",
        "transaction_opaque_data",
        type=str,
        default=None,
        help="DuckDB transaction ID (hex string) for transactional operations.",
    )
    @click.option(
        "--named-arg",
        "named_arg_list",
        multiple=True,
        type=str,
        help="Named argument as key=value. Can be repeated. E.g.: --named-arg x=2",
    )
    @click.option(
        "--setting",
        "-s",
        "setting_list",
        multiple=True,
        type=str,
        help="Setting as key=value. Can be repeated. E.g.: -s greeting=Hi",
    )
    @click.pass_context
    def cli(
        ctx: click.Context,
        input_file: str | None,
        output_file: str | None,
        output_format: str,
        function_name: str | None,
        arguments: str,
        worker_path: str,
        worker_stderr: bool,
        projection_ids: tuple[int, ...],
        pushdown_filters: str | None,
        max_workers: int | None,
        table_input_position: int | None,
        attach_opaque_data: str | None,
        function_type: str,
        transaction_opaque_data: str | None,
        named_arg_list: tuple[str, ...],
        setting_list: tuple[str, ...],
    ) -> None:
        """VGI client - invoke functions and manage catalogs.

        QUICK START: Use --function to invoke a VGI function, or use the
        'catalog' subcommand for catalog operations. See examples below.
        """
        # NOTE: the docstring above is rendered by click as the --help text.

        # If a subcommand is being invoked, skip function invocation
        if ctx.invoked_subcommand is not None:
            return

        # Legacy function invocation mode - requires --function
        if function_name is None:
            click.echo(ctx.get_help())
            return

        # Keep the try body to the one call that can raise JSONDecodeError.
        try:
            args_list = json.loads(arguments)
        except json.JSONDecodeError as e:
            _logger.error("invalid_json_arguments error=%s", e)
            raise click.ClickException(f"Invalid JSON in --args: {e}") from e
        if not isinstance(args_list, list):
            raise click.ClickException("--args must be a JSON array")

        # Validate table_input_position
        if table_input_position is not None:
            if input_file is None:
                raise click.ClickException("--table-input-position requires --input to be specified")
            if table_input_position < 0:
                raise click.ClickException("--table-input-position must be non-negative")
            if table_input_position > len(args_list):
                raise click.ClickException(
                    f"--table-input-position {table_input_position} is out of range "
                    f"for {len(args_list)} arguments (max: {len(args_list)})"
                )

        # Convert args_list to PyArrow scalars
        positional_args = tuple(_to_scalar("--args", arg) for arg in args_list)

        # Parse named arguments into dict
        named_args: dict[str, pa.Scalar[Any]] = {}
        for named_arg in named_arg_list:
            key, value_str = _split_key_value("--named-arg", named_arg)
            # Try to parse value as JSON, fall back to string
            try:
                value = json.loads(value_str)
            except json.JSONDecodeError:
                # Treat as string if not valid JSON
                value = value_str
            named_args[key] = _to_scalar("--named-arg", value)

        # Parse settings into dict (settings are always strings in the protocol)
        settings: dict[str, str] | None = None
        if setting_list:
            # A repeated key keeps its last value.
            settings = dict(_split_key_value("--setting", setting) for setting in setting_list)

        # Hex-encoded byte options
        attach_opaque_data_bytes = _decode_hex("--attach-opaque-data", attach_opaque_data)
        transaction_opaque_data_bytes = _decode_hex("--transaction-opaque-data", transaction_opaque_data)
        pushdown_filters_bytes = _decode_hex("--pushdown-filters", pushdown_filters)

        _logger.info("starting_worker function=%s worker_path=%s", function_name, worker_path)

        # Validate function_type requirements
        if function_type == "scalar" and input_file is None:
            raise click.ClickException("--type scalar requires --input to be specified")
        if function_type == "table-in-out" and input_file is None:
            raise click.ClickException("--type table-in-out requires --input to be specified")
        if function_type == "table" and input_file is not None:
            raise click.ClickException("--type table does not accept --input (table functions have no input)")

        # Created lazily from the first output batch; closed in `finally`.
        output_writer: OutputWriter | None = None
        try:
            with Client(
                worker_path,
                passthrough_stderr=worker_stderr,
                worker_limit=max_workers,
                attach_opaque_data=attach_opaque_data_bytes,
            ) as client:
                # Determine effective function type
                if function_type == "auto":
                    effective_type = "table" if input_file is None else "table-in-out"
                else:
                    effective_type = function_type

                # Build arguments object
                func_args = Arguments(positional=positional_args, named=named_args)

                if effective_type == "table":
                    # Table function (no input)
                    _logger.info("invoking_table_function function=%s", function_name)
                    output_iterator = client.table_function(
                        function_name=function_name,
                        arguments=func_args,
                        projection_ids=list(projection_ids) if projection_ids else None,
                        pushdown_filters=pushdown_filters_bytes,
                        transaction_opaque_data=transaction_opaque_data_bytes,
                        settings=settings,
                    )
                elif effective_type == "scalar":
                    # Scalar function (with input, single-column output)
                    assert input_file is not None  # Validated earlier
                    _logger.info("invoking_scalar_function function=%s", function_name)
                    _logger.info("reading_input file=%s", input_file)
                    pf = pq.ParquetFile(input_file)

                    output_iterator = client.scalar_function(
                        function_name=function_name,
                        arguments=func_args,
                        input=pf.iter_batches(),
                        transaction_opaque_data=transaction_opaque_data_bytes,
                        settings=settings,
                    )
                else:
                    # Table-in-out function (with input)
                    assert input_file is not None  # Validated earlier
                    _logger.info("invoking_table_in_out_function function=%s", function_name)
                    _logger.info("reading_input file=%s", input_file)
                    pf = pq.ParquetFile(input_file)

                    # If table_input_position is specified, log it for debugging
                    # The table input position tells the user where the table data
                    # appears in the function signature (e.g., position 1 means the
                    # table is the second argument). This is purely informational
                    # for the CLI user - the protocol handles table data separately.
                    if table_input_position is not None:
                        _logger.debug(
                            "table_input_position_specified position=%s num_args=%s",
                            table_input_position,
                            len(positional_args),
                        )

                    output_iterator = client.table_in_out_function(
                        function_name=function_name,
                        arguments=func_args,
                        input=pf.iter_batches(),
                        projection_ids=list(projection_ids) if projection_ids else None,
                        pushdown_filters=pushdown_filters_bytes,
                        transaction_opaque_data=transaction_opaque_data_bytes,
                        settings=settings,
                    )

                for output_batch in output_iterator:
                    if output_writer is None:
                        output_writer = OutputWriter(output_file, output_format, output_batch.schema)
                    output_writer.write_batch(output_batch)

                _logger.info("processing_complete function=%s", function_name)
        except ClientError as e:
            raise click.ClickException(str(e)) from e
        finally:
            if output_writer is not None:
                output_writer.close()

    # Add catalog subcommand group (schema/table/view/transaction nested under it)
    cli.add_command(catalog)

    return cli
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
# Module-level command object: invoked by main() below and importable directly (e.g. by tests)
|
|
573
|
+
cli = _create_cli()
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def main() -> None:
    """Run the vgi-client command line by invoking the module-level click group."""
    cli()
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
if __name__ == "__main__":
|
|
582
|
+
main()
|