vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/client/cli.py ADDED
@@ -0,0 +1,582 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ r"""Command-line interface for the VGI client.
4
+
5
+ This module provides the CLI entry point for invoking VGI functions and
6
+ managing catalogs.
7
+
8
+ Usage:
9
+ # Table-in-out functions (with input):
10
+ vgi-client --input data.parquet --function echo
11
+ vgi-client --input data.parquet --function sum_all_columns
12
+ vgi-client --input data.parquet --function repeat_inputs --args '[3]'
13
+
14
+ # Table functions (no input):
15
+ vgi-client --function sequence --args '[100]'
16
+ vgi-client --function sequence --args '[5]' --named-arg increment=10
17
+ vgi-client --function range --args '[0, 10]'
18
+
19
+ # Scalar functions (with input, single-column output):
20
+ vgi-client --input data.parquet --function double \
21
+ --args '["x"]' --type scalar
22
+
23
+ # Specify table input position (for functions where TableInput isn't first):
24
+ vgi-client --input data.parquet --function transform --args '["prefix"]' \
25
+ --table-input-position 1
26
+
27
+ # Output in Arrow IPC format (useful for debugging):
28
+ vgi-client --function sequence --args '[10]' --format arrow-ipc -o out.arrow
29
+ vgi-client --function echo --input data.parquet --format arrow-ipc -o -
30
+
31
+ # Catalog operations (all nested under 'catalog'):
32
+ vgi-client catalog list --worker vgi-fixture-worker
33
+ vgi-client catalog attach example --worker vgi-fixture-worker
34
+ vgi-client catalog schema list $ATTACH_ID --worker vgi-fixture-worker
35
+ vgi-client catalog schema contents $ATTACH_ID main --worker vgi-fixture-worker
36
+ vgi-client catalog table get $ATTACH_ID main users --worker vgi-fixture-worker
37
+ vgi-client catalog transaction begin $ATTACH_ID --worker vgi-fixture-worker
38
+
39
+ """
40
+
41
+ import io
42
+ import json
43
+ import logging
44
+ import sys
45
+ from typing import TYPE_CHECKING, Any, cast
46
+
47
+ import pyarrow as pa
48
+ from pyarrow import ipc
49
+
50
+ from vgi.arguments import Arguments
51
+ from vgi.client.client import Client, ClientError
52
+
53
+ _logger = logging.getLogger("vgi.client.cli")
54
+
55
+ if TYPE_CHECKING:
56
+ import pyarrow.parquet as pq
57
+
58
+
59
class OutputWriter:
    """Write output record batches to a file, stdout, or the log.

    Supported formats:
    - json: JSON Lines format (one JSON object per row)
    - csv: CSV with header
    - parquet: Apache Parquet columnar format
    - arrow-ipc: Apache Arrow IPC streaming format (useful for debugging)

    The arrow-ipc format writes batches in the standard Arrow IPC streaming
    format, which can be read by any Arrow implementation. This is useful for:
    - Debugging VGI protocol issues
    - Inspecting raw output data with tools like pyarrow or arrow CLI
    - Piping data to other Arrow-aware tools

    Parquet and arrow-ipc keep a writer open across batches, so ``close()``
    must be called once all batches have been written.
    """

    def __init__(self, output_file: str | None, format: str, schema: pa.Schema | None = None):
        """Initialize the output writer.

        Args:
            output_file: Path to output file, "-" for stdout, or None for logging.
            format: Output format ("parquet", "csv", "json", or "arrow-ipc").
            schema: Optional schema for the output data (stored, not otherwise
                used by this class; writers take the schema of the first batch).

        """
        self.output_file = output_file
        self.format = format
        self.schema = schema
        # Lazily created on the first parquet / arrow-ipc batch.
        self._writer: pq.ParquetWriter | ipc.RecordBatchStreamWriter | None = None
        self._is_stdout = output_file == "-"
        # True until the first csv/json batch has been written; controls the
        # CSV header and truncate-vs-append file mode.
        self._first_write = True
        # Only set for arrow-ipc file output; closed in close().
        self._output_file_handle: io.IOBase | None = None

    def _get_output_stream(self) -> Any:
        """Return the stdout stream matching the format, or the output path."""
        if not self._is_stdout:
            return self.output_file
        # Binary formats need the raw byte stream rather than the text wrapper.
        if self.format in ("parquet", "arrow-ipc"):
            return sys.stdout.buffer
        return sys.stdout

    def write_batch(self, batch: pa.RecordBatch) -> None:
        """Write a batch to the output destination in the configured format.

        When no output file was given the batch is only logged.

        Raises:
            ValueError: If the configured format is not one of the supported
                formats (previously such batches were silently discarded).

        """
        path = self.output_file
        if path is None:
            _logger.info("output_batch num_rows=%s batch=%s", batch.num_rows, batch)
            return

        if self.format == "parquet":
            self._write_parquet(batch, path)
        elif self.format == "arrow-ipc":
            self._write_arrow_ipc(batch, path)
        elif self.format == "csv":
            self._write_csv(batch, path)
        elif self.format == "json":
            self._write_json(batch, path)
        else:
            raise ValueError(f"Unsupported output format: {self.format!r}")

    def _write_parquet(self, batch: pa.RecordBatch, path: str) -> None:
        """Append a batch to the Parquet writer, creating it on first use."""
        # Imported lazily so other formats do not pay for loading parquet.
        import pyarrow.parquet as pq

        if self._writer is None:
            if self._is_stdout:
                sink = pa.PythonFile(cast(io.IOBase, sys.stdout.buffer), mode="w")
                self._writer = pq.ParquetWriter(sink, batch.schema)
            else:
                self._writer = pq.ParquetWriter(path, batch.schema)
        self._writer.write_batch(batch)

    def _write_arrow_ipc(self, batch: pa.RecordBatch, path: str) -> None:
        """Append a batch to the Arrow IPC stream, creating it on first use."""
        if self._writer is None:
            if self._is_stdout:
                sink = pa.PythonFile(cast(io.IOBase, sys.stdout.buffer), mode="w")
            else:
                # Keep the handle so close() can release it after the writer.
                self._output_file_handle = open(path, "wb")  # noqa: SIM115
                sink = pa.PythonFile(self._output_file_handle, mode="w")
            self._writer = ipc.new_stream(sink, batch.schema)
        # Type narrowing for mypy
        assert isinstance(self._writer, ipc.RecordBatchStreamWriter)
        self._writer.write_batch(batch)

    def _write_csv(self, batch: pa.RecordBatch, path: str) -> None:
        """Write a batch as CSV, emitting the header only with the first batch."""
        import pyarrow.csv as csv

        table = pa.Table.from_batches([batch])
        options = csv.WriteOptions(include_header=self._first_write)
        if self._is_stdout:
            csv.write_csv(table, sys.stdout.buffer, options)
        else:
            # First batch truncates/creates the file, later batches append.
            with open(path, "wb" if self._first_write else "ab") as f:
                csv.write_csv(table, f, options)
        self._first_write = False

    def _write_json(self, batch: pa.RecordBatch, path: str) -> None:
        """Write a batch as JSON Lines (one object per row)."""
        rows = pa.Table.from_batches([batch]).to_pylist()
        if self._is_stdout:
            for row in rows:
                print(json.dumps(row))
        else:
            # First batch truncates/creates the file, later batches append.
            with open(path, "w" if self._first_write else "a") as f:
                f.writelines(json.dumps(row) + "\n" for row in rows)
        self._first_write = False

    def close(self) -> None:
        """Close the underlying writer and file handle if they exist.

        Safe to call more than once. The file handle is released even when
        closing the writer raises, so a failed flush cannot leak the file.
        """
        writer, self._writer = self._writer, None
        handle, self._output_file_handle = self._output_file_handle, None
        try:
            if writer is not None:
                writer.close()
        finally:
            if handle is not None:
                handle.close()
171
+
172
+
173
+ _CLI_EPILOG = """
174
+ \b
175
+ EXAMPLES:
176
+ # Table function (generates data, no input)
177
+ vgi-client --function sequence --args '[10]'
178
+ vgi-client --function sequence --args '[5]' --named-arg increment=10
179
+ \b
180
+ # Table-in-out function (transforms input)
181
+ vgi-client --input data.parquet --function echo
182
+ \b
183
+ # Scalar function (per-row, single output column)
184
+ vgi-client --input in.parquet --function upper_case -t scalar
185
+ \b
186
+ # Output to file with format
187
+ vgi-client --function sequence --args '[5]' -o out.json
188
+ vgi-client --function sequence --args '[5]' -o out.parquet -f parquet
189
+ vgi-client --function sequence --args '[5]' -o - -f arrow-ipc
190
+ \b
191
+ # Catalog operations
192
+ vgi-client catalog list -w vgi-fixture-worker
193
+ vgi-client catalog attach mydb -w vgi-fixture-worker
194
+
195
+ \b
196
+ FUNCTION TYPES:
197
+ table No input, generates data (sequence, range)
198
+ table-in-out Transforms input (echo, filter) - default with --input
199
+ scalar Per-row transform, single column output (upper_case)
200
+ auto Default: table-in-out if --input, else table
201
+
202
+ \b
203
+ ARGUMENT FORMAT (--args as JSON array):
204
+ '[]' No arguments
205
+ '[10]' Single integer
206
+ '["name"]' Single string (column name)
207
+ '[0, 100, 5]' Multiple integers
208
+ '[true, 3.14]' Mixed types
209
+
210
+ \b
211
+ NAMED ARGUMENTS (--named-arg key=value):
212
+ --named-arg increment=2 Integer value
213
+ --named-arg name="test" String value (use JSON quotes)
214
+ --named-arg flag=true Boolean value
215
+
216
+ \b
217
+ SETTINGS (-s/--setting key=value):
218
+ -s vgi_verbose_mode=true Enable verbose mode
219
+ -s greeting=Hello String setting
220
+ -s multiplier=2 Integer setting (passed as string)
221
+
222
+ \b
223
+ OUTPUT FORMATS (-f/--format):
224
+ json JSON Lines, one object per row (default)
225
+ csv CSV with header row
226
+ parquet Apache Parquet columnar format
227
+ arrow-ipc Arrow IPC stream (for debugging/piping)
228
+
229
+ \b
230
+ ENVIRONMENT VARIABLES:
231
+ VGI_WORKER_DEBUG=1 Enable DEBUG logging on worker and stderr passthrough on client
232
+ VGI_QUIET=1 Suppress worker startup logging
233
+ """
234
+
235
+
236
def _create_cli() -> Any:
    """Create the CLI command group. Separated for testability.

    Builds the top-level ``click`` group that invokes a VGI function when
    ``--function`` is given (and no subcommand is used), and registers the
    ``catalog`` subcommand group on it.

    Returns:
        The configured click group.

    """
    import contextlib

    import click
    import pyarrow.parquet as pq

    from vgi.client.cli_catalog import catalog

    @click.group(invoke_without_command=True, epilog=_CLI_EPILOG)
    @click.option(
        "--input",
        "input_file",
        required=False,
        type=click.Path(exists=True),
        help=(
            "Input parquet file path. Required for table-in-out and scalar functions. "
            "Omit for table functions (generators)."
        ),
    )
    @click.option(
        "--output",
        "-o",
        "output_file",
        type=str,
        help="Output file path. Use '-' for stdout. If omitted, outputs to log.",
    )
    @click.option(
        "--format",
        "-f",
        "output_format",
        type=click.Choice(["json", "csv", "parquet", "arrow-ipc"]),
        default="json",
        help="Output format: json (default), csv, parquet, or arrow-ipc.",
    )
    @click.option(
        "--function",
        "function_name",
        required=False,
        type=str,
        help="Function name to invoke (e.g., sequence, echo, upper_case).",
    )
    @click.option(
        "--args",
        "arguments",
        default="[]",
        type=str,
        help="JSON array of positional arguments. Example: '[10]' or '[\"col\"]'.",
    )
    @click.option(
        "--worker",
        "-w",
        "worker_path",
        default="vgi-fixture-worker",
        type=str,
        help="VGI worker command or path. Default: vgi-fixture-worker.",
    )
    @click.option(
        "--type",
        "-t",
        "function_type",
        type=click.Choice(["auto", "table", "table-in-out", "scalar"]),
        default="auto",
        help=(
            "Function type: auto (default), table, table-in-out, or scalar. "
            "'auto' uses table-in-out if --input provided, otherwise table."
        ),
    )
    @click.option(
        "--worker-stderr",
        is_flag=True,
        default=False,
        help="Pass worker stderr through to CLI stderr (for debugging).",
    )
    @click.option(
        "--max-workers",
        "max_workers",
        type=int,
        default=None,
        help="Max worker processes. Clamps function's max_processes setting.",
    )
    @click.option(
        "--projection-id",
        "projection_ids",
        multiple=True,
        type=int,
        help="Column ID for projection pushdown. Can be repeated.",
    )
    @click.option(
        "--pushdown-filters",
        "pushdown_filters",
        type=str,
        default=None,
        help="Filter predicates as hex-encoded bytes for filter pushdown.",
    )
    @click.option(
        "--table-input-position",
        "table_input_position",
        type=int,
        default=None,
        help=(
            "Position (0-indexed) to insert table input in positional args. "
            "Example: --args '[\"prefix\"]' --table-input-position 1"
        ),
    )
    @click.option(
        "--attach-opaque-data",
        "attach_opaque_data",
        type=str,
        default=None,
        help="DuckDB attachment ID (hex string) for catalog context.",
    )
    @click.option(
        "--transaction-opaque-data",
        "transaction_opaque_data",
        type=str,
        default=None,
        help="DuckDB transaction ID (hex string) for transactional operations.",
    )
    @click.option(
        "--named-arg",
        "named_arg_list",
        multiple=True,
        type=str,
        help="Named argument as key=value. Can be repeated. E.g.: --named-arg x=2",
    )
    @click.option(
        "--setting",
        "-s",
        "setting_list",
        multiple=True,
        type=str,
        help="Setting as key=value. Can be repeated. E.g.: -s greeting=Hi",
    )
    @click.pass_context
    def cli(
        ctx: click.Context,
        input_file: str | None,
        output_file: str | None,
        output_format: str,
        function_name: str | None,
        arguments: str,
        worker_path: str,
        worker_stderr: bool,
        projection_ids: tuple[int, ...],
        pushdown_filters: str | None,
        max_workers: int | None,
        table_input_position: int | None,
        attach_opaque_data: str | None,
        function_type: str,
        transaction_opaque_data: str | None,
        named_arg_list: tuple[str, ...],
        setting_list: tuple[str, ...],
    ) -> None:
        """VGI client - invoke functions and manage catalogs.

        QUICK START: Use --function to invoke a VGI function, or use the
        'catalog' subcommand for catalog operations. See examples below.
        """
        # If a subcommand is being invoked, skip function invocation
        if ctx.invoked_subcommand is not None:
            return

        # Legacy function invocation mode - requires --function
        if function_name is None:
            click.echo(ctx.get_help())
            return

        # Only the JSON parse lives in the try block; the shape check follows.
        try:
            args_list = json.loads(arguments)
        except json.JSONDecodeError as e:
            _logger.error("invalid_json_arguments error=%s", e)
            raise click.ClickException(f"Invalid JSON in --args: {e}") from e
        if not isinstance(args_list, list):
            raise click.ClickException("--args must be a JSON array")

        # Validate table_input_position
        if table_input_position is not None:
            if input_file is None:
                raise click.ClickException("--table-input-position requires --input to be specified")
            if table_input_position < 0:
                raise click.ClickException("--table-input-position must be non-negative")
            if table_input_position > len(args_list):
                raise click.ClickException(
                    f"--table-input-position {table_input_position} is out of range "
                    f"for {len(args_list)} arguments (max: {len(args_list)})"
                )

        # Convert args_list to PyArrow scalars. Values Arrow cannot represent
        # (e.g. a list mixing strings and numbers) become a clean CLI error
        # instead of an uncaught traceback.
        try:
            positional_args = tuple(pa.scalar(arg) for arg in args_list)
        except pa.ArrowException as e:
            raise click.ClickException(f"Unsupported value in --args: {e}") from e

        # Parse named arguments into dict
        named_args: dict[str, pa.Scalar[Any]] = {}
        for named_arg in named_arg_list:
            if "=" not in named_arg:
                raise click.ClickException(f"Invalid --named-arg format: '{named_arg}'. Expected key=value.")
            key, value_str = named_arg.split("=", 1)
            # Try to parse value as JSON, fall back to string
            try:
                value = json.loads(value_str)
            except json.JSONDecodeError:
                # Treat as string if not valid JSON
                value = value_str
            try:
                named_args[key] = pa.scalar(value)
            except pa.ArrowException as e:
                raise click.ClickException(f"Unsupported value for --named-arg '{key}': {e}") from e

        # Parse settings into dict (settings are always strings in the protocol)
        settings: dict[str, str] | None = None
        if setting_list:
            settings = {}
            for setting in setting_list:
                if "=" not in setting:
                    raise click.ClickException(f"Invalid --setting format: '{setting}'. Expected key=value.")
                key, value_str = setting.split("=", 1)
                settings[key] = value_str

        # Parse attach_opaque_data from hex string if provided
        attach_opaque_data_bytes: bytes | None = None
        if attach_opaque_data is not None:
            try:
                attach_opaque_data_bytes = bytes.fromhex(attach_opaque_data)
            except ValueError as e:
                raise click.ClickException(f"Invalid --attach-opaque-data: must be a valid hex string: {e}") from e

        # Parse transaction_opaque_data from hex string if provided
        transaction_opaque_data_bytes: bytes | None = None
        if transaction_opaque_data is not None:
            try:
                transaction_opaque_data_bytes = bytes.fromhex(transaction_opaque_data)
            except ValueError as e:
                raise click.ClickException(f"Invalid --transaction-opaque-data: must be a valid hex string: {e}") from e

        # Parse pushdown_filters from hex string if provided
        pushdown_filters_bytes: bytes | None = None
        if pushdown_filters is not None:
            try:
                pushdown_filters_bytes = bytes.fromhex(pushdown_filters)
            except ValueError as e:
                raise click.ClickException(f"Invalid --pushdown-filters: must be a valid hex string: {e}") from e

        _logger.info("starting_worker function=%s worker_path=%s", function_name, worker_path)

        # Validate function_type requirements
        if function_type == "scalar" and input_file is None:
            raise click.ClickException("--type scalar requires --input to be specified")
        if function_type == "table-in-out" and input_file is None:
            raise click.ClickException("--type table-in-out requires --input to be specified")
        if function_type == "table" and input_file is not None:
            raise click.ClickException("--type table does not accept --input (table functions have no input)")

        output_writer: OutputWriter | None = None
        try:
            # The ExitStack is entered first so it unwinds last: the input
            # Parquet file is closed only after the client has shut down.
            with contextlib.ExitStack() as resources, Client(
                worker_path,
                passthrough_stderr=worker_stderr,
                worker_limit=max_workers,
                attach_opaque_data=attach_opaque_data_bytes,
            ) as client:
                # Determine effective function type
                if function_type == "auto":
                    effective_type = "table" if input_file is None else "table-in-out"
                else:
                    effective_type = function_type

                # Build arguments object
                func_args = Arguments(positional=positional_args, named=named_args)

                if effective_type == "table":
                    # Table function (no input)
                    _logger.info("invoking_table_function function=%s", function_name)
                    output_iterator = client.table_function(
                        function_name=function_name,
                        arguments=func_args,
                        projection_ids=list(projection_ids) if projection_ids else None,
                        pushdown_filters=pushdown_filters_bytes,
                        transaction_opaque_data=transaction_opaque_data_bytes,
                        settings=settings,
                    )
                elif effective_type == "scalar":
                    # Scalar function (with input, single-column output)
                    assert input_file is not None  # Validated earlier
                    _logger.info("invoking_scalar_function function=%s", function_name)
                    _logger.info("reading_input file=%s", input_file)
                    pf = pq.ParquetFile(input_file)
                    # Previously the file was never closed.
                    resources.callback(pf.close)

                    output_iterator = client.scalar_function(
                        function_name=function_name,
                        arguments=func_args,
                        input=pf.iter_batches(),
                        transaction_opaque_data=transaction_opaque_data_bytes,
                        settings=settings,
                    )
                else:
                    # Table-in-out function (with input)
                    assert input_file is not None  # Validated earlier
                    _logger.info("invoking_table_in_out_function function=%s", function_name)
                    _logger.info("reading_input file=%s", input_file)
                    pf = pq.ParquetFile(input_file)
                    # Previously the file was never closed.
                    resources.callback(pf.close)

                    # If table_input_position is specified, log it for debugging
                    # The table input position tells the user where the table data
                    # appears in the function signature (e.g., position 1 means the
                    # table is the second argument). This is purely informational
                    # for the CLI user - the protocol handles table data separately.
                    if table_input_position is not None:
                        _logger.debug(
                            "table_input_position_specified position=%s num_args=%s",
                            table_input_position,
                            len(positional_args),
                        )

                    output_iterator = client.table_in_out_function(
                        function_name=function_name,
                        arguments=func_args,
                        input=pf.iter_batches(),
                        projection_ids=list(projection_ids) if projection_ids else None,
                        pushdown_filters=pushdown_filters_bytes,
                        transaction_opaque_data=transaction_opaque_data_bytes,
                        settings=settings,
                    )

                # The writer is created lazily so it can take the schema of
                # the first batch actually produced.
                for output_batch in output_iterator:
                    if output_writer is None:
                        output_writer = OutputWriter(output_file, output_format, output_batch.schema)
                    output_writer.write_batch(output_batch)

                _logger.info("processing_complete function=%s", function_name)
        except ClientError as e:
            raise click.ClickException(str(e)) from e
        finally:
            if output_writer is not None:
                output_writer.close()

    # Add catalog subcommand group (schema/table/view/transaction nested under it)
    cli.add_command(catalog)

    return cli
570
+
571
+
572
+ # Module-level command for testing
573
+ cli = _create_cli()
574
+
575
+
576
def main() -> None:
    """Run the vgi-client CLI by invoking the module-level click group."""
    cli()
579
+
580
+
581
+ if __name__ == "__main__":
582
+ main()