vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/schema_utils.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Schema building utilities for VGI functions.
|
|
4
|
+
|
|
5
|
+
This module provides helpers for creating and modifying Arrow schemas with
|
|
6
|
+
minimal boilerplate, making output_schema definitions more concise.
|
|
7
|
+
|
|
8
|
+
FUNCTIONS
|
|
9
|
+
---------
|
|
10
|
+
schema(**fields)
|
|
11
|
+
Build a schema from keyword arguments mapping names to types.
|
|
12
|
+
|
|
13
|
+
schema_like(source, add, remove, rename, replace)
|
|
14
|
+
Derive a new schema from an existing one with modifications.
|
|
15
|
+
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from collections.abc import Mapping
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
import pyarrow as pa
|
|
24
|
+
|
|
25
|
+
# A field spec is either a bare DataType or a (DataType, metadata) tuple.
# The metadata mapping may use ``bytes`` or ``str`` for both keys and values;
# it is handed to ``pa.field(..., metadata=...)`` unchanged by ``schema()``.
FieldSpec = pa.DataType | tuple[pa.DataType, dict[bytes | str, bytes | str]]
|
|
27
|
+
|
|
28
|
+
# Public API of this module (controls ``from vgi.schema_utils import *``).
__all__ = [
    "FieldSpec",
    "VGI_PARTITION_COLUMN_KEY",
    "partition_field",
    "schema",
    "schema_like",
]
|
|
35
|
+
|
|
36
|
+
#: KeyValueMetadata key that marks an Arrow field as a partition column
#: for VGI's Hive-style partitioning (PartitionColumns mode).  Workers
#: opt in by setting ``Meta.partition_kind`` to a non-default
#: :class:`vgi.metadata.PartitionKind` AND annotating at least one
#: field of their bind schema with this key.
#:
#: :func:`partition_field` attaches this key with the value ``b"true"``.
VGI_PARTITION_COLUMN_KEY: bytes = b"vgi.partition_column"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def partition_field(
    name: str,
    type: pa.DataType,
    *,
    nullable: bool = True,
    metadata: dict[bytes | str, bytes | str] | None = None,
) -> pa.Field[Any]:
    """Build a ``pa.Field`` marked as a VGI partition column.

    Equivalent to::

        pa.field(name, type, nullable=nullable,
                 metadata={**(metadata or {}),
                           VGI_PARTITION_COLUMN_KEY: b"true"})

    with any ``str`` keys/values in *metadata* encoded to ``bytes`` first.

    Use in a bind schema when the function opts into PartitionColumns
    mode by setting ``Meta.partition_kind`` to a non-default
    :class:`vgi.metadata.PartitionKind`.  Per-field metadata round-trips
    through Arrow IPC, so the C++ extension can identify partition
    columns from ``bind_result.output_schema`` without a parallel
    list-of-names.

    Args:
        name: Column name.
        type: Arrow data type.
        nullable: Whether the column can contain nulls.
        metadata: Extra field-level metadata to merge with the
            partition-column marker.  Useful for extension types
            (e.g. geoarrow.wkb's ``ARROW:extension:name`` key).
            An entry for ``VGI_PARTITION_COLUMN_KEY`` (as ``bytes`` or
            ``str``) is ignored: the marker is always ``b"true"``.

    Returns:
        A ``pa.Field`` carrying ``{VGI_PARTITION_COLUMN_KEY: b"true"}``
        in its metadata.

    """
    merged: dict[bytes, bytes] = {VGI_PARTITION_COLUMN_KEY: b"true"}
    for raw_key, raw_value in (metadata or {}).items():
        # Normalise to bytes so "k" and b"k" cannot coexist as distinct keys.
        key = raw_key if isinstance(raw_key, bytes) else raw_key.encode()
        value = raw_value if isinstance(raw_value, bytes) else raw_value.encode()
        merged[key] = value
    # Re-assert the marker after merging: caller metadata must not be able to
    # clobber it, otherwise the returned field would not be a partition column.
    # Assigning to an existing key keeps its original (first) position.
    merged[VGI_PARTITION_COLUMN_KEY] = b"true"
    return pa.field(name, type, nullable=nullable, metadata=merged)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def schema(
    __fields: Mapping[str, FieldSpec] | None = None,
    /,
    **kwargs: FieldSpec,
) -> pa.Schema:
    """Build an Arrow schema from field definitions.

    Creates a schema with fields in the order specified. Field names are
    the keys and values are either Arrow data types or ``(type, metadata)``
    tuples for attaching field-level metadata.

    When a name appears in both ``__fields`` and ``kwargs``, the keyword
    value wins but the field keeps the position it had in ``__fields``.

    Args:
        __fields: Optional mapping of field names to specs (for programmatic use).
        **kwargs: Field names mapped to Arrow data types or ``(type, metadata)`` tuples.

    Returns:
        Arrow schema with the specified fields.

    Raises:
        TypeError: If a value is not a valid Arrow data type or field spec
            (including a tuple that does not have exactly two elements).

    Examples::

        schema(id=pa.int64(), name=pa.string())
        schema(row_id=(pa.int64(), {b"is_row_id": b""}), id=pa.int64())

    """
    # Combine the positional mapping with keyword fields (keywords override).
    all_fields: dict[str, FieldSpec] = {}
    if __fields is not None:
        all_fields.update(__fields)
    all_fields.update(kwargs)

    pa_fields: list[pa.Field[Any]] = []
    for name, spec in all_fields.items():
        if isinstance(spec, tuple):
            # Check the shape first so a malformed tuple surfaces as the
            # documented TypeError rather than an unpacking ValueError.
            if len(spec) != 2:
                raise TypeError(
                    f"Field '{name}': expected a (pa.DataType, metadata) tuple with exactly "
                    f"2 elements, got {len(spec)}."
                )
            dtype, metadata = spec
            if not isinstance(dtype, pa.DataType):
                raise TypeError(
                    f"Field '{name}': expected pa.DataType as first tuple element, "
                    f"got {type(dtype).__name__}. Use pa.int64(), pa.string(), etc."
                )
            pa_fields.append(pa.field(name, dtype, metadata=metadata))
        elif isinstance(spec, pa.DataType):
            pa_fields.append(pa.field(name, spec))
        else:
            raise TypeError(
                f"Field '{name}': expected pa.DataType or (pa.DataType, metadata) tuple, "
                f"got {type(spec).__name__}. Use pa.int64(), pa.string(), etc."
            )

    return pa.schema(pa_fields)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def schema_like(
    source: pa.Schema,
    *,
    add: Mapping[str, pa.DataType] | None = None,
    remove: list[str] | None = None,
    rename: Mapping[str, str] | None = None,
    replace: Mapping[str, pa.DataType] | None = None,
) -> pa.Schema:
    """Derive a new schema from an existing one with modifications.

    Creates a modified copy of the source schema. Operations are applied
    in this order: remove -> rename -> replace -> add.

    Surviving fields keep their position, nullability and field-level
    metadata. Schema-level metadata on ``source`` is not carried over.

    Args:
        source: The source schema to derive from.
        add: Fields to add at the end. Dict mapping names to types.
        remove: Field names to remove from the schema.
        rename: Field name mappings (old_name -> new_name).
        replace: Fields to replace with new types (keeps position). Keys
            are the field names as they appear in ``source`` (i.e. the
            name *before* any rename).

    Returns:
        New schema with the specified modifications.

    Raises:
        KeyError: If a field to remove, rename, or replace doesn't exist.
        ValueError: If trying to add a field that already exists.
        TypeError: If a type given in ``add`` is not a ``pa.DataType``.

    """
    source_names = set(source.names)

    # Validate every referenced field up front so nothing is built from a
    # request that names a column the source does not have.
    for action, requested in (("remove", remove), ("rename", rename), ("replace", replace)):
        for name in requested or ():
            if name not in source_names:
                raise KeyError(
                    f"Cannot {action} field '{name}': not found in schema. Available fields: {source.names}"
                )

    removed = set(remove or ())
    rename_map = rename or {}
    replace_map = replace or {}

    new_fields: list[pa.Field[Any]] = []
    final_names: set[str] = set()

    for field in source:
        if field.name in removed:
            continue

        # Both lookups key on the ORIGINAL name of the field.
        new_name = rename_map.get(field.name, field.name)
        new_type = replace_map.get(field.name, field.type)

        # Carry nullability across as well as metadata; pa.field() defaults
        # to nullable=True, which would silently relax NOT NULL columns.
        new_fields.append(pa.field(new_name, new_type, nullable=field.nullable, metadata=field.metadata))
        final_names.add(new_name)

    # Appended fields go last and must not collide with a surviving name.
    for name, dtype in (add or {}).items():
        if name in final_names:
            raise ValueError(
                f"Cannot add field '{name}': already exists in schema. "
                f"Use 'replace' to change an existing field's type."
            )
        if not isinstance(dtype, pa.DataType):
            raise TypeError(
                f"Field '{name}': expected pa.DataType, "
                f"got {type(dtype).__name__}. Use pa.int64(), pa.string(), etc."
            )
        new_fields.append(pa.field(name, dtype))

    return pa.schema(new_fields)
|
vgi/secret_protocol.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""VGI secret protocol — the wire contract for Orchard's standalone secret service.
|
|
4
|
+
|
|
5
|
+
Orchard is an independently-deployed microservice that brokers downstream
|
|
6
|
+
credentials (S3/HTTP/GCS/…) for a single authenticated account. The DuckDB
|
|
7
|
+
extension's ``VgiRemoteSecretStorage`` calls :meth:`VgiSecretProtocol.secret_lookup`
|
|
8
|
+
lazily whenever a secret consumer (e.g. httpfs resolving an ``s3://`` path) asks
|
|
9
|
+
the secret manager for a credential.
|
|
10
|
+
|
|
11
|
+
This protocol is **versioned independently** of :class:`vgi.protocol.VgiProtocol`
|
|
12
|
+
(the worker/catalog protocol). It has exactly one method and a tiny surface so it
|
|
13
|
+
can evolve on its own cadence — see ``protocol_version`` below.
|
|
14
|
+
|
|
15
|
+
Wire shape
|
|
16
|
+
----------
|
|
17
|
+
``secret_lookup`` takes the requested ``path`` and ``type`` as direct scalar
|
|
18
|
+
parameters (not a wrapped ``request`` dataclass), so the generated C++ builder
|
|
19
|
+
``BuildSecretLookupParams(path, type)`` is directly callable without a hand-coded
|
|
20
|
+
inner serializer. The response is :class:`SecretLookupResponse`, IPC-serialized
|
|
21
|
+
into the unary ``result`` envelope and validated C++-side against
|
|
22
|
+
``SecretLookupResultSchema()``.
|
|
23
|
+
|
|
24
|
+
Identity is carried entirely by the OAuth bearer token on the HTTP request (the
|
|
25
|
+
same ``CatalogAuth`` the catalog established at ATTACH) — there is no
|
|
26
|
+
account/storage identifier in the request body.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
from dataclasses import dataclass, field
|
|
32
|
+
from typing import Annotated, Any, ClassVar, Protocol
|
|
33
|
+
|
|
34
|
+
import pyarrow as pa
|
|
35
|
+
from vgi_rpc import ArrowSerializableDataclass, ArrowType
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def encode_secret_values(mapping: dict[str, Any]) -> pa.RecordBatch | None:
    """Build the one-row ``values`` RecordBatch from a Python mapping.

    Each key becomes a column; the cell at row 0 is the secret value. Types are
    inferred by pyarrow (str→utf8, int→int64, bool→bool, dict→struct, list→list,
    …). Pass a one-element ``pa.array([...])`` as a value for explicit control
    over the type.

    Args:
        mapping: Secret key → value. Values that are already ``pa.Array``
            instances are used as the column directly; anything else is
            wrapped as ``pa.array([value])``.

    Returns:
        A one-row ``pa.RecordBatch``, or ``None`` for an empty mapping (no
        values to ship).

    Raises:
        ValueError: If a ``pa.Array`` value does not contain exactly one
            element (the batch must have exactly one row).

    """
    if not mapping:
        return None

    columns: dict[str, pa.Array[Any]] = {}
    for key, value in mapping.items():
        if isinstance(value, pa.Array):
            # A pre-built array is taken verbatim, so enforce the one-row
            # contract here instead of shipping extra rows or failing later
            # with an opaque column-length mismatch.
            if len(value) != 1:
                raise ValueError(
                    f"Secret value {key!r}: expected a pa.Array with exactly 1 element, got {len(value)}"
                )
            columns[key] = value
        else:
            columns[key] = pa.array([value])
    return pa.RecordBatch.from_pydict(columns)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True, slots=True, kw_only=True)
class SecretLookupResponse(ArrowSerializableDataclass):
    """Response for :meth:`VgiSecretProtocol.secret_lookup`.

    ``values`` is the secret's key→value map carried as a **one-row RecordBatch**
    (serialized to binary on the wire): each column is a secret key and its row-0
    cell is the value. This lets values be any Arrow/DuckDB type — string, int64,
    bool, struct, list, nested — not just strings. Build it with
    :func:`encode_secret_values`. The C++ side converts each cell to a typed
    DuckDB ``Value`` via the Arrow→DuckDB bridge. ``redact_keys`` lists the subset
    of keys whose values must be redacted by ``duckdb_secrets()`` — honor it or
    values leak.

    ``ttl_seconds`` is the server's suggested cache lifetime. ``expires_at_unix``
    is the *credential's own* hard expiry as a Unix timestamp (0 = no intrinsic
    expiry); the client caches for ``min(ttl_seconds, expires_at_unix - now)`` so
    a short-lived STS token is never served past its own expiry.

    When ``found`` is False every other field is empty/zero and the client caches
    a short-TTL negative entry.

    The dataclass is frozen and keyword-only; every field except ``found`` has
    an empty/zero default, so a miss is simply ``SecretLookupResponse(found=False)``.
    """

    # Whether a credential matched the lookup. Required (no default).
    found: bool
    # NOTE(review): the next four fields presumably mirror the DuckDB secret's
    # type / provider / name / scope — confirm against the C++ consumer.
    secret_type: str = ""
    provider: str = ""
    name: str = ""
    scope: list[str] = field(default_factory=list)
    # One-row RecordBatch of secret key → value, declared as Arrow binary for
    # the wire; None when there are no values.
    values: Annotated[pa.RecordBatch | None, ArrowType(pa.binary())] = None
    # Keys in ``values`` whose contents must be redacted by duckdb_secrets().
    redact_keys: list[str] = field(default_factory=list)
    # Server-suggested cache lifetime, in seconds.
    ttl_seconds: int = 0
    # Hard expiry of the credential itself as a Unix timestamp; 0 = none.
    expires_at_unix: int = 0
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
# VGI Secret Protocol
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class VgiSecretProtocol(Protocol):
    """Wire protocol for Orchard's standalone secret service.

    A single unary method, ``secret_lookup``. ``vgi_rpc.RpcServer(VgiSecretProtocol,
    impl)`` handles serialization, dispatching, and version enforcement exactly as
    it does for :class:`vgi.protocol.VgiProtocol`.

    Application protocol surface version
    ------------------------------------
    ``protocol_version`` is the canonical semver (MAJOR.MINOR.PATCH) of this
    contract, **independent** of ``VgiProtocol.protocol_version``. The framework
    enforces an exact major+minor match (patch ignored) at the dispatch boundary.
    The C++ extension reads ``VGI_SECRET_PROTOCOL_VERSION`` from
    ``vgi/src/generated/vgi_secret_protocol_version.hpp`` (generated; sibling of
    ``vgi_protocol_version.hpp``) and passes it as a per-call
    ``protocol_version_override`` so it never collides with the worker protocol's
    global version constant.

    Bump rules mirror :class:`vgi.protocol.VgiProtocol`: major for any
    backwards-incompatible change, minor for additive, patch for worker-side fixes.
    """

    # Semver of this contract; versioned separately from the worker protocol.
    protocol_version: ClassVar[str] = "1.0.0"

    def secret_lookup(self, path: str, type: str) -> SecretLookupResponse:  # noqa: A002
        """Resolve the credential for ``path`` of secret ``type``.

        ``type`` is the lowercased DuckDB secret type the consumer probed for
        (``s3`` / ``r2`` / ``gcs`` / ``aws`` / ``http`` / …). Identity comes from
        the OAuth bearer on the transport. Return ``SecretLookupResponse(found=False)``
        when the account has no matching credential.

        Args:
            path: The requested path the credential is needed for
                (e.g. an ``s3://`` URL).
            type: Lowercased DuckDB secret type being probed. The name shadows
                the builtin on purpose (hence the ``noqa``): it is part of the
                wire contract.

        Returns:
            The lookup result; ``found=False`` signals a miss.
        """
        ...
|
vgi/secret_service.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Standalone serving harness for the VGI secret protocol (Orchard).
|
|
4
|
+
|
|
5
|
+
Orchard's secret service is an *independently-deployed microservice* — separate
|
|
6
|
+
from the worker/catalog ``vgi-serve`` deployable and speaking
|
|
7
|
+
:class:`vgi.secret_protocol.VgiSecretProtocol`. This module provides:
|
|
8
|
+
|
|
9
|
+
- :func:`create_secret_app` — build a WSGI app for any ``VgiSecretProtocol``
|
|
10
|
+
implementation (usable with gunicorn/waitress/uwsgi).
|
|
11
|
+
- :func:`serve_secret_http` — run it under waitress (prints ``PORT:<n>`` for test
|
|
12
|
+
harnesses, mirroring :mod:`vgi.serve`).
|
|
13
|
+
- :class:`ExampleOrchardSecretService` — a reference implementation that returns a
|
|
14
|
+
canned ``s3`` credential for ``s3://test-bucket*``; the C++ integration tests
|
|
15
|
+
point ``vgi-secret-serve`` at this class.
|
|
16
|
+
- :func:`main` — the ``vgi-secret-serve`` CLI entry point.
|
|
17
|
+
|
|
18
|
+
Auth: identity is carried by the HTTP bearer. Production deployments wire an
|
|
19
|
+
``authenticate`` callback (reuse the ``VGI_BEARER_TOKENS`` / ``VGI_JWT_*`` env
|
|
20
|
+
vars supported by :mod:`vgi.serve`). The example service ignores identity.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import argparse
|
|
26
|
+
import importlib
|
|
27
|
+
import os
|
|
28
|
+
import sys
|
|
29
|
+
import time
|
|
30
|
+
from typing import TYPE_CHECKING, Any
|
|
31
|
+
|
|
32
|
+
import pyarrow as pa
|
|
33
|
+
|
|
34
|
+
from vgi.secret_protocol import SecretLookupResponse, VgiSecretProtocol, encode_secret_values
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
from collections.abc import Callable
|
|
38
|
+
|
|
39
|
+
import falcon
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# --------------------------------------------------------------------------- #
|
|
43
|
+
# Reference implementation (also the integration-test fixture)
|
|
44
|
+
# --------------------------------------------------------------------------- #
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class ExampleOrchardSecretService:
    """Canned :class:`VgiSecretProtocol` implementation used by tests and demos.

    Any ``s3`` lookup whose path starts with ``s3://test-bucket`` gets a fixed
    credential with a short ``expires_at_unix`` (so the ``min(ttl, expiry)``
    cache path is exercised) and with ``secret`` listed for redaction. Every
    other lookup is reported as a miss.
    """

    #: Seconds until the canned credential's intrinsic expiry.
    credential_lifetime_seconds: int = 30

    def secret_lookup(self, path: str, type: str) -> SecretLookupResponse:  # noqa: A002
        """Return the canned credential for known test paths; a miss otherwise."""
        # Guard clause: anything but an s3 lookup under the test bucket misses.
        if type != "s3" or not path.startswith("s3://test-bucket"):
            return SecretLookupResponse(found=False)

        # Deliberately heterogeneous value types (string, int64, bool, nested
        # struct) so the whole Arrow→DuckDB Value bridge is exercised, not
        # merely string→string.
        values: dict[str, object] = {
            "key_id": "AKIAEXAMPLEORCHARD",
            "secret": "examplesecretvalue",
            "region": "us-east-1",
            "port": pa.array([9000], pa.int64()),
            "use_ssl": True,
            "endpoint_config": {"connect_timeout_ms": 5000, "max_retries": 3},
        }

        # With VGI_MOCK_S3_ENDPOINT set, steer httpfs to a local mock S3 so a
        # real `SELECT … FROM 's3://…'` read drives the null-ClientContext
        # system-transaction lookup path end to end.
        local_s3 = os.environ.get("VGI_MOCK_S3_ENDPOINT")
        if local_s3:
            values.update(
                endpoint=local_s3,  # host:port, no scheme
                use_ssl=False,
                url_style="path",
            )

        encoded = encode_secret_values(values)
        hard_expiry = int(time.time()) + self.credential_lifetime_seconds
        return SecretLookupResponse(
            found=True,
            secret_type="s3",
            provider="orchard",
            name="orchard_test_bucket",
            scope=["s3://test-bucket"],
            values=encoded,
            redact_keys=["secret"],
            ttl_seconds=60,
            expires_at_unix=hard_expiry,
        )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# --------------------------------------------------------------------------- #
|
|
94
|
+
# WSGI app + HTTP server
|
|
95
|
+
# --------------------------------------------------------------------------- #
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def create_secret_app(
    impl: object,
    *,
    prefix: str = "",
    cors_origins: str = "*",
    signing_key: bytes | None = None,
    authenticate: Callable[[falcon.Request], Any] | None = None,
    oauth_resource_metadata: Any = None,
) -> falcon.App[Any, Any]:
    """Wrap *impl* in a WSGI application speaking :class:`VgiSecretProtocol`.

    Args:
        impl: Any object implementing ``secret_lookup(path, type)``.
        prefix: URL prefix, forwarded to ``make_wsgi_app``.
        cors_origins: Allowed CORS origins, forwarded to ``make_wsgi_app``.
        signing_key: Passed to ``make_wsgi_app`` as ``token_key``. When
            ``None`` a fresh random 32-byte key is generated per call.
        authenticate: Optional request-authentication callback, forwarded.
        oauth_resource_metadata: Forwarded to ``make_wsgi_app`` unchanged.

    Returns:
        The WSGI app. Describe support and the landing/describe pages are all
        switched off — this is a credential endpoint, not a browsable worker.

    If the HTTP extras cannot be imported, an installation hint is written to
    stderr and the process exits with status 1.
    """
    try:
        from vgi_rpc.http import make_wsgi_app
    except ImportError:
        sys.stderr.write(
            "Error: HTTP dependencies not installed.\nInstall with: pip install vgi[http] (or: uv sync --extra http)\n"
        )
        sys.exit(1)

    from vgi_rpc.rpc import RpcServer

    # Only an absent key is replaced; an explicitly supplied key is kept as-is.
    token_key = os.urandom(32) if signing_key is None else signing_key

    rpc_server = RpcServer(VgiSecretProtocol, impl, enable_describe=False)
    app_options = {
        "prefix": prefix,
        "cors_origins": cors_origins,
        "token_key": token_key,
        "authenticate": authenticate,
        "oauth_resource_metadata": oauth_resource_metadata,
        "enable_landing_page": False,
        "enable_describe_page": False,
    }
    return make_wsgi_app(rpc_server, **app_options)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def serve_secret_http(
    impl: object,
    *,
    host: str = "0.0.0.0",
    port: int | None = None,
    prefix: str = "",
    cors_origins: str = "*",
    signing_key: bytes | None = None,
    authenticate: Callable[..., Any] | None = None,
    oauth_resource_metadata: Any = None,
) -> None:
    """Run *impl* as an HTTP service under waitress.

    The port is resolved as: explicit ``port`` argument, else the ``PORT``
    environment variable, else 8080. A port of 0 selects an ephemeral port.
    ``PORT:<n>`` is printed to stdout and a human-readable banner to stderr
    before waitress starts serving. The remaining keyword arguments are
    forwarded to :func:`create_secret_app`.

    If waitress cannot be imported, an installation hint is written to stderr
    and the process exits with status 1.
    """
    import socket

    try:
        import waitress  # type: ignore[import-untyped]
    except ImportError:
        sys.stderr.write(
            "Error: waitress not installed.\nInstall with: pip install vgi[http] (or: uv sync --extra http)\n"
        )
        sys.exit(1)

    if port is None:
        # An unset or empty $PORT falls back to 8080.
        port = int(os.environ.get("PORT") or 8080)

    if port == 0:
        # Ask the OS for a free port by binding a throwaway socket.
        # NOTE(review): the probe socket is closed before waitress binds, so
        # another process could in principle grab the port in between.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.bind((host, 0))
            port = int(probe.getsockname()[1])

    wsgi_app = create_secret_app(
        impl,
        prefix=prefix,
        cors_origins=cors_origins,
        signing_key=signing_key,
        authenticate=authenticate,
        oauth_resource_metadata=oauth_resource_metadata,
    )

    # Machine-readable line for test harnesses goes to stdout; the banner
    # goes to stderr.
    print(f"PORT:{port}", flush=True)
    banner = f"Serving {type(impl).__name__} (VgiSecretProtocol) on http://{host}:{port}{prefix}\n"
    sys.stderr.write(banner)
    sys.stderr.flush()

    waitress.serve(wsgi_app, host=host, port=port, _quiet=True)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _load_impl(reference: str) -> object:
    """Instantiate a ``VgiSecretProtocol`` implementation from ``module:Class``.

    Args:
        reference: Import reference of the form ``package.module:ClassName``.
            The part after the last ``:`` is the class name.

    Returns:
        A new instance of the referenced class, created with no arguments.

    On any problem (malformed reference, module not importable, attribute
    missing, attribute not a class) an error is written to stderr and the
    process exits with status 1.
    """
    module_ref, separator, class_name = reference.rpartition(":")
    # Require both halves: an empty module name would otherwise escape as a
    # ValueError traceback from import_module, and an empty class name would
    # produce a misleading "not found" message.
    if not separator or not module_ref or not class_name:
        sys.stderr.write(f"Error: expected 'module:ClassName', got {reference!r}\n")
        sys.exit(1)

    try:
        module = importlib.import_module(module_ref)
    except ImportError as exc:
        sys.stderr.write(f"Error: could not import {module_ref!r}: {exc}\n")
        sys.exit(1)

    cls = getattr(module, class_name, None)
    if cls is None:
        sys.stderr.write(f"Error: {class_name!r} not found in {module_ref!r}\n")
        sys.exit(1)
    if not isinstance(cls, type):
        sys.stderr.write(f"Error: {class_name!r} in {module_ref!r} is not a class\n")
        sys.exit(1)
    return cls()
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def main() -> None:
    """Parse the ``vgi-secret-serve`` command line and start serving.

    The signing key, authenticate callback and OAuth resource metadata are
    obtained from the ``_resolve_*`` helpers of :mod:`vgi.serve`.
    """
    from vgi.serve import _resolve_authenticate, _resolve_oauth_resource_metadata, _resolve_signing_key

    cli = argparse.ArgumentParser(
        prog="vgi-secret-serve",
        description="Serve a VgiSecretProtocol implementation over HTTP (Orchard secret service).",
    )
    cli.add_argument(
        "impl",
        nargs="?",
        default="vgi.secret_service:ExampleOrchardSecretService",
        help="Implementation reference: module:ClassName (default: the built-in ExampleOrchardSecretService fixture).",
    )
    cli.add_argument("--host", default="0.0.0.0", help="HTTP bind address")
    cli.add_argument(
        "--port",
        "-p",
        type=int,
        default=None,
        help="HTTP port (default: $PORT or 8080; 0 = ephemeral)",
    )
    cli.add_argument("--prefix", default="", help="URL prefix for RPC endpoints")
    cli.add_argument("--cors-origins", default="*", help="Allowed CORS origins")
    opts = cli.parse_args()

    # Load the implementation before resolving the serving options.
    service = _load_impl(opts.impl)
    serve_options = {
        "host": opts.host,
        "port": opts.port,
        "prefix": opts.prefix,
        "cors_origins": opts.cors_origins,
        "signing_key": _resolve_signing_key(),
        "authenticate": _resolve_authenticate(),
        "oauth_resource_metadata": _resolve_oauth_resource_metadata(),
    }
    serve_secret_http(service, **serve_options)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
# Run the CLI when this module is executed as a script rather than imported.
if __name__ == "__main__":
    main()
|