vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/schema_utils.py ADDED
@@ -0,0 +1,234 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Schema building utilities for VGI functions.
4
+
5
+ This module provides helpers for creating and modifying Arrow schemas with
6
+ minimal boilerplate, making output_schema definitions more concise.
7
+
8
+ FUNCTIONS
9
+ ---------
10
+ schema(**fields)
11
+ Build a schema from keyword arguments mapping names to types.
12
+
13
+ schema_like(source, add, remove, rename, replace)
14
+ Derive a new schema from an existing one with modifications.
15
+
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from collections.abc import Mapping
21
+ from typing import Any
22
+
23
+ import pyarrow as pa
24
+
25
# A field spec is either a bare DataType or a (DataType, metadata) tuple.
# The metadata dict may use bytes or str for both keys and values.
FieldSpec = pa.DataType | tuple[pa.DataType, dict[bytes | str, bytes | str]]

# Names exported by ``from vgi.schema_utils import *``.
__all__ = [
    "FieldSpec",
    "VGI_PARTITION_COLUMN_KEY",
    "partition_field",
    "schema",
    "schema_like",
]

#: KeyValueMetadata key that marks an Arrow field as a partition column
#: for VGI's Hive-style partitioning (PartitionColumns mode). Workers
#: opt in by setting ``Meta.partition_kind`` to a non-default
#: :class:`vgi.metadata.PartitionKind` AND annotating at least one
#: field of their bind schema with this key.
#: :func:`partition_field` writes this key with the value ``b"true"``.
VGI_PARTITION_COLUMN_KEY: bytes = b"vgi.partition_column"
42
+
43
+
44
def partition_field(
    name: str,
    type: pa.DataType,
    *,
    nullable: bool = True,
    metadata: dict[bytes | str, bytes | str] | None = None,
) -> pa.Field[Any]:
    """Create a ``pa.Field`` tagged as a VGI partition column.

    The field's metadata starts with ``{VGI_PARTITION_COLUMN_KEY: b"true"}``
    and is then extended with any caller-supplied ``metadata``. ``str`` keys
    and values are encoded to ``bytes`` (default ``str.encode()``) before
    merging. Because caller entries are applied after the marker, an entry
    whose key equals ``VGI_PARTITION_COLUMN_KEY`` replaces the marker value.

    Intended for bind schemas of functions that opt into PartitionColumns
    mode via ``Meta.partition_kind``; the marker travels as per-field
    metadata so no separate list of partition column names is needed.

    Args:
        name: Column name.
        type: Arrow data type.
        nullable: Whether the column can contain nulls.
        metadata: Extra field-level metadata merged on top of the
            partition-column marker (e.g. an extension type's
            ``ARROW:extension:name`` key).

    Returns:
        The constructed ``pa.Field`` with the merged metadata attached.

    """

    def _to_bytes(item: bytes | str) -> bytes:
        # Metadata is normalised to bytes; str input is encoded.
        return item if isinstance(item, bytes) else item.encode()

    extras = {_to_bytes(k): _to_bytes(v) for k, v in (metadata or {}).items()}
    # Marker first, caller entries second: a duplicate key keeps the marker's
    # position but takes the caller's value.
    field_metadata: dict[bytes, bytes] = {VGI_PARTITION_COLUMN_KEY: b"true", **extras}
    return pa.field(name, type, nullable=nullable, metadata=field_metadata)
86
+
87
+
88
def schema(
    __fields: Mapping[str, FieldSpec] | None = None,
    /,
    **kwargs: FieldSpec,
) -> pa.Schema:
    """Build an Arrow schema from field definitions.

    Creates a schema with fields in the order specified. Field names are
    the keys and values are either Arrow data types or ``(type, metadata)``
    tuples for attaching field-level metadata.

    When a name appears both in ``__fields`` and in ``kwargs``, the keyword
    spec wins and the field keeps the position it had in ``__fields``.

    Args:
        __fields: Optional mapping of field names to specs (for programmatic use).
        **kwargs: Field names mapped to Arrow data types or ``(type, metadata)`` tuples.

    Returns:
        Arrow schema with the specified fields.

    Raises:
        TypeError: If a value is not a valid Arrow data type or field spec,
            including a tuple that does not have exactly two elements.

    Examples::

        schema(id=pa.int64(), name=pa.string())
        schema(row_id=(pa.int64(), {b"is_row_id": b""}), id=pa.int64())

    """
    # Combine the positional mapping with kwargs (kwargs take precedence).
    all_fields: dict[str, FieldSpec] = {}
    if __fields is not None:
        all_fields.update(__fields)
    all_fields.update(kwargs)

    # Validate each spec and build the corresponding field.
    pa_fields: list[pa.Field[Any]] = []
    for name, spec in all_fields.items():
        if isinstance(spec, tuple):
            # Check arity up front so a malformed tuple surfaces as the
            # documented TypeError rather than an unpacking ValueError.
            if len(spec) != 2:
                raise TypeError(
                    f"Field '{name}': expected a (pa.DataType, metadata) tuple with exactly "
                    f"2 elements, got a tuple with {len(spec)}."
                )
            dtype, metadata = spec
            if not isinstance(dtype, pa.DataType):
                raise TypeError(
                    f"Field '{name}': expected pa.DataType as first tuple element, "
                    f"got {type(dtype).__name__}. Use pa.int64(), pa.string(), etc."
                )
            pa_fields.append(pa.field(name, dtype, metadata=metadata))
        elif isinstance(spec, pa.DataType):
            pa_fields.append(pa.field(name, spec))
        else:
            raise TypeError(
                f"Field '{name}': expected pa.DataType or (pa.DataType, metadata) tuple, "
                f"got {type(spec).__name__}. Use pa.int64(), pa.string(), etc."
            )

    return pa.schema(pa_fields)
141
+
142
+
143
def schema_like(
    source: pa.Schema,
    *,
    add: Mapping[str, pa.DataType] | None = None,
    remove: list[str] | None = None,
    rename: Mapping[str, str] | None = None,
    replace: Mapping[str, pa.DataType] | None = None,
) -> pa.Schema:
    """Derive a new schema from an existing one with modifications.

    Creates a modified copy of the source schema. Operations are applied
    in this order: remove -> rename -> replace -> add.

    The keys of ``remove``, ``rename`` and ``replace`` all refer to field
    names as they appear in ``source`` (i.e. ``replace`` is keyed by the
    original name even when the same field is also renamed). Fields carried
    over from ``source`` keep their field-level metadata and nullability.

    Args:
        source: The source schema to derive from.
        add: Fields to add at the end. Dict mapping names to types.
        remove: Field names to remove from the schema.
        rename: Field name mappings (old_name -> new_name).
        replace: Fields to replace with new types (keeps position).

    Returns:
        New schema with the specified modifications.

    Raises:
        KeyError: If a field to remove, rename, or replace doesn't exist.
        ValueError: If trying to add a field that already exists.
        TypeError: If a value in ``add`` is not a ``pa.DataType``.

    """
    # All three "existing field" operations are validated against the
    # source's names before anything is built.
    field_names = set(source.names)
    for verb, requested in (("remove", remove), ("rename", rename), ("replace", replace)):
        for name in requested or ():
            if name not in field_names:
                raise KeyError(f"Cannot {verb} field '{name}': not found in schema. Available fields: {source.names}")

    remove_set = set(remove) if remove else set()
    rename_map = rename or {}
    replace_map = replace or {}

    new_fields: list[pa.Field[Any]] = []
    final_names: set[str] = set()

    for field in source:
        # Step 1: drop removed fields.
        if field.name in remove_set:
            continue

        # Steps 2 & 3: apply rename and type replacement, both keyed by the
        # source name.
        new_name = rename_map.get(field.name, field.name)
        new_type = replace_map.get(field.name, field.type)

        # Carry nullability over explicitly: pa.field() defaults to
        # nullable=True, which would silently relax non-nullable columns.
        new_fields.append(pa.field(new_name, new_type, nullable=field.nullable, metadata=field.metadata))
        final_names.add(new_name)

    # Step 4: append new fields, rejecting clashes with surviving names.
    if add:
        for name, dtype in add.items():
            if name in final_names:
                raise ValueError(
                    f"Cannot add field '{name}': already exists in schema. "
                    f"Use 'replace' to change an existing field's type."
                )
            if not isinstance(dtype, pa.DataType):
                raise TypeError(
                    f"Field '{name}': expected pa.DataType, "
                    f"got {type(dtype).__name__}. Use pa.int64(), pa.string(), etc."
                )
            new_fields.append(pa.field(name, dtype))

    return pa.schema(new_fields)
vgi/secret_protocol.py ADDED
@@ -0,0 +1,124 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """VGI secret protocol — the wire contract for Orchard's standalone secret service.
4
+
5
+ Orchard is an independently-deployed microservice that brokers downstream
6
+ credentials (S3/HTTP/GCS/…) for a single authenticated account. The DuckDB
7
+ extension's ``VgiRemoteSecretStorage`` calls :meth:`VgiSecretProtocol.secret_lookup`
8
+ lazily whenever a secret consumer (e.g. httpfs resolving an ``s3://`` path) asks
9
+ the secret manager for a credential.
10
+
11
+ This protocol is **versioned independently** of :class:`vgi.protocol.VgiProtocol`
12
+ (the worker/catalog protocol). It has exactly one method and a tiny surface so it
13
+ can evolve on its own cadence — see ``protocol_version`` below.
14
+
15
+ Wire shape
16
+ ----------
17
+ ``secret_lookup`` takes the requested ``path`` and ``type`` as direct scalar
18
+ parameters (not a wrapped ``request`` dataclass), so the generated C++ builder
19
+ ``BuildSecretLookupParams(path, type)`` is directly callable without a hand-coded
20
+ inner serializer. The response is :class:`SecretLookupResponse`, IPC-serialized
21
+ into the unary ``result`` envelope and validated C++-side against
22
+ ``SecretLookupResultSchema()``.
23
+
24
+ Identity is carried entirely by the OAuth bearer token on the HTTP request (the
25
+ same ``CatalogAuth`` the catalog established at ATTACH) — there is no
26
+ account/storage identifier in the request body.
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ from dataclasses import dataclass, field
32
+ from typing import Annotated, Any, ClassVar, Protocol
33
+
34
+ import pyarrow as pa
35
+ from vgi_rpc import ArrowSerializableDataclass, ArrowType
36
+
37
+
38
def encode_secret_values(mapping: dict[str, Any]) -> pa.RecordBatch | None:
    """Turn a Python mapping into the ``values`` RecordBatch.

    Every key becomes a column. A value that is already a ``pa.Array`` is
    used as the column unchanged (giving the caller explicit control over
    the Arrow type); any other value is wrapped as ``pa.array([value])`` so
    pyarrow infers the type and the value lands in row 0.

    Args:
        mapping: Secret key -> value. Values may be plain Python objects or
            ``pa.Array`` instances.

    Returns:
        The RecordBatch built from the columns, or ``None`` when ``mapping``
        is empty (no values to ship).
    """
    if not mapping:
        return None
    columns: dict[str, pa.Array[Any]] = {
        key: (value if isinstance(value, pa.Array) else pa.array([value]))
        for key, value in mapping.items()
    }
    return pa.RecordBatch.from_pydict(columns)
52
+
53
+
54
@dataclass(frozen=True, slots=True, kw_only=True)
class SecretLookupResponse(ArrowSerializableDataclass):
    """Response for :meth:`VgiSecretProtocol.secret_lookup`.

    ``values`` is the secret's key→value map carried as a **one-row RecordBatch**
    (serialized to binary on the wire): each column is a secret key and its row-0
    cell is the value. This lets values be any Arrow/DuckDB type — string, int64,
    bool, struct, list, nested — not just strings. Build it with
    :func:`encode_secret_values`. The C++ side converts each cell to a typed
    DuckDB ``Value`` via the Arrow→DuckDB bridge. ``redact_keys`` lists the subset
    of keys whose values must be redacted by ``duckdb_secrets()`` — honor it or
    values leak.

    ``ttl_seconds`` is the server's suggested cache lifetime. ``expires_at_unix``
    is the *credential's own* hard expiry as a Unix timestamp (0 = no intrinsic
    expiry); the client caches for ``min(ttl_seconds, expires_at_unix - now)`` so
    a short-lived STS token is never served past its own expiry.

    When ``found`` is False every other field is empty/zero and the client caches
    a short-TTL negative entry.

    All fields are keyword-only and the instance is immutable
    (``frozen=True, kw_only=True``); only ``found`` is required.
    """

    # True when a matching credential exists; the only field without a default.
    found: bool
    # Descriptive attributes of the secret. NOTE(review): presumably these map
    # onto DuckDB's secret type / provider / name / scope — confirm against the
    # C++ consumer.
    secret_type: str = ""
    provider: str = ""
    name: str = ""
    scope: list[str] = field(default_factory=list)
    # One-row RecordBatch of secret values, declared as Arrow ``binary`` for
    # the wire; ``None`` when there are no values.
    values: Annotated[pa.RecordBatch | None, ArrowType(pa.binary())] = None
    # Keys of ``values`` whose contents must be redacted.
    redact_keys: list[str] = field(default_factory=list)
    # Suggested cache lifetime in seconds.
    ttl_seconds: int = 0
    # Hard expiry of the credential as a Unix timestamp; 0 = no intrinsic expiry.
    expires_at_unix: int = 0
85
+
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # VGI Secret Protocol
89
+ # ---------------------------------------------------------------------------
90
+
91
+
92
class VgiSecretProtocol(Protocol):
    """Wire protocol for Orchard's standalone secret service.

    A single unary method, ``secret_lookup``. ``vgi_rpc.RpcServer(VgiSecretProtocol,
    impl)`` handles serialization, dispatching, and version enforcement exactly as
    it does for :class:`vgi.protocol.VgiProtocol`.

    Application protocol surface version
    ------------------------------------
    ``protocol_version`` is the canonical semver (MAJOR.MINOR.PATCH) of this
    contract, **independent** of ``VgiProtocol.protocol_version``. The framework
    enforces an exact major+minor match (patch ignored) at the dispatch boundary.
    The C++ extension reads ``VGI_SECRET_PROTOCOL_VERSION`` from
    ``vgi/src/generated/vgi_secret_protocol_version.hpp`` (generated; sibling of
    ``vgi_protocol_version.hpp``) and passes it as a per-call
    ``protocol_version_override`` so it never collides with the worker protocol's
    global version constant.

    Bump rules mirror :class:`vgi.protocol.VgiProtocol`: major for any
    backwards-incompatible change, minor for additive, patch for worker-side fixes.
    """

    # Semver of this contract; class-level, not per-instance.
    protocol_version: ClassVar[str] = "1.0.0"

    def secret_lookup(self, path: str, type: str) -> SecretLookupResponse:  # noqa: A002
        """Resolve the credential for ``path`` of secret ``type``.

        ``type`` is the lowercased DuckDB secret type the consumer probed for
        (``s3`` / ``r2`` / ``gcs`` / ``aws`` / ``http`` / …). Identity comes from
        the OAuth bearer on the transport. Return ``SecretLookupResponse(found=False)``
        when the account has no matching credential.

        Args:
            path: The requested path the credential is needed for.
            type: The secret type being looked up (shadows the builtin by
                design, hence the ``noqa: A002``).

        Returns:
            A :class:`SecretLookupResponse` describing the hit or miss.
        """
        ...
vgi/secret_service.py ADDED
@@ -0,0 +1,238 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Standalone serving harness for the VGI secret protocol (Orchard).
4
+
5
+ Orchard's secret service is an *independently-deployed microservice* — separate
6
+ from the worker/catalog ``vgi-serve`` deployable and speaking
7
+ :class:`vgi.secret_protocol.VgiSecretProtocol`. This module provides:
8
+
9
+ - :func:`create_secret_app` — build a WSGI app for any ``VgiSecretProtocol``
10
+ implementation (usable with gunicorn/waitress/uwsgi).
11
+ - :func:`serve_secret_http` — run it under waitress (prints ``PORT:<n>`` for test
12
+ harnesses, mirroring :mod:`vgi.serve`).
13
+ - :class:`ExampleOrchardSecretService` — a reference implementation that returns a
14
+ canned ``s3`` credential for ``s3://test-bucket*``; the C++ integration tests
15
+ point ``vgi-secret-serve`` at this class.
16
+ - :func:`main` — the ``vgi-secret-serve`` CLI entry point.
17
+
18
+ Auth: identity is carried by the HTTP bearer. Production deployments wire an
19
+ ``authenticate`` callback (reuse the ``VGI_BEARER_TOKENS`` / ``VGI_JWT_*`` env
20
+ vars supported by :mod:`vgi.serve`). The example service ignores identity.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import argparse
26
+ import importlib
27
+ import os
28
+ import sys
29
+ import time
30
+ from typing import TYPE_CHECKING, Any
31
+
32
+ import pyarrow as pa
33
+
34
+ from vgi.secret_protocol import SecretLookupResponse, VgiSecretProtocol, encode_secret_values
35
+
36
+ if TYPE_CHECKING:
37
+ from collections.abc import Callable
38
+
39
+ import falcon
40
+
41
+
42
+ # --------------------------------------------------------------------------- #
43
+ # Reference implementation (also the integration-test fixture)
44
+ # --------------------------------------------------------------------------- #
45
+
46
+
47
class ExampleOrchardSecretService:
    """Canned :class:`VgiSecretProtocol` implementation for tests and demos.

    A lookup of type ``s3`` for a path starting with ``s3://test-bucket``
    yields a fixed credential whose ``expires_at_unix`` lies
    ``credential_lifetime_seconds`` in the future and whose ``secret`` key is
    listed for redaction. Any other lookup is reported as not found.
    """

    #: Seconds until the canned credential's intrinsic expiry.
    credential_lifetime_seconds: int = 30

    def secret_lookup(self, path: str, type: str) -> SecretLookupResponse:  # noqa: A002
        """Return the canned credential for known test paths; a miss otherwise.

        Args:
            path: Requested path; must start with ``s3://test-bucket`` to hit.
            type: Requested secret type; must be ``"s3"`` to hit.

        Returns:
            A populated ``SecretLookupResponse`` on a hit, or
            ``SecretLookupResponse(found=False)`` on a miss.
        """
        is_known_path = type == "s3" and path.startswith("s3://test-bucket")
        if not is_known_path:
            return SecretLookupResponse(found=False)

        # Deliberately mixed value types (string, explicit int64 array, bool,
        # nested dict) so more than plain string values get encoded.
        payload: dict[str, object] = {
            "key_id": "AKIAEXAMPLEORCHARD",
            "secret": "examplesecretvalue",
            "region": "us-east-1",
            "port": pa.array([9000], pa.int64()),
            "use_ssl": True,
            "endpoint_config": {"connect_timeout_ms": 5000, "max_retries": 3},
        }

        # With VGI_MOCK_S3_ENDPOINT set, redirect the credential to that
        # endpoint (host:port, no scheme), without SSL, using path-style URLs.
        mock_endpoint = os.environ.get("VGI_MOCK_S3_ENDPOINT")
        if mock_endpoint:
            payload.update(endpoint=mock_endpoint, use_ssl=False, url_style="path")

        return SecretLookupResponse(
            found=True,
            secret_type="s3",
            provider="orchard",
            name="orchard_test_bucket",
            scope=["s3://test-bucket"],
            values=encode_secret_values(payload),
            redact_keys=["secret"],
            ttl_seconds=60,
            expires_at_unix=int(time.time()) + self.credential_lifetime_seconds,
        )
91
+
92
+
93
+ # --------------------------------------------------------------------------- #
94
+ # WSGI app + HTTP server
95
+ # --------------------------------------------------------------------------- #
96
+
97
+
98
def create_secret_app(
    impl: object,
    *,
    prefix: str = "",
    cors_origins: str = "*",
    signing_key: bytes | None = None,
    authenticate: Callable[[falcon.Request], Any] | None = None,
    oauth_resource_metadata: Any = None,
) -> falcon.App[Any, Any]:
    """Create the WSGI app that exposes *impl* via :class:`VgiSecretProtocol`.

    *impl* is wrapped in a ``vgi_rpc`` ``RpcServer`` with describe disabled,
    and the resulting WSGI app is built with the landing and describe pages
    turned off.

    Args:
        impl: Object implementing ``secret_lookup(path, type)``.
        prefix: Passed through to ``make_wsgi_app`` as ``prefix``.
        cors_origins: Passed through as ``cors_origins``.
        signing_key: Passed through as ``token_key``. When ``None``, 32 random
            bytes from ``os.urandom`` are generated for this app.
        authenticate: Optional callback passed through as ``authenticate``.
        oauth_resource_metadata: Passed through unchanged.

    Returns:
        The WSGI application returned by ``vgi_rpc.http.make_wsgi_app``.

    Note:
        If ``vgi_rpc.http`` cannot be imported, an installation hint is written
        to stderr and the process exits with status 1.
    """
    try:
        from vgi_rpc.http import make_wsgi_app
    except ImportError:
        sys.stderr.write(
            "Error: HTTP dependencies not installed.\n"
            "Install with: pip install vgi[http] (or: uv sync --extra http)\n"
        )
        sys.exit(1)

    from vgi_rpc.rpc import RpcServer

    # Fall back to a fresh random key when the caller did not supply one.
    token_key = os.urandom(32) if signing_key is None else signing_key
    rpc_server = RpcServer(VgiSecretProtocol, impl, enable_describe=False)

    return make_wsgi_app(
        rpc_server,
        prefix=prefix,
        cors_origins=cors_origins,
        token_key=token_key,
        authenticate=authenticate,
        oauth_resource_metadata=oauth_resource_metadata,
        enable_landing_page=False,
        enable_describe_page=False,
    )
137
+
138
+
139
def serve_secret_http(
    impl: object,
    *,
    host: str = "0.0.0.0",
    port: int | None = None,
    prefix: str = "",
    cors_origins: str = "*",
    signing_key: bytes | None = None,
    authenticate: Callable[..., Any] | None = None,
    oauth_resource_metadata: Any = None,
) -> None:
    """Run *impl* as an HTTP service under waitress.

    Once the port is known, ``PORT:<n>`` is printed to stdout (flushed) and a
    human-readable "Serving ..." line is written to stderr; then
    ``waitress.serve`` is called.

    Args:
        impl: Object implementing ``secret_lookup(path, type)``.
        host: Address to bind.
        port: Port to bind. ``None`` means the ``PORT`` environment variable
            if set and non-empty, else 8080. ``0`` means pick a free port by
            briefly binding a throwaway socket to port 0 on ``host``.
        prefix: Forwarded to :func:`create_secret_app`.
        cors_origins: Forwarded to :func:`create_secret_app`.
        signing_key: Forwarded to :func:`create_secret_app`.
        authenticate: Forwarded to :func:`create_secret_app`.
        oauth_resource_metadata: Forwarded to :func:`create_secret_app`.

    Note:
        If ``waitress`` cannot be imported, an installation hint is written to
        stderr and the process exits with status 1.
    """
    import socket

    try:
        import waitress  # type: ignore[import-untyped]
    except ImportError:
        sys.stderr.write(
            "Error: waitress not installed.\n"
            "Install with: pip install vgi[http] (or: uv sync --extra http)\n"
        )
        sys.exit(1)

    if port is None:
        # An unset or empty $PORT falls back to 8080.
        port = int(os.environ.get("PORT") or 8080)

    if port == 0:
        # Ask the OS for a free port; the probe socket is closed again before
        # waitress binds, so the number is only reserved momentarily.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.bind((host, 0))
            port = int(probe.getsockname()[1])

    application = create_secret_app(
        impl,
        prefix=prefix,
        cors_origins=cors_origins,
        signing_key=signing_key,
        authenticate=authenticate,
        oauth_resource_metadata=oauth_resource_metadata,
    )

    print(f"PORT:{port}", flush=True)
    banner = f"Serving {type(impl).__name__} (VgiSecretProtocol) on http://{host}:{port}{prefix}\n"
    sys.stderr.write(banner)
    sys.stderr.flush()
    waitress.serve(application, host=host, port=port, _quiet=True)
182
+
183
+
184
+ def _load_impl(reference: str) -> object:
185
+ """Instantiate a ``VgiSecretProtocol`` implementation from ``module:Class``."""
186
+ if ":" not in reference:
187
+ sys.stderr.write(f"Error: expected 'module:ClassName', got {reference!r}\n")
188
+ sys.exit(1)
189
+ module_ref, class_name = reference.rsplit(":", 1)
190
+ try:
191
+ module = importlib.import_module(module_ref)
192
+ except ImportError as exc:
193
+ sys.stderr.write(f"Error: could not import {module_ref!r}: {exc}\n")
194
+ sys.exit(1)
195
+ cls = getattr(module, class_name, None)
196
+ if cls is None or not isinstance(cls, type):
197
+ sys.stderr.write(f"Error: {class_name!r} not found in {module_ref!r}\n")
198
+ sys.exit(1)
199
+ return cls()
200
+
201
+
202
def main() -> None:
    """Run the ``vgi-secret-serve`` command line.

    Parses the implementation reference and HTTP options from ``sys.argv``,
    instantiates the implementation via :func:`_load_impl`, and hands off to
    :func:`serve_secret_http`. The signing key, authenticate callback and
    OAuth resource metadata come from the ``_resolve_*`` helpers in
    :mod:`vgi.serve`.
    """
    from vgi.serve import _resolve_authenticate, _resolve_oauth_resource_metadata, _resolve_signing_key

    cli = argparse.ArgumentParser(
        prog="vgi-secret-serve",
        description="Serve a VgiSecretProtocol implementation over HTTP (Orchard secret service).",
    )
    # Optional positional: defaults to the built-in example fixture.
    cli.add_argument(
        "impl",
        nargs="?",
        default="vgi.secret_service:ExampleOrchardSecretService",
        help="Implementation reference: module:ClassName (default: the built-in ExampleOrchardSecretService fixture).",
    )
    cli.add_argument("--host", default="0.0.0.0", help="HTTP bind address")
    cli.add_argument(
        "--port",
        "-p",
        type=int,
        default=None,
        help="HTTP port (default: $PORT or 8080; 0 = ephemeral)",
    )
    cli.add_argument("--prefix", default="", help="URL prefix for RPC endpoints")
    cli.add_argument("--cors-origins", default="*", help="Allowed CORS origins")
    options = cli.parse_args()

    service = _load_impl(options.impl)
    serve_secret_http(
        service,
        host=options.host,
        port=options.port,
        prefix=options.prefix,
        cors_origins=options.cors_origins,
        signing_key=_resolve_signing_key(),
        authenticate=_resolve_authenticate(),
        oauth_resource_metadata=_resolve_oauth_resource_metadata(),
    )
235
+
236
+
237
# Allow running the module directly as a script; delegates to the CLI entry point.
if __name__ == "__main__":
    main()