vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/invocation.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Response types for the VGI protocol.
|
|
4
|
+
|
|
5
|
+
This module defines response dataclasses and the FunctionType enum:
|
|
6
|
+
|
|
7
|
+
- FunctionType: Enum for aggregate, scalar, table, and table-buffering function types.
|
|
8
|
+
- BindResponse: Result of bind phase with output schema.
|
|
9
|
+
- BaseInitResponse: Base class for init responses.
|
|
10
|
+
- GlobalInitResponse: Result of init phase with max_workers.
|
|
11
|
+
|
|
12
|
+
Request types (BindRequest, InitRequest) are in ``vgi.protocol``.
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import uuid
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from typing import Annotated
|
|
22
|
+
|
|
23
|
+
import pyarrow as pa
|
|
24
|
+
from vgi_rpc import ArrowSerializableDataclass, ArrowType
|
|
25
|
+
|
|
26
|
+
from vgi.arguments import SecretLookupEntry
|
|
27
|
+
from vgi.metadata import DEFAULT_MAX_WORKERS
|
|
28
|
+
|
|
29
|
+
# Names exported by ``from vgi.invocation import *``.
__all__ = [
    "FunctionType",
    "BaseInitResponse",
    "BindResponse",
    "GlobalInitResponse",
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class FunctionType(Enum):
    """Type of function being invoked.

    Used in BindRequest to indicate which function category is being bound,
    allowing the worker to apply appropriate validation and processing.

    Each member's value is the lower-case spelling of its name.
    """

    # NOTE(review): presumably these value strings are what is exchanged with
    # the peer — confirm before renaming or re-valuing any member.
    AGGREGATE = "aggregate"
    SCALAR = "scalar"
    TABLE = "table"
    TABLE_BUFFERING = "table_buffering"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True, slots=True, kw_only=True)
class BindResponse(ArrowSerializableDataclass):
    """Outcome of a function's bind() call.

    A bind response carries the function's output characteristics and is
    serialized and sent to the client before any data processing begins.

    The same type doubles as a **secret scope request**: whenever
    ``lookup_secret_types`` is non-empty the response is not a normal bind
    result. C++ resolves the requested secrets and retries bind with
    ``resolved_secrets_provided=True``. Developers never build scope
    requests by hand — the framework generates them when
    ``SecretsAccessor`` still has pending lookups after ``on_bind()``
    returns.

    The three ``lookup_*`` lists are parallel: index ``i`` of each list
    describes one requested secret. A missing scope or name is stored as an
    empty string.

    Attributes:
        output_schema: Arrow schema describing the structure of output batches.
        opaque_data: Serialized data that is opaque to the caller that must
            be passed to any init() invocations.
        lookup_secret_types: Secret types for scoped lookup requests (empty = normal response).
        lookup_scopes: Scopes for scoped lookup requests (parallel to lookup_secret_types).
        lookup_names: Names for scoped lookup requests (parallel to lookup_secret_types).

    """

    output_schema: Annotated[pa.Schema, ArrowType(pa.binary())]
    # Wire-facing field: these are plain bytes. The framework produces them
    # by calling ``.serialize_to_bytes()`` on the typed
    # ``BindResult.opaque_data`` at the bind→response boundary (see
    # vgi.scalar_function / vgi.table_function / vgi.table_in_out_function).
    # Consumers rebuild the typed value themselves with
    # ``MyConcreteDataclass.deserialize_from_bytes(raw)``. A typed round-trip
    # through an abstract base would need a class registry in Python, so the
    # wire type stays honest about being bytes.
    opaque_data: Annotated[bytes | None, ArrowType(pa.binary())] = None
    lookup_secret_types: list[str] = field(default_factory=list)
    lookup_scopes: list[str] = field(default_factory=list)
    lookup_names: list[str] = field(default_factory=list)

    @property
    def is_secret_scope_request(self) -> bool:
        """Whether this response asks for secrets instead of reporting a bind result."""
        return bool(self.lookup_secret_types)

    @staticmethod
    def secret_scope_request(entries: list[SecretLookupEntry]) -> BindResponse:
        """Build a secret scope request out of pending lookup entries.

        The framework calls this when ``SecretsAccessor`` has pending lookups.
        C++ detects the non-empty ``lookup_secret_types`` and resolves them.

        Args:
            entries: Lookups to request; one slot is written to each of the
                three parallel ``lookup_*`` lists per entry.

        Returns:
            A response with an empty output schema and the lookup lists filled in.
        """
        secret_types: list[str] = []
        scopes: list[str] = []
        names: list[str] = []
        for entry in entries:
            secret_types.append(entry.secret_type)
            # A falsy scope / name is encoded as "" so the lists hold only strings.
            scopes.append(entry.scope or "")
            names.append(entry.secret_name or "")

        return BindResponse(
            output_schema=pa.schema([]),
            lookup_secret_types=secret_types,
            lookup_scopes=scopes,
            lookup_names=names,
        )

    def secret_scope_entries(self) -> list[SecretLookupEntry]:
        """Rebuild SecretLookupEntry objects from the parallel lookup lists.

        Empty-string scopes and names are turned back into ``None``.

        Raises:
            ValueError: If the three lookup lists differ in length
                (``zip(..., strict=True)``).
        """
        triples = zip(
            self.lookup_secret_types,
            self.lookup_scopes,
            self.lookup_names,
            strict=True,
        )
        rebuilt: list[SecretLookupEntry] = []
        for secret_type, scope, name in triples:
            rebuilt.append(
                SecretLookupEntry(
                    secret_type=secret_type,
                    scope=scope or None,
                    secret_name=name or None,
                )
            )
        return rebuilt
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@dataclass(frozen=True, slots=True, kw_only=True)
class BaseInitResponse(ArrowSerializableDataclass):
    """The result of calling init() on a function.

    Instances are frozen and must be constructed with keyword arguments.

    Attributes:
        execution_id: A unique id for the function execution. When not
            supplied it defaults to the raw bytes of a freshly generated
            random UUID (``uuid.uuid4().bytes``).
        opaque_data: Serialized data that is opaque to the caller that must
            be passed to any init() invocations.
            NOTE(review): this wording matches ``BindResponse.opaque_data``
            verbatim; for an init *response* the intended consumer is
            presumably a later phase — confirm and reword.

    """

    # default_factory runs once per instance, so every response gets its own id.
    execution_id: bytes = field(default_factory=lambda: uuid.uuid4().bytes)
    # Wire-facing field — see comment on ``BindResponse.opaque_data``
    # above for the typed-producer / bytes-wire / explicit-consumer
    # contract.
    opaque_data: Annotated[bytes | None, ArrowType(pa.binary())] = None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@dataclass(frozen=True, slots=True, kw_only=True)
class GlobalInitResponse(BaseInitResponse):
    """The result of calling init() on a function, extended with a parallelism limit.

    Inherits ``execution_id`` and ``opaque_data`` from ``BaseInitResponse``
    and adds ``max_workers``.

    Attributes:
        max_workers: The maximum number of worker processes that may be
            used for this function execution. This allows the function to control
            parallelism. Defaults to ``DEFAULT_MAX_WORKERS`` from ``vgi.metadata``.

    """

    max_workers: int = DEFAULT_MAX_WORKERS
|
vgi/logging_config.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Shared logging configuration for VGI worker CLIs.
|
|
4
|
+
|
|
5
|
+
Provides enums, known-logger registry, and a configure function that
|
|
6
|
+
mirrors the vgi_rpc CLI logging setup so that ``--debug``, ``--log-level``,
|
|
7
|
+
``--log-logger``, and ``--log-format`` behave identically across all
|
|
8
|
+
VGI workers.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import sys
|
|
15
|
+
from enum import StrEnum
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LogLevel(StrEnum):
    """Python logging level for ``--log-level``.

    Each value is the name of a level constant in the stdlib ``logging``
    module; ``configure_worker_logging`` resolves it with
    ``getattr(logging, level.value)``.
    """

    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class LogFormat(StrEnum):
    """Stderr log format for ``--log-format``.

    ``configure_worker_logging`` installs ``VgiJsonFormatter`` for ``json``
    and a plain ``logging.Formatter`` for any other value.
    """

    # Member names are lower-case, identical to their string values.
    text = "text"
    json = "json"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Registry of logger names this package knows about.
# Each entry is (name, description, typical-scenario).
# Within this module only the name column is read: configure_worker_logging
# warns on stderr when asked to configure a name that is not listed here.
_KNOWN_LOGGERS: list[tuple[str, str, str]] = [
    ("vgi", "VGI root logger", "all VGI messages"),
    ("vgi.worker", "Worker lifecycle", "startup, shutdown"),
    ("vgi.client", "Client operations", "spawn, bind, exchange"),
    ("vgi.client.cli", "CLI front-end", "argument parsing"),
    ("vgi.filter_pushdown", "Filter pushdown debug", "filter deserialization / evaluation"),
    ("vgi_rpc", "vgi_rpc root logger", "all vgi_rpc messages"),
    ("vgi_rpc.access", "RPC access log (enriched by VGI)", "per-request structured access log"),
    ("vgi_rpc.wire.request", "RPC wire request", "serialised request bytes"),
    ("vgi_rpc.wire.response", "RPC wire response", "serialised response bytes"),
    ("vgi_rpc.wire.transport", "Transport layer", "pipe / HTTP transport debug"),
]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def configure_worker_logging(
    *,
    debug: bool = False,
    log_level: LogLevel = LogLevel.INFO,
    log_loggers: list[str] | None = None,
    log_format: LogFormat = LogFormat.text,
) -> int:
    """Configure stdlib logging for a VGI worker process.

    One stderr ``StreamHandler`` is created and shared by the requested
    loggers. Every requested logger has its existing handlers removed and
    its level set to the effective level.

    When the request contains both a logger and one of its ancestors
    (for example ``"vgi"`` and ``"vgi.worker"``), the handler is attached
    only to the ancestor. The descendant's records already reach it through
    propagation, so attaching the handler to both would print each record
    twice.

    Args:
        debug: If True, force DEBUG on all default loggers (overrides *log_level*).
        log_level: Logging level when *debug* is False.
        log_loggers: Logger names to configure.  Defaults to ``["vgi", "vgi_rpc"]``.
            Names missing from the known-logger registry are still
            configured, after a warning is written to stderr.
        log_format: Stderr output format (``text`` or ``json``).

    Returns:
        The effective numeric log level.

    """
    effective_level = logging.DEBUG if debug else getattr(logging, log_level.value)

    handler = logging.StreamHandler(sys.stderr)

    if log_format == LogFormat.json:
        # Imported lazily so the formatter module is only loaded for JSON output.
        from vgi_rpc.logging_utils import VgiJsonFormatter

        handler.setFormatter(VgiJsonFormatter())
    else:
        handler.setFormatter(
            logging.Formatter("%(asctime)s %(name)-30s %(levelname)-5s %(message)s", datefmt="%H:%M:%S")
        )

    targets = log_loggers if log_loggers else ["vgi", "vgi_rpc"]

    known_names = {name for name, _, _ in _KNOWN_LOGGERS}

    # Pass 1: reset every requested logger. Creating them all up front also
    # ensures the parent links between them are final before pass 2 walks them.
    configured: list[logging.Logger] = []
    for name in targets:
        if name not in known_names:
            # Still configure it — the user may know what they're doing
            sys.stderr.write(f"warning: unknown logger {name!r}\n")
        logger = logging.getLogger(name)
        logger.handlers.clear()
        logger.setLevel(effective_level)
        configured.append(logger)

    # Pass 2: attach the shared handler, skipping loggers whose records will
    # already arrive at it via a configured ancestor.
    for logger in configured:
        if not _propagates_to_any(logger, configured):
            logger.addHandler(handler)

    return effective_level


def _propagates_to_any(logger: logging.Logger, candidates: list[logging.Logger]) -> bool:
    """Return True if *logger*'s records propagate into one of *candidates*."""
    node = logger
    # Follow the same chain the logging module uses when dispatching a record:
    # stop at the first logger that has propagation switched off.
    while node.propagate and node.parent is not None:
        node = node.parent
        if node in candidates:
            return True
    return False
|