vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/exceptions.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Exception classes for VGI.
|
|
4
|
+
|
|
5
|
+
This module defines custom exceptions used throughout the VGI framework.
|
|
6
|
+
|
|
7
|
+
Classes:
|
|
8
|
+
ExecutionIdentifierError: Raised when execution_identifier is required but not set.
|
|
9
|
+
SchemaValidationError: Raised when a batch schema doesn't match expected schema.
|
|
10
|
+
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
import pyarrow as pa
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"BindStateNotFoundError",
|
|
22
|
+
"CatalogReadOnlyError",
|
|
23
|
+
"ExecutionIdentifierError",
|
|
24
|
+
"SchemaValidationError",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class BindStateNotFoundError(Exception):
    """Signal that INIT was invoked with bind_data that is missing or unusable.

    Situations that trigger this error:
        - the INIT invocation carries no bind_data field
        - the bind_data is corrupted or cannot be deserialized
        - the function_name stored in the bind state does not match the
          one in the invocation

    Clients are expected to catch this error and report clearly that a
    BIND call has to be made before INIT.

    """
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class CatalogReadOnlyError(Exception):
    """Signal that a DDL operation was attempted against a read-only catalog.

    ReadOnlyCatalogInterface raises this error whenever a create, drop,
    rename, or modify operation is attempted.

    Operations a read-only catalog still supports:
        - catalogs(): list catalogs
        - catalog_attach/detach: attach to / detach from catalogs
        - schemas(): list schemas
        - schema_get(): get schema info
        - schema_contents(): list schema contents
        - table_get(), view_get(): get table/view info
        - table_scan_function_get(): get the scan function for tables

    """
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class ExecutionIdentifierError(ValueError):
    """Signal that an operation needed an execution_identifier that was never set.

    Typical causes:
        - store_state() called before initialize_global_state() or
          load_global_state()
        - collect_states() called before initialize_global_state() or
          load_global_state()
        - work queue operations attempted before initialization

    The execution_identifier is set automatically during:
        - initialize_global_state() on the primary worker
        - load_global_state() on secondary workers

    Resolution:
        - make sure your function calls super().initialize_global_state()
        - make sure the worker correctly calls load_global_state() for
          secondary workers

    """
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class SchemaValidationError(Exception):
    """Raised when a batch schema doesn't match the expected schema.

    This error is raised by the framework during input/output validation.
    It indicates a programming error where a batch doesn't conform to the
    declared schema.

    The error message includes detailed information about what differs:
    - Missing fields (in expected but not in actual)
    - Extra fields (in actual but not in expected)
    - Type mismatches (same field name, different types or nullability)
    - Field order differences

    Attributes:
        expected: The expected Arrow schema.
        actual: The actual Arrow schema that was received.
        context: Description of where the validation occurred.

    """

    def __init__(
        self,
        message: str,
        *,
        expected: pa.Schema | None = None,
        actual: pa.Schema | None = None,
        context: str = "",
    ) -> None:
        """Initialize with schema comparison details.

        The detailed comparison is only appended to the message when both
        schemas are supplied; otherwise ``message`` is used unchanged.

        Args:
            message: Base error message.
            expected: The expected Arrow schema.
            actual: The actual Arrow schema.
            context: Where the error occurred (e.g., "output from transform()").

        """
        self.expected = expected
        self.actual = actual
        self.context = context

        if expected is not None and actual is not None:
            full_message = self._build_detailed_message(message, expected, actual)
        else:
            full_message = message

        super().__init__(full_message)

    def _build_detailed_message(self, base_message: str, expected: pa.Schema, actual: pa.Schema) -> str:
        """Build a detailed message showing exactly what differs.

        Every per-field section (missing, extra, mismatches) is emitted in
        sorted field-name order so the text is identical from run to run.

        Args:
            base_message: Leading line of the message.
            expected: Schema the batch was supposed to have.
            actual: Schema the batch really had.

        Returns:
            The multi-line message, joined with newlines.

        """
        lines = [base_message, ""]

        if self.context:
            lines.append(f" Context: {self.context}")
            lines.append("")

        # Build field maps for comparison
        expected_fields = {f.name: f for f in expected}
        actual_fields = {f.name: f for f in actual}

        expected_names = set(expected_fields)
        actual_names = set(actual_fields)

        # Find differences
        missing = expected_names - actual_names
        extra = actual_names - expected_names
        common = expected_names & actual_names

        # Check for type mismatches in common fields. Iterate in sorted order:
        # raw set iteration over strings depends on hash randomization, which
        # would make the report order change between processes.
        type_mismatches = []
        for name in sorted(common):
            exp_field = expected_fields[name]
            act_field = actual_fields[name]
            if exp_field.type != act_field.type:
                type_mismatches.append((name, exp_field.type, act_field.type))
            elif exp_field.nullable != act_field.nullable:
                # Same type but different nullability is reported in the same section.
                exp_null = "nullable" if exp_field.nullable else "non-nullable"
                act_null = "nullable" if act_field.nullable else "non-nullable"
                type_mismatches.append((name, exp_null, act_null))

        # Check for order differences (only if names match but order differs)
        expected_order = [f.name for f in expected]
        actual_order = [f.name for f in actual]
        order_differs = not missing and not extra and not type_mismatches and expected_order != actual_order

        # Report missing fields
        if missing:
            lines.append(" Missing fields (expected but not found):")
            for name in sorted(missing):
                field = expected_fields[name]
                lines.append(f" - {name}: {field.type}")

        # Report extra fields
        if extra:
            lines.append(" Extra fields (found but not expected):")
            for name in sorted(extra):
                field = actual_fields[name]
                lines.append(f" - {name}: {field.type}")

        # Report type mismatches
        if type_mismatches:
            lines.append(" Type mismatches:")
            for name, exp_type, act_type in type_mismatches:
                lines.append(f" - {name}: expected {exp_type}, got {act_type}")

        # Report order differences
        if order_differs:
            lines.append(" Field order differs:")
            lines.append(f" Expected: {expected_order}")
            lines.append(f" Actual: {actual_order}")

        # Summary of schemas
        lines.append("")
        lines.append(" Expected schema:")
        for field in expected:
            nullable = " (nullable)" if field.nullable else ""
            lines.append(f" {field.name}: {field.type}{nullable}")

        lines.append(" Actual schema:")
        for field in actual:
            nullable = " (nullable)" if field.nullable else ""
            lines.append(f" {field.name}: {field.type}{nullable}")

        return "\n".join(lines)
|
vgi/function.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Core data structures for VGI function calls and bind results.
|
|
4
|
+
|
|
5
|
+
This module defines the foundational classes used during function binding
|
|
6
|
+
in the VGI protocol. When a client invokes a function, it sends the
|
|
7
|
+
function name, arguments, input schema, and function type.
|
|
8
|
+
|
|
9
|
+
Classes:
|
|
10
|
+
Function: Base class for all VGI functions.
|
|
11
|
+
|
|
12
|
+
See Also:
|
|
13
|
+
vgi.scalar_function: Scalar functions with 1:1 row transforms.
|
|
14
|
+
vgi.table_function: Table functions with cardinality hints.
|
|
15
|
+
vgi.table_in_out_function: Streaming table functions for batch transforms.
|
|
16
|
+
vgi_rpc.log: Level and Message for in-band function diagnostics.
|
|
17
|
+
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
import os
|
|
24
|
+
from abc import ABC
|
|
25
|
+
from typing import (
|
|
26
|
+
Annotated,
|
|
27
|
+
Any,
|
|
28
|
+
ClassVar,
|
|
29
|
+
final,
|
|
30
|
+
get_args,
|
|
31
|
+
get_origin,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
import pyarrow as pa
|
|
35
|
+
|
|
36
|
+
from vgi.exceptions import SchemaValidationError
|
|
37
|
+
from vgi.function_storage import FunctionStorage, FunctionStorageSqlite
|
|
38
|
+
from vgi.metadata import MetadataMixin, ResolvedMetadata
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _resolve_storage() -> FunctionStorage:
    """Pick the default FunctionStorage backend from environment variables.

    ``VGI_WORKER_SHARED_STORAGE`` (case-insensitive, default ``"sqlite"``)
    selects the backend. For ``"sqlite"``, ``VGI_WORKER_SQLITE_PATH`` gives
    the database path (empty or unset becomes ``None``) and
    ``VGI_SQLITE_SHARD == "1"`` switches to the sharded variant.

    Returns:
        The storage instance for the selected backend.

    Raises:
        ValueError: If the selected backend name is not recognised.

    """
    selected = os.environ.get("VGI_WORKER_SHARED_STORAGE", "sqlite").lower()

    if selected == "memory":
        # In-process tier: SQLite at ":memory:" (shared-cache, process-local).
        # VGI_WORKER_SQLITE_PATH is deliberately not consulted here: "memory"
        # always means a process-local store with no cross-process
        # coordination. Correct only for single-process deployments; use
        # "sqlite" (file) for multi-process on one machine and
        # "cloudflare-do" for cross-machine.
        return FunctionStorageSqlite(db_path=":memory:")
    elif selected == "sqlite":
        # VGI_WORKER_SQLITE_PATH=":memory:" picks the in-process shared-cache
        # in-memory backend. Single-process test fixtures (notably the test
        # fixture HTTP server) use it to avoid per-op WAL fsync cost.
        sqlite_path = os.environ.get("VGI_WORKER_SQLITE_PATH") or None
        shard_requested = os.environ.get("VGI_SQLITE_SHARD") == "1"
        if not shard_requested:
            return FunctionStorageSqlite(db_path=sqlite_path)

        # Debug aid: partition sqlite by shard_key to reproduce cloudflare-do
        # per-DO isolation locally (surfaces shard-routing bugs sqlite hides).
        from vgi.function_storage import ShardedSqliteStorage

        return ShardedSqliteStorage(sqlite_path)
    elif selected == "azure-sql":
        # Imported lazily so this backend's module loads only when selected.
        from vgi.function_storage_azure_sql import FunctionStorageAzureSql

        return FunctionStorageAzureSql.from_env()
    elif selected == "cloudflare-do":
        # Imported lazily so this backend's module loads only when selected.
        from vgi.function_storage_cf_do import FunctionStorageCfDo

        return FunctionStorageCfDo.from_env()

    supported = "Supported: 'memory', 'sqlite', 'azure-sql', 'cloudflare-do'"
    raise ValueError(f"Unknown VGI_WORKER_SHARED_STORAGE backend: {selected!r}. " + supported)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class _DefaultStorageDescriptor:
    """Descriptor that defers FunctionStorage resolution to first access.

    Nothing is read from the environment at import time; the backend is
    resolved on the first attribute read and then reused for later reads
    through the same descriptor. Because only ``__get__`` is defined, a
    subclass that explicitly sets ``storage = SomeStorage(...)`` simply
    shadows this descriptor with a plain attribute — no interference.
    """

    # Storage resolved so far; None until the first access.
    _resolved: FunctionStorage | None = None

    def __get__(self, obj: object | None, objtype: type | None = None) -> FunctionStorage:
        """Return the cached storage, resolving it on first use."""
        cached = self._resolved
        if cached is None:
            cached = self._resolved = _resolve_storage()
        return cached
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# Default max_workers when not explicitly specified (effectively unlimited)
|
|
94
|
+
DEFAULT_MAX_WORKERS = 99999
|
|
95
|
+
|
|
96
|
+
__all__ = [
|
|
97
|
+
"Function",
|
|
98
|
+
"DEFAULT_MAX_WORKERS",
|
|
99
|
+
]
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class Function(ABC, MetadataMixin):
    """Base class for all VGI functions.

    Provides shared infrastructure (metadata, storage, Arg descriptor extraction,
    schema validation) for all function types. Since the child classes have very
    different APIs, there are not many standard methods here.

    Subclasses can define a nested Meta class to provide metadata.

    Available Meta attributes:
        name: Function name for registration (default: class name to snake_case)
        description: Human-readable description (default: docstring first line)
        categories: Classification tags
        examples: List of SQL examples
        See vgi.metadata for all available attributes.

    Attributes:
        logger: Structured logger for function diagnostics.

    See Also:
        vgi.scalar_function.ScalarFunction: Scalar 1:1 row transforms.
        vgi.table_function.TableFunctionGenerator: Table functions.
        vgi.table_in_out_function.TableInOutFunction: Table-in-out batch transforms.
        vgi.metadata: Metadata documentation for functions.

    """

    storage: ClassVar[FunctionStorage] = _DefaultStorageDescriptor()  # type: ignore[assignment]

    # Cache for resolved metadata
    _metadata_cache: ClassVar[ResolvedMetadata | None] = None

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Extract Arg descriptors from Annotated type hints.

        The Arg is extracted from the annotation metadata and installed
        as a class attribute (descriptor). String annotations are evaluated
        against the defining module's globals; annotations that cannot be
        evaluated are skipped. The defining module's namespace is never
        modified by this hook.
        """
        super().__init_subclass__(**kwargs)

        # Import here to avoid circular imports
        from vgi.arguments import AnyArrowValue, Arg

        # Get type hints with include_extras=True to access Annotated metadata
        # We only look at the class's own annotations (not inherited) to avoid
        # issues with forward references that can't be resolved in this module
        annotations = getattr(cls, "__annotations__", {})
        if not annotations:
            return

        # Build evaluation namespace from module globals
        module = __import__(cls.__module__, fromlist=[""])
        globalns = getattr(module, "__dict__", {})
        # Make ``Annotated`` resolvable even when the module did not import it.
        # The fallback is passed as eval *locals* instead of being written into
        # the module's own globals, so user modules are not polluted, while
        # anything created during evaluation still sees the live module globals.
        # When the module already defines ``Annotated``, its binding is used.
        fallback_ns: dict[str, Any] | None = None if "Annotated" in globalns else {"Annotated": Annotated}

        for attr_name, annotation in annotations.items():
            # Evaluate string annotation if needed (from __future__ import annotations)
            if isinstance(annotation, str):
                try:
                    hint = eval(annotation, globalns, fallback_ns)  # noqa: S307
                except Exception:
                    # Can't evaluate this annotation, skip it
                    continue
            else:
                hint = annotation
            # Skip if not Annotated
            if get_origin(hint) is not Annotated:
                continue

            # Get the base type and metadata from Annotated[BaseType, metadata...]
            args = get_args(hint)
            if not args:
                continue

            base_type = args[0]
            metadata = args[1:]

            # Look for Arg in the metadata
            for meta in metadata:
                if isinstance(meta, Arg):
                    # Check if an Arg descriptor already exists for this name
                    # (could be from a parent class or explicit assignment)
                    existing = getattr(cls, attr_name, None)
                    if isinstance(existing, Arg):
                        continue

                    # Set the name on the Arg (normally done by __set_name__)
                    meta._name = attr_name

                    # Set _returns_any_arrow_value based on the annotated type
                    meta._returns_any_arrow_value = base_type is AnyArrowValue

                    # Infer _type_param from the base type for metadata extraction
                    # and type_bound validation
                    if base_type is AnyArrowValue or meta.type_bound is not None:
                        # AnyArrowValue or type_bound means this is an AnyArrow arg
                        from vgi.arguments import AnyArrow

                        meta._type_param = AnyArrow
                    elif meta._type_param is None:
                        # Use the annotation type as the type param
                        meta._type_param = base_type

                    # Install the Arg as a class attribute
                    setattr(cls, attr_name, meta)
                    # Only the first installable Arg per annotation is used.
                    break

    def __init__(
        self,
        *,
        logger: logging.Logger,
    ) -> None:
        """Initialize the function with its logger.

        Args:
            logger: Logger for function diagnostics.

        """
        self.logger = logger

    @final
    @classmethod
    def _validate_output_schema(cls, batch: pa.RecordBatch, output_schema: pa.Schema) -> None:
        """Validate that a batch conforms to the expected output schema.

        Args:
            batch: Batch whose schema is checked.
            output_schema: Schema the batch is required to have.

        Raises:
            SchemaValidationError: If ``batch.schema`` differs from ``output_schema``.

        """
        if batch.schema != output_schema:
            raise SchemaValidationError(
                "Output batch schema does not match expected output_schema.",
                expected=output_schema,
                actual=batch.schema,
                context=f"output from {cls.__name__}",
            )

    @final
    @classmethod
    def _validate_input_schema(cls, batch: pa.RecordBatch, input_schema: pa.Schema) -> None:
        """Validate that a batch conforms to the expected input schema.

        Args:
            batch: Batch whose schema is checked.
            input_schema: Schema the batch is required to have.

        Raises:
            SchemaValidationError: If ``batch.schema`` differs from ``input_schema``.

        """
        if batch.schema != input_schema:
            raise SchemaValidationError(
                "Input batch schema does not match expected input_schema.",
                expected=input_schema,
                actual=batch.schema,
                context=f"input to {cls.__name__}",
            )
|