vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/catalog/setting.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Setting descriptor for declarative worker settings.
|
|
4
|
+
|
|
5
|
+
This module provides the Setting descriptor class for defining worker settings
|
|
6
|
+
using Python's Annotated type hints, similar to how Arg works for function arguments.
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import (
|
|
12
|
+
TYPE_CHECKING,
|
|
13
|
+
Annotated,
|
|
14
|
+
Any,
|
|
15
|
+
ClassVar,
|
|
16
|
+
cast,
|
|
17
|
+
get_args,
|
|
18
|
+
get_origin,
|
|
19
|
+
get_type_hints,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
import pyarrow as pa
|
|
23
|
+
from vgi_rpc.utils import deserialize_record_batch, serialize_record_batch_bytes
|
|
24
|
+
|
|
25
|
+
from vgi.schema_utils import schema
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from typing import Self
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"Setting",
|
|
32
|
+
"SettingSpec",
|
|
33
|
+
"extract_setting_specs",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class SettingSpec:
    """Resolved, serializable description of a single worker setting.

    A ``SettingSpec`` is what a ``Setting`` declaration becomes once its Arrow
    type is known. It can be written to Arrow IPC bytes with ``serialize`` and
    rebuilt from a record batch with ``deserialize``.

    Attributes:
        name: The setting name (from the class attribute name).
        desc: Human-readable description.
        type: The Arrow data type for this setting.
        default: The default value (Python object); None means "no default".

    """

    name: str
    desc: str
    type: pa.DataType
    default: Any

    # Layout of the single-row batch that serialize() emits and that
    # deserialize() reads its columns from.
    ARROW_SCHEMA: ClassVar[pa.Schema] = pa.schema(
        [
            pa.field("name", pa.string(), nullable=False),
            pa.field("description", pa.string(), nullable=False),
            pa.field("type", pa.binary(), nullable=False),
            pa.field("default_value", pa.binary(), nullable=True),
        ]  # type: ignore[arg-type]
    )

    def serialize(self) -> bytes:
        """Encode this spec as a one-row Arrow batch and return its bytes.

        The Arrow type is stored as the serialized form of a schema holding a
        single ``value`` field. The default, when not None, is stored as a
        serialized one-row batch using that same schema.

        Returns:
            The bytes produced by ``serialize_record_batch_bytes`` for a batch
            laid out according to ``ARROW_SCHEMA``.

        """
        # A one-field schema is the carrier for the bare data type.
        value_schema = schema(value=self.type)
        encoded_type = value_schema.serialize().to_pybytes()

        # The default column stays null when there is no default.
        if self.default is None:
            encoded_default: bytes | None = None
        else:
            encoded_default = serialize_record_batch_bytes(
                pa.RecordBatch.from_pydict({"value": [self.default]}, schema=value_schema)
            )

        record = {
            "name": self.name,
            "description": self.desc,
            "type": encoded_type,
            "default_value": encoded_default,
        }
        return serialize_record_batch_bytes(pa.RecordBatch.from_pylist([record], schema=self.ARROW_SCHEMA))

    @classmethod
    def deserialize(cls, batch: pa.RecordBatch) -> "Self":
        """Rebuild a spec from a record batch in the ``ARROW_SCHEMA`` layout.

        Args:
            batch: Batch holding the ``name``, ``description``, ``type`` and
                ``default_value`` columns.

        Returns:
            A new instance populated from the batch's row.

        """
        from vgi_rpc.utils import _validate_single_row_batch

        # NOTE(review): presumably this rejects batches that are not exactly
        # one row or lack a required field - confirm against vgi_rpc.
        row = _validate_single_row_batch(
            batch,
            cls.__name__,
            required_fields=["name", "description", "type"],
        )

        # The type column holds a serialized schema; its "value" field carries the type.
        encoded_type = cast(bytes, row["type"])
        data_type = pa.ipc.read_schema(pa.py_buffer(encoded_type)).field("value").type

        # A null default column means the setting has no default.
        encoded_default = row["default_value"]
        if encoded_default is None:
            default: Any = None
        else:
            default_batch, _ = deserialize_record_batch(cast(bytes, encoded_default))
            default = default_batch.column("value")[0].as_py()

        return cls(
            name=cast(str, row["name"]),
            desc=cast(str, row["description"]),
            type=data_type,
            default=default,
        )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# Arrow type used for a setting whose annotation is one of these built-in
# Python types. Lookups are by exact type object.
_PYTHON_TO_ARROW: dict[type, pa.DataType] = {
    bool: pa.bool_(),
    int: pa.int64(),
    float: pa.float64(),
    str: pa.string(),
    bytes: pa.binary(),
}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _resolve_arrow_type(type_hint: type | pa.DataType) -> pa.DataType:
    """Turn an annotation's base type into an Arrow DataType.

    Args:
        type_hint: Either an Arrow DataType, which is returned unchanged, or
            one of the Python types listed in ``_PYTHON_TO_ARROW``
            (bool, int, float, str, bytes).

    Returns:
        The corresponding Arrow DataType.

    Raises:
        TypeError: If ``type_hint`` is neither an Arrow DataType nor a key of
            ``_PYTHON_TO_ARROW``.

    """
    # Arrow types need no translation.
    if isinstance(type_hint, pa.DataType):
        return type_hint

    # No mapped value is None, so None reliably means "not a known Python type".
    mapped = _PYTHON_TO_ARROW.get(type_hint)
    if mapped is None:
        raise TypeError(
            f"Cannot resolve Arrow type from: {type_hint}. "
            "Use a Python type (bool, int, float, str, bytes) or Arrow DataType."
        )
    return mapped
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@dataclass
class Setting:
    """Metadata marker for declaring a worker setting via ``Annotated``.

    Place an instance inside an ``Annotated[...]`` hint on a Worker's Settings
    class. Unless ``arrow_type`` is given, the Arrow type is resolved from the
    base type of that ``Annotated`` hint.

    Attributes:
        desc: Human-readable description of the setting.
        arrow_type: Optional explicit Arrow type (overrides inference from annotation).

    """

    desc: str = ""
    arrow_type: pa.DataType | None = None

    # Attribute name recorded by __set_name__; not an __init__ argument and
    # left out of the repr.
    _name: str = field(default="", init=False, repr=False)

    def __set_name__(self, owner: type, name: str) -> None:
        """Remember the attribute name this object was bound to on a class."""
        self._name = name

    def __get__(self, obj: object | None, objtype: type | None = None) -> Any:
        """Descriptor read hook.

        Class access (``obj`` is None) returns this descriptor. Instance
        access returns the attribute named ``_name`` looked up on the
        instance's class, or None when the class has no such attribute.

        NOTE(review): when this Setting is itself that class attribute, the
        class-level lookup appears to resolve back to this descriptor rather
        than to a separate default value - confirm that is intended.
        """
        if obj is not None:
            # Look the name up on the class, not on the instance.
            return getattr(type(obj), self._name, None)
        return self
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def extract_setting_specs(settings_cls: type) -> list[SettingSpec]:
    """Extract SettingSpec objects from a Settings class.

    Every class-level type hint of the form ``Annotated[base, ..., Setting(...), ...]``
    yields one SettingSpec. Hints that are not ``Annotated``, or that carry no
    ``Setting`` in their metadata, are ignored. The spec's default is the value
    of the class attribute with the same name (None when the attribute is absent).

    Args:
        settings_cls: A class with Annotated[type, Setting(...)] attributes.

    Returns:
        List of SettingSpec objects extracted from the class, in the order the
        hints are reported. Empty when the class's type hints cannot be
        resolved; that failure is logged as a warning rather than raised.

    Raises:
        TypeError: If a setting's Arrow type cannot be resolved.

    """
    # include_extras=True keeps the Annotated[...] wrappers, which is where the
    # Setting metadata lives.
    try:
        hints = get_type_hints(settings_cls, include_extras=True)
    except Exception:
        # Extraction is best-effort, so callers still get an empty list, but
        # the failure is logged: an unresolvable annotation would otherwise
        # make every setting vanish without any diagnostic.
        import logging

        logging.getLogger(__name__).warning(
            "Could not resolve type hints for settings class %r; no settings extracted",
            settings_cls,
            exc_info=True,
        )
        return []

    specs: list[SettingSpec] = []
    for name, hint in hints.items():
        # Only Annotated hints can carry a Setting.
        if get_origin(hint) is not Annotated:
            continue

        args = get_args(hint)
        if len(args) < 2:
            continue
        base_type, metadata = args[0], args[1:]

        # The first Setting found in the metadata wins.
        setting = next((item for item in metadata if isinstance(item, Setting)), None)
        if setting is None:
            continue

        # The class attribute's value, if any, is the setting's default.
        default = getattr(settings_cls, name, None)

        # An explicit Setting.arrow_type takes precedence; otherwise the type
        # is resolved from the annotation's base type (Python type or Arrow DataType).
        if setting.arrow_type is not None:
            arrow_type = setting.arrow_type
        else:
            arrow_type = _resolve_arrow_type(base_type)

        specs.append(
            SettingSpec(
                name=name,
                desc=setting.desc,
                type=arrow_type,
                default=default,
            )
        )

    return specs
|
vgi/catalog/storage.py
ADDED
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Storage for VGI catalog state.
|
|
4
|
+
|
|
5
|
+
This module provides a storage protocol and implementation for persisting
|
|
6
|
+
catalog attach_opaque_data and transaction_opaque_data state across worker processes.
|
|
7
|
+
|
|
8
|
+
Protocol:
|
|
9
|
+
CatalogStorage: Protocol for catalog state persistence.
|
|
10
|
+
|
|
11
|
+
Implementation:
|
|
12
|
+
CatalogStorageSqlite: SQLite-backed storage implementation.
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import random
|
|
17
|
+
import sqlite3
|
|
18
|
+
import uuid
|
|
19
|
+
from typing import Any, Protocol
|
|
20
|
+
|
|
21
|
+
from vgi.catalog.catalog_interface import AttachOpaqueData, TransactionOpaqueData
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"CatalogStorage",
|
|
25
|
+
"CatalogStorageSqlite",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _get_default_db_path() -> str:
    """Return the default location of the catalog SQLite file.

    The file is ``vgi_catalog.db`` inside the ``vgi`` user state directory
    reported by platformdirs. The directory (including parents) is created if
    it does not exist yet; the database file itself is not created here.

    Returns:
        The resolved path of the database file, as a string.

    """
    from pathlib import Path

    from platformdirs import user_state_dir

    db_dir = Path(user_state_dir("vgi"))
    db_dir.mkdir(parents=True, exist_ok=True)
    db_file = db_dir / "vgi_catalog.db"
    return str(db_file.resolve())
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CatalogStorage(Protocol):
    """Structural interface for persisting VGI catalog state.

    Implementations keep two kinds of records:

    **Attachments** - one record per catalog attachment, keyed by its
    attach_opaque_data and holding the catalog name and attach options.

    **Transactions** - one record per active transaction, keyed by its
    transaction_opaque_data and holding the owning attachment and a
    serialized state blob.

    """

    # --- Attachment State ---

    def attach_put(self, attach_opaque_data: AttachOpaqueData, catalog_name: str, options: dict[str, Any]) -> None:
        """Record an attachment.

        Args:
            attach_opaque_data: Unique identifier for the attachment.
            catalog_name: Name of the attached catalog.
            options: Options passed during attachment.

        """
        ...

    def attach_get(self, attach_opaque_data: AttachOpaqueData) -> tuple[str, dict[str, Any]] | None:
        """Look up an attachment by its identifier.

        Args:
            attach_opaque_data: Unique identifier for the attachment.

        Returns:
            Tuple of (catalog_name, options), or None if not found.

        """
        ...

    def attach_delete(self, attach_opaque_data: AttachOpaqueData) -> None:
        """Remove an attachment record.

        Args:
            attach_opaque_data: Unique identifier for the attachment.

        """
        ...

    def attach_list(self) -> list[AttachOpaqueData]:
        """Enumerate the stored attachments.

        Returns:
            List of all attach opaque data values in storage.

        """
        ...

    # --- Transaction State ---

    def transaction_put(
        self, transaction_opaque_data: TransactionOpaqueData, attach_opaque_data: AttachOpaqueData, state: bytes
    ) -> None:
        """Record a transaction's state.

        Args:
            transaction_opaque_data: Unique identifier for the transaction.
            attach_opaque_data: Attachment the transaction belongs to.
            state: Serialized transaction state.

        """
        ...

    def transaction_get(self, transaction_opaque_data: TransactionOpaqueData) -> tuple[AttachOpaqueData, bytes] | None:
        """Look up a transaction by its identifier.

        Args:
            transaction_opaque_data: Unique identifier for the transaction.

        Returns:
            Tuple of (attach_opaque_data, state bytes), or None if not found.

        """
        ...

    def transaction_delete(self, transaction_opaque_data: TransactionOpaqueData) -> None:
        """Remove a transaction record.

        Args:
            transaction_opaque_data: Unique identifier for the transaction.

        """
        ...
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class CatalogStorageSqlite:
|
|
134
|
+
"""SQLite-backed storage for VGI catalog state.
|
|
135
|
+
|
|
136
|
+
This implementation uses SQLite with WAL mode to allow multiple worker
|
|
137
|
+
processes to share catalog state. It manages two tables:
|
|
138
|
+
|
|
139
|
+
- catalog_attachments: Maps attach_opaque_data to catalog name and options
|
|
140
|
+
- catalog_transactions: Tracks active transactions
|
|
141
|
+
|
|
142
|
+
"""
|
|
143
|
+
|
|
144
|
+
def __init__(self, db_path: str | None = None) -> None:
|
|
145
|
+
"""Initialize SQLite catalog storage.
|
|
146
|
+
|
|
147
|
+
Args:
|
|
148
|
+
db_path: Path to the SQLite database file. If None, uses a default
|
|
149
|
+
location in the user's state directory.
|
|
150
|
+
|
|
151
|
+
"""
|
|
152
|
+
self.db_path = db_path if db_path is not None else _get_default_db_path()
|
|
153
|
+
self._ensure_tables()
|
|
154
|
+
|
|
155
|
+
def _connect(self) -> sqlite3.Connection:
|
|
156
|
+
"""Create a new database connection."""
|
|
157
|
+
conn = sqlite3.connect(self.db_path, timeout=30.0)
|
|
158
|
+
conn.execute("PRAGMA journal_mode=WAL")
|
|
159
|
+
return conn
|
|
160
|
+
|
|
161
|
+
def _ensure_tables(self) -> None:
|
|
162
|
+
"""Create all storage tables if they don't exist."""
|
|
163
|
+
conn = self._connect()
|
|
164
|
+
try:
|
|
165
|
+
# Attachment table
|
|
166
|
+
conn.execute("""
|
|
167
|
+
CREATE TABLE IF NOT EXISTS catalog_attachments (
|
|
168
|
+
attach_opaque_data BLOB PRIMARY KEY,
|
|
169
|
+
catalog_name TEXT NOT NULL,
|
|
170
|
+
options_json TEXT NOT NULL,
|
|
171
|
+
created_at REAL DEFAULT (julianday('now'))
|
|
172
|
+
)
|
|
173
|
+
""")
|
|
174
|
+
# Transaction table
|
|
175
|
+
conn.execute("""
|
|
176
|
+
CREATE TABLE IF NOT EXISTS catalog_transactions (
|
|
177
|
+
transaction_opaque_data BLOB PRIMARY KEY,
|
|
178
|
+
attach_opaque_data BLOB NOT NULL,
|
|
179
|
+
state_data BLOB NOT NULL,
|
|
180
|
+
created_at REAL DEFAULT (julianday('now')),
|
|
181
|
+
FOREIGN KEY (attach_opaque_data) REFERENCES catalog_attachments(attach_opaque_data)
|
|
182
|
+
)
|
|
183
|
+
""")
|
|
184
|
+
conn.execute("""
|
|
185
|
+
CREATE INDEX IF NOT EXISTS idx_transactions_attach
|
|
186
|
+
ON catalog_transactions(attach_opaque_data)
|
|
187
|
+
""")
|
|
188
|
+
conn.commit()
|
|
189
|
+
finally:
|
|
190
|
+
conn.close()
|
|
191
|
+
|
|
192
|
+
# --- Attachment State ---
|
|
193
|
+
|
|
194
|
+
def attach_put(self, attach_opaque_data: AttachOpaqueData, catalog_name: str, options: dict[str, Any]) -> None:
|
|
195
|
+
"""Store attachment state."""
|
|
196
|
+
import json
|
|
197
|
+
|
|
198
|
+
# Opportunistically clean old entries (1% of calls)
|
|
199
|
+
if random.random() < 0.01:
|
|
200
|
+
self.cleanup_old_entries(max_age_days=7.0)
|
|
201
|
+
|
|
202
|
+
options_json = json.dumps(options)
|
|
203
|
+
|
|
204
|
+
conn = self._connect()
|
|
205
|
+
try:
|
|
206
|
+
conn.execute(
|
|
207
|
+
"""
|
|
208
|
+
INSERT OR REPLACE INTO catalog_attachments
|
|
209
|
+
(attach_opaque_data, catalog_name, options_json, created_at)
|
|
210
|
+
VALUES (?, ?, ?, julianday('now'))
|
|
211
|
+
""",
|
|
212
|
+
(attach_opaque_data, catalog_name, options_json),
|
|
213
|
+
)
|
|
214
|
+
conn.commit()
|
|
215
|
+
finally:
|
|
216
|
+
conn.close()
|
|
217
|
+
|
|
218
|
+
def attach_get(self, attach_opaque_data: AttachOpaqueData) -> tuple[str, dict[str, Any]] | None:
|
|
219
|
+
"""Retrieve attachment state by attach_opaque_data."""
|
|
220
|
+
import json
|
|
221
|
+
|
|
222
|
+
conn = self._connect()
|
|
223
|
+
try:
|
|
224
|
+
cursor = conn.execute(
|
|
225
|
+
"""SELECT catalog_name, options_json
|
|
226
|
+
FROM catalog_attachments WHERE attach_opaque_data = ?""",
|
|
227
|
+
(attach_opaque_data,),
|
|
228
|
+
)
|
|
229
|
+
row = cursor.fetchone()
|
|
230
|
+
finally:
|
|
231
|
+
conn.close()
|
|
232
|
+
|
|
233
|
+
if row is None:
|
|
234
|
+
return None
|
|
235
|
+
|
|
236
|
+
catalog_name: str = row[0]
|
|
237
|
+
options: dict[str, Any] = json.loads(row[1])
|
|
238
|
+
return (catalog_name, options)
|
|
239
|
+
|
|
240
|
+
def attach_delete(self, attach_opaque_data: AttachOpaqueData) -> None:
|
|
241
|
+
"""Delete attachment state."""
|
|
242
|
+
conn = self._connect()
|
|
243
|
+
try:
|
|
244
|
+
# Delete associated transactions first
|
|
245
|
+
conn.execute(
|
|
246
|
+
"DELETE FROM catalog_transactions WHERE attach_opaque_data = ?",
|
|
247
|
+
(attach_opaque_data,),
|
|
248
|
+
)
|
|
249
|
+
conn.execute(
|
|
250
|
+
"DELETE FROM catalog_attachments WHERE attach_opaque_data = ?",
|
|
251
|
+
(attach_opaque_data,),
|
|
252
|
+
)
|
|
253
|
+
conn.commit()
|
|
254
|
+
finally:
|
|
255
|
+
conn.close()
|
|
256
|
+
|
|
257
|
+
def attach_list(self) -> list[AttachOpaqueData]:
|
|
258
|
+
"""List all active attachment IDs."""
|
|
259
|
+
conn = self._connect()
|
|
260
|
+
try:
|
|
261
|
+
cursor = conn.execute("SELECT attach_opaque_data FROM catalog_attachments")
|
|
262
|
+
return [AttachOpaqueData(row[0]) for row in cursor.fetchall()]
|
|
263
|
+
finally:
|
|
264
|
+
conn.close()
|
|
265
|
+
|
|
266
|
+
# --- Transaction State ---
|
|
267
|
+
|
|
268
|
+
def transaction_put(
|
|
269
|
+
self, transaction_opaque_data: TransactionOpaqueData, attach_opaque_data: AttachOpaqueData, state: bytes
|
|
270
|
+
) -> None:
|
|
271
|
+
"""Store transaction state."""
|
|
272
|
+
# Opportunistically clean old entries (1% of calls)
|
|
273
|
+
if random.random() < 0.01:
|
|
274
|
+
self.cleanup_old_entries(max_age_days=7.0)
|
|
275
|
+
|
|
276
|
+
conn = self._connect()
|
|
277
|
+
try:
|
|
278
|
+
conn.execute(
|
|
279
|
+
"""
|
|
280
|
+
INSERT OR REPLACE INTO catalog_transactions
|
|
281
|
+
(transaction_opaque_data, attach_opaque_data, state_data, created_at)
|
|
282
|
+
VALUES (?, ?, ?, julianday('now'))
|
|
283
|
+
""",
|
|
284
|
+
(transaction_opaque_data, attach_opaque_data, state),
|
|
285
|
+
)
|
|
286
|
+
conn.commit()
|
|
287
|
+
finally:
|
|
288
|
+
conn.close()
|
|
289
|
+
|
|
290
|
+
def transaction_get(self, transaction_opaque_data: TransactionOpaqueData) -> tuple[AttachOpaqueData, bytes] | None:
|
|
291
|
+
"""Retrieve transaction state."""
|
|
292
|
+
conn = self._connect()
|
|
293
|
+
try:
|
|
294
|
+
cursor = conn.execute(
|
|
295
|
+
"""SELECT attach_opaque_data, state_data
|
|
296
|
+
FROM catalog_transactions WHERE transaction_opaque_data = ?""",
|
|
297
|
+
(transaction_opaque_data,),
|
|
298
|
+
)
|
|
299
|
+
row = cursor.fetchone()
|
|
300
|
+
finally:
|
|
301
|
+
conn.close()
|
|
302
|
+
|
|
303
|
+
if row is None:
|
|
304
|
+
return None
|
|
305
|
+
|
|
306
|
+
return (AttachOpaqueData(row[0]), row[1])
|
|
307
|
+
|
|
308
|
+
def transaction_delete(self, transaction_opaque_data: TransactionOpaqueData) -> None:
|
|
309
|
+
"""Delete transaction state."""
|
|
310
|
+
conn = self._connect()
|
|
311
|
+
try:
|
|
312
|
+
conn.execute(
|
|
313
|
+
"DELETE FROM catalog_transactions WHERE transaction_opaque_data = ?",
|
|
314
|
+
(transaction_opaque_data,),
|
|
315
|
+
)
|
|
316
|
+
conn.commit()
|
|
317
|
+
finally:
|
|
318
|
+
conn.close()
|
|
319
|
+
|
|
320
|
+
# --- Utility Methods ---
|
|
321
|
+
|
|
322
|
+
def generate_attach_opaque_data(self) -> AttachOpaqueData:
|
|
323
|
+
"""Generate a new unique attach_opaque_data.
|
|
324
|
+
|
|
325
|
+
Returns:
|
|
326
|
+
A new AttachOpaqueData based on UUID4.
|
|
327
|
+
|
|
328
|
+
"""
|
|
329
|
+
return AttachOpaqueData(uuid.uuid4().bytes)
|
|
330
|
+
|
|
331
|
+
def generate_transaction_opaque_data(self) -> TransactionOpaqueData:
|
|
332
|
+
"""Generate a new unique transaction_opaque_data.
|
|
333
|
+
|
|
334
|
+
Returns:
|
|
335
|
+
A new TransactionOpaqueData based on UUID4.
|
|
336
|
+
|
|
337
|
+
"""
|
|
338
|
+
return TransactionOpaqueData(uuid.uuid4().bytes)
|
|
339
|
+
|
|
340
|
+
# --- Maintenance ---
|
|
341
|
+
|
|
342
|
+
def cleanup_old_entries(self, max_age_days: float = 7.0) -> int:
|
|
343
|
+
"""Remove entries older than the specified age from all tables.
|
|
344
|
+
|
|
345
|
+
Args:
|
|
346
|
+
max_age_days: Maximum age in days for entries to keep.
|
|
347
|
+
|
|
348
|
+
Returns:
|
|
349
|
+
Total number of entries deleted.
|
|
350
|
+
|
|
351
|
+
"""
|
|
352
|
+
conn = self._connect()
|
|
353
|
+
try:
|
|
354
|
+
# Delete old transactions first (foreign key constraint)
|
|
355
|
+
cursor1 = conn.execute(
|
|
356
|
+
"""
|
|
357
|
+
DELETE FROM catalog_transactions
|
|
358
|
+
WHERE julianday('now') - created_at > ?
|
|
359
|
+
""",
|
|
360
|
+
(max_age_days,),
|
|
361
|
+
)
|
|
362
|
+
cursor2 = conn.execute(
|
|
363
|
+
"""
|
|
364
|
+
DELETE FROM catalog_attachments
|
|
365
|
+
WHERE julianday('now') - created_at > ?
|
|
366
|
+
""",
|
|
367
|
+
(max_age_days,),
|
|
368
|
+
)
|
|
369
|
+
conn.commit()
|
|
370
|
+
return int(cursor1.rowcount) + int(cursor2.rowcount)
|
|
371
|
+
finally:
|
|
372
|
+
conn.close()
|
vgi/client/__init__.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""VGI client package for communicating with VGI workers.
|
|
4
|
+
|
|
5
|
+
This package provides:
|
|
6
|
+
- Client: A class for programmatic interaction with VGI workers, including
|
|
7
|
+
both function invocation and catalog operations
|
|
8
|
+
- ClientError: Exception raised by Client function operations
|
|
9
|
+
- CatalogClientMixin: Mixin class providing catalog operations
|
|
10
|
+
- OutputWriter: Helper for writing output in various formats
|
|
11
|
+
- main: CLI entry point
|
|
12
|
+
|
|
13
|
+
Usage (API):
|
|
14
|
+
from vgi.client import Client, ClientError
|
|
15
|
+
from vgi.arguments import Arguments
|
|
16
|
+
|
|
17
|
+
with Client("./my_worker.py") as client:
|
|
18
|
+
for batch in client.table_in_out_function(
|
|
19
|
+
function_name="echo",
|
|
20
|
+
arguments=Arguments(positional=[], named={}),
|
|
21
|
+
input=input_batches,
|
|
22
|
+
):
|
|
23
|
+
process(batch)
|
|
24
|
+
|
|
25
|
+
Usage (Catalog API):
|
|
26
|
+
from vgi.client import Client
|
|
27
|
+
|
|
28
|
+
client = Client("./my_worker")
|
|
29
|
+
result = client.catalog_attach(
|
|
30
|
+
name="my_catalog", options={}, data_version_spec=None, implementation_version=None
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
Usage (CLI):
|
|
34
|
+
vgi-client --input data.parquet --function echo
|
|
35
|
+
vgi-client --input data.parquet --function sum_all_columns
|
|
36
|
+
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
from typing import TYPE_CHECKING, Any
|
|
40
|
+
|
|
41
|
+
from vgi.client.catalog_mixin import CatalogClientMixin
|
|
42
|
+
from vgi.client.client import Client, ClientError, ResumableTableScan, ResumeUnsupported
|
|
43
|
+
|
|
44
|
+
if TYPE_CHECKING:
|
|
45
|
+
from vgi.client.cli import OutputWriter, main
|
|
46
|
+
|
|
47
|
+
__all__ = [
|
|
48
|
+
"CatalogClientMixin",
|
|
49
|
+
"Client",
|
|
50
|
+
"ClientError",
|
|
51
|
+
"OutputWriter",
|
|
52
|
+
"ResumableTableScan",
|
|
53
|
+
"ResumeUnsupported",
|
|
54
|
+
"main",
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Lazy-load the CLI surface. ``vgi.client.cli`` transitively imports
|
|
59
|
+
# ``pyarrow.parquet`` / ``pyarrow._s3fs`` / ``pyarrow._gcsfs`` etc., which add
|
|
60
|
+
# ~2 seconds to the cold import path. Programmatic users of ``Client`` don't
|
|
61
|
+
# need any of that; only the ``vgi-client`` CLI entry point does.
|
|
62
|
+
def __getattr__(name: str) -> Any:
    """Module-level attribute hook that loads the CLI exports on demand.

    Only ``OutputWriter`` and ``main`` are served; they are fetched from
    ``vgi.client.cli``, which is imported the first time either is requested.

    Raises:
        AttributeError: For any other name.

    """
    if name not in {"OutputWriter", "main"}:
        raise AttributeError(f"module 'vgi.client' has no attribute {name!r}")

    from vgi.client import cli

    return getattr(cli, name)
|