vgi-python 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. vgi/__init__.py +152 -0
  2. vgi/_duckdb.py +62 -0
  3. vgi/_storage_profile.py +132 -0
  4. vgi/_test_fixtures/__init__.py +20 -0
  5. vgi/_test_fixtures/accumulate/__init__.py +19 -0
  6. vgi/_test_fixtures/accumulate/worker.py +762 -0
  7. vgi/_test_fixtures/aggregate/__init__.py +62 -0
  8. vgi/_test_fixtures/aggregate/_common.py +21 -0
  9. vgi/_test_fixtures/aggregate/basic.py +232 -0
  10. vgi/_test_fixtures/aggregate/dynamic.py +409 -0
  11. vgi/_test_fixtures/aggregate/generic.py +86 -0
  12. vgi/_test_fixtures/aggregate/listagg.py +71 -0
  13. vgi/_test_fixtures/aggregate/percentile.py +107 -0
  14. vgi/_test_fixtures/aggregate/streaming.py +192 -0
  15. vgi/_test_fixtures/aggregate/varargs.py +75 -0
  16. vgi/_test_fixtures/aggregate/window.py +380 -0
  17. vgi/_test_fixtures/attach_options.py +308 -0
  18. vgi/_test_fixtures/bad_protocol.py +62 -0
  19. vgi/_test_fixtures/cancellable.py +336 -0
  20. vgi/_test_fixtures/catalog.py +813 -0
  21. vgi/_test_fixtures/http_server.py +394 -0
  22. vgi/_test_fixtures/nest_tensor.py +614 -0
  23. vgi/_test_fixtures/orchard_catalog.py +47 -0
  24. vgi/_test_fixtures/projection_repro/__init__.py +6 -0
  25. vgi/_test_fixtures/projection_repro/worker.py +454 -0
  26. vgi/_test_fixtures/scalar/__init__.py +116 -0
  27. vgi/_test_fixtures/scalar/_common.py +69 -0
  28. vgi/_test_fixtures/scalar/arithmetic.py +321 -0
  29. vgi/_test_fixtures/scalar/binary.py +120 -0
  30. vgi/_test_fixtures/scalar/formatting.py +176 -0
  31. vgi/_test_fixtures/scalar/geo.py +300 -0
  32. vgi/_test_fixtures/scalar/null_handling.py +107 -0
  33. vgi/_test_fixtures/scalar/random_demo.py +171 -0
  34. vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
  35. vgi/_test_fixtures/scalar/type_info.py +219 -0
  36. vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
  37. vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
  38. vgi/_test_fixtures/simple_writable.py +793 -0
  39. vgi/_test_fixtures/table/__init__.py +221 -0
  40. vgi/_test_fixtures/table/_common.py +162 -0
  41. vgi/_test_fixtures/table/batch_index.py +283 -0
  42. vgi/_test_fixtures/table/batch_index_broken.py +200 -0
  43. vgi/_test_fixtures/table/catalog_scans.py +162 -0
  44. vgi/_test_fixtures/table/filters.py +1005 -0
  45. vgi/_test_fixtures/table/late_materialization.py +249 -0
  46. vgi/_test_fixtures/table/make_series.py +273 -0
  47. vgi/_test_fixtures/table/misc.py +499 -0
  48. vgi/_test_fixtures/table/order_modes.py +164 -0
  49. vgi/_test_fixtures/table/pairs.py +437 -0
  50. vgi/_test_fixtures/table/partition_columns.py +472 -0
  51. vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
  52. vgi/_test_fixtures/table/profiling_example.py +195 -0
  53. vgi/_test_fixtures/table/required_filters.py +234 -0
  54. vgi/_test_fixtures/table/sequence.py +710 -0
  55. vgi/_test_fixtures/table/settings.py +426 -0
  56. vgi/_test_fixtures/table/transaction_storage.py +162 -0
  57. vgi/_test_fixtures/table/tt_pushdown.py +191 -0
  58. vgi/_test_fixtures/table/versioned.py +230 -0
  59. vgi/_test_fixtures/table_in_out.py +1392 -0
  60. vgi/_test_fixtures/versioned.py +155 -0
  61. vgi/_test_fixtures/versioned_tables.py +595 -0
  62. vgi/_test_fixtures/worker.py +1631 -0
  63. vgi/_test_fixtures/writable/__init__.py +8 -0
  64. vgi/_test_fixtures/writable/generic.py +236 -0
  65. vgi/_test_fixtures/writable/table.py +149 -0
  66. vgi/_test_fixtures/writable/worker.py +1148 -0
  67. vgi/aggregate_function.py +607 -0
  68. vgi/argument_spec.py +472 -0
  69. vgi/arguments.py +1747 -0
  70. vgi/auth.py +55 -0
  71. vgi/catalog/__init__.py +88 -0
  72. vgi/catalog/attach_option.py +206 -0
  73. vgi/catalog/catalog_interface.py +2767 -0
  74. vgi/catalog/descriptors.py +870 -0
  75. vgi/catalog/duckdb_statistics.py +377 -0
  76. vgi/catalog/secret_type.py +96 -0
  77. vgi/catalog/setting.py +253 -0
  78. vgi/catalog/storage.py +372 -0
  79. vgi/client/__init__.py +67 -0
  80. vgi/client/catalog_mixin.py +1251 -0
  81. vgi/client/cli.py +582 -0
  82. vgi/client/cli_catalog.py +182 -0
  83. vgi/client/cli_schema.py +270 -0
  84. vgi/client/cli_table.py +907 -0
  85. vgi/client/cli_transaction.py +97 -0
  86. vgi/client/cli_utils.py +441 -0
  87. vgi/client/cli_view.py +303 -0
  88. vgi/client/client.py +2183 -0
  89. vgi/exceptions.py +205 -0
  90. vgi/function.py +245 -0
  91. vgi/function_storage.py +1636 -0
  92. vgi/function_storage_azure_sql.py +922 -0
  93. vgi/function_storage_cf_do.py +740 -0
  94. vgi/http/__init__.py +25 -0
  95. vgi/http/demo_storage.py +212 -0
  96. vgi/http/worker_page.py +1252 -0
  97. vgi/invocation.py +154 -0
  98. vgi/logging_config.py +93 -0
  99. vgi/meta_worker.py +661 -0
  100. vgi/metadata.py +1403 -0
  101. vgi/otel.py +406 -0
  102. vgi/protocol.py +2418 -0
  103. vgi/protocol_version.txt +1 -0
  104. vgi/py.typed +0 -0
  105. vgi/scalar_function.py +1211 -0
  106. vgi/schema_utils.py +234 -0
  107. vgi/secret_protocol.py +124 -0
  108. vgi/secret_service.py +238 -0
  109. vgi/serve.py +769 -0
  110. vgi/table_buffering_function.py +443 -0
  111. vgi/table_filter_pushdown.py +1528 -0
  112. vgi/table_function.py +1130 -0
  113. vgi/table_in_out_function.py +383 -0
  114. vgi/transactor/__init__.py +24 -0
  115. vgi/transactor/_duckdb_compat.py +27 -0
  116. vgi/transactor/client.py +137 -0
  117. vgi/transactor/protocol.py +149 -0
  118. vgi/transactor/server.py +740 -0
  119. vgi/worker.py +4761 -0
  120. vgi_python-0.8.0.dist-info/METADATA +735 -0
  121. vgi_python-0.8.0.dist-info/RECORD +124 -0
  122. vgi_python-0.8.0.dist-info/WHEEL +4 -0
  123. vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
  124. vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
@@ -0,0 +1,383 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Framework for implementing streaming table-in-table-out functions.
4
+
5
+ TableInOutGenerator processes input batches via a per-batch callback.
6
+ Each call to process() emits one output batch via out.emit().
7
+
8
+ TableInOutFunction provides a simpler callback API (transform/finish)
9
+ with automatic state serialization for distributed processing.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ from dataclasses import dataclass
16
+ from typing import TYPE_CHECKING, final, get_args, get_origin
17
+
18
+ import pyarrow as pa
19
+ from vgi_rpc import ArrowSerializableDataclass
20
+ from vgi_rpc.rpc import OutputCollector
21
+ from vgi_rpc.utils import empty_batch
22
+
23
+ from vgi.function_storage import BoundStorage, FrameworkNS
24
+ from vgi.invocation import (
25
+ BindResponse,
26
+ )
27
+ from vgi.table_function import (
28
+ _ON_CANCEL_CAVEATS,
29
+ BindParams,
30
+ ProcessParams,
31
+ TableFunctionBase,
32
+ )
33
+
34
+ if TYPE_CHECKING:
35
+ pass
36
+
37
+ __all__ = [
38
+ "TableInOutGenerator",
39
+ "TableInOutFunction",
40
+ "TableInOutFunctionStateNoOp",
41
+ "pack_int_cursor",
42
+ "unpack_int_cursor",
43
+ ]
44
+
45
+
46
+ # --- Cursor helpers for cursor-based finalize streams -----------------------
47
+ #
48
+ # The framework's BufferedFinalizeState carries an opaque ``cursor: bytes``
49
+ # wire-state field. The canonical encoding is the int64 of the last
50
+ # state_log id consumed; these helpers make that intent explicit at
51
+ # call sites without coupling user code to struct layout.
52
+
53
+
54
def pack_int_cursor(value: int) -> bytes:
    """Serialize ``value`` as an 8-byte, little-endian, signed integer.

    This is the canonical encoding for an opaque ``cursor`` wire field,
    e.g. the id of the last state_log entry consumed.

    Args:
        value: Integer to encode; must fit in a signed 64-bit range.

    Returns:
        Exactly eight bytes.

    Raises:
        OverflowError: If ``value`` does not fit in a signed int64.
    """
    encoded = value.to_bytes(length=8, byteorder="little", signed=True)
    return encoded
57
+
58
+
59
def unpack_int_cursor(cursor: bytes, default: int = -1) -> int:
    """Decode a little-endian signed integer cursor.

    An empty cursor (``b""``) means "no prior cursor exists" and yields
    ``default``. The default of ``-1`` acts as a before-first sentinel so a
    reader starts at the beginning of a state_log.

    Args:
        cursor: Packed cursor bytes, or ``b""`` when there is none.
        default: Value returned for an empty cursor.

    Returns:
        The decoded integer, or ``default`` for an empty cursor.
    """
    return int.from_bytes(cursor, byteorder="little", signed=True) if cursor else default
68
+
69
+
70
+ class TableInOutGenerator[TArgs, TState = None](TableFunctionBase[TArgs]):
71
+ """Base class for streaming table functions that transform Arrow RecordBatches.
72
+
73
+ Each call to process() should emit exactly one output batch via out.emit().
74
+ Use TState to persist state between process() calls.
75
+
76
+ For functions that need a finalize phase (e.g., aggregation), override
77
+ finalize() to return the final output batches.
78
+
79
+ """
80
+
81
+ # Subclasses opt into framework-managed state by setting this to a
82
+ # concrete ArrowSerializableDataclass type. Default None means
83
+ # process()/finalize() get state=None and the framework skips its
84
+ # round-trip. TableInOutFunction's __init_subclass__ infers this from
85
+ # the TState type parameter when a subclass declares one. Constrained
86
+ # to ArrowSerializableDataclass so the framework can call
87
+ # serialize_to_bytes / deserialize_from_bytes on instances without
88
+ # further type narrowing at the call site.
89
+ state_class: type[ArrowSerializableDataclass] | None = None
90
+
91
+ @classmethod
92
+ def has_finalize_override(cls) -> bool:
93
+ """Whether this class's ``finalize``/``finish`` represents real work.
94
+
95
+ Returns True iff either:
96
+
97
+ - The class's ``Meta`` declares ``has_finalize`` as ``True`` or ``False``
98
+ (explicit override — the declared value wins, even if it disagrees
99
+ with the auto-detection).
100
+ - Auto-detection finds a user subclass (one that is itself a
101
+ ``TableInOutGenerator`` subclass) strictly above the VGI bases in
102
+ the MRO defining a callable ``finish`` or ``finalize`` attribute.
103
+
104
+ The framework uses this to decide whether to advertise a finalize
105
+ callback to DuckDB; DuckDB rejects LATERAL with correlated input on
106
+ table functions that register ``in_out_function_final``.
107
+ """
108
+ # Explicit Meta override.
109
+ meta = getattr(cls, "Meta", None)
110
+ explicit = getattr(meta, "has_finalize", None) if meta is not None else None
111
+ if explicit is not None:
112
+ return bool(explicit)
113
+
114
+ # Auto-detect.
115
+ bases: set[type] = {TableInOutGenerator, TableInOutFunction}
116
+ for klass in cls.__mro__:
117
+ if klass in bases:
118
+ return False
119
+ # Only count overrides defined on an actual TableInOut subclass, so
120
+ # an unrelated mixin with an identically-named attribute can't
121
+ # trigger a false positive.
122
+ if not (isinstance(klass, type) and issubclass(klass, TableInOutGenerator)):
123
+ continue
124
+ for attr_name in ("finish", "finalize"):
125
+ raw = klass.__dict__.get(attr_name)
126
+ if raw is None:
127
+ continue
128
+ if isinstance(raw, (classmethod, staticmethod)):
129
+ raw = raw.__func__
130
+ if callable(raw):
131
+ return True
132
+ return False
133
+
134
+ @classmethod
135
+ def on_bind(
136
+ cls,
137
+ params: BindParams[TArgs],
138
+ ) -> BindResponse:
139
+ """Pass-through default — output schema is the input schema.
140
+
141
+ Override to compute a dynamic output type or validate arguments.
142
+ See ``TableFunctionBase.on_bind`` for the broader contract.
143
+ """
144
+ assert params.bind_call.input_schema is not None
145
+ return BindResponse(output_schema=params.bind_call.input_schema)
146
+
147
+ # bind / on_init / global_init are defined on TableFunctionBase.
148
+
149
+ @classmethod
150
+ def initial_state(cls, params: ProcessParams[TArgs]) -> TState | None:
151
+ """Create initial processing state. Override when TState is used.
152
+
153
+ Called once during init to create the state object that will be
154
+ passed to process() on each input batch.
155
+
156
+ Args:
157
+ params: Process parameters including arguments and schemas.
158
+
159
+ Returns:
160
+ Initial state, or None if no state is needed.
161
+
162
+ """
163
+ return None
164
+
165
+ @classmethod
166
+ def process(
167
+ cls,
168
+ params: ProcessParams[TArgs],
169
+ state: TState,
170
+ batch: pa.RecordBatch,
171
+ out: OutputCollector,
172
+ ) -> None:
173
+ """Process one input batch.
174
+
175
+ Called once per input batch during the INPUT phase. Must call
176
+ out.emit(batch) exactly once to produce output.
177
+
178
+ Use out.client_log(level, message) for in-band logging.
179
+
180
+ Args:
181
+ params: Process parameters including arguments and schemas.
182
+ state: Mutable state persisted between calls. None if TState not used.
183
+ batch: The input RecordBatch to process.
184
+ out: OutputCollector for emitting output and logging.
185
+
186
+ """
187
+ out.emit(batch)
188
+
189
+ @classmethod
190
+ def finalize(cls, params: ProcessParams[TArgs]) -> list[pa.RecordBatch]:
191
+ """Finalize processing and produce any remaining output.
192
+
193
+ Called after all input batches have been processed during the
194
+ FINALIZE phase. Override to emit buffered or aggregated results.
195
+
196
+ Args:
197
+ params: Process parameters including arguments and schemas.
198
+
199
+ Returns:
200
+ List of output RecordBatches, or empty list if no finalization needed.
201
+
202
+ """
203
+ return []
204
+
205
+ @classmethod
206
+ def on_cancel(cls, params: ProcessParams[TArgs], state: TState | None) -> None: # noqa: D102
207
+ pass
208
+
209
+ on_cancel.__func__.__doc__ = ( # type: ignore[attr-defined]
210
+ f"""Release resources when the stream is cancelled before natural end.
211
+
212
+ The VGI C++ extension fires this hook when a DuckDB query tears
213
+ down a VGI table-in-out scan early (LIMIT clause upstream, user
214
+ break, Ctrl-C, exception unwind). Override to release expensive
215
+ per-stream resources the function was holding in ``state``
216
+ (database cursors, LLM streaming sessions, file handles, GPU
217
+ buffers).
218
+
219
+ {_ON_CANCEL_CAVEATS}
220
+
221
+ Args:
222
+ params: Process parameters (same as ``process()`` received).
223
+ state: The current user state; ``None`` when state is unused.
224
+ """
225
+ )
226
+
227
+
228
@dataclass(frozen=True, slots=True, kw_only=True)
class TableInOutFunctionStateNoOp(ArrowSerializableDataclass):
    """Field-less placeholder state for functions that keep no state.

    It is the default ``TState`` of ``TableInOutFunction``.
    """
231
+
232
+
233
+ class TableInOutFunction[
234
+ TArgs,
235
+ TState: ArrowSerializableDataclass = TableInOutFunctionStateNoOp,
236
+ ](TableInOutGenerator[TArgs, TState]):
237
+ """Simplified base class using transform/finish callbacks.
238
+
239
+ This class provides a simpler API for common use cases where you don't need
240
+ to work directly with OutputCollector. Instead of implementing process()
241
+ directly, you override transform() and optionally finish() as regular methods.
242
+
243
+ TState is optional. If not provided, state management is disabled and
244
+ transform() will always receive state=None. When TState is an
245
+ ArrowSerializableDataclass, state is automatically saved to storage
246
+ after each transform() call for distributed processing.
247
+
248
+ """
249
+
250
+ state_class: type[TState] | None = None
251
+
252
+ def __init_subclass__(cls, **kwargs: object) -> None:
253
+ """Automatically infer the state_class from the generic type parameters."""
254
+ super().__init_subclass__(**kwargs)
255
+
256
+ # Iterate over the original bases to find the generic parameters
257
+ orig_bases = getattr(cls, "__orig_bases__", ())
258
+ for base in orig_bases:
259
+ origin = get_origin(base)
260
+ if origin is None:
261
+ continue # not a generic base
262
+ args = get_args(base)
263
+ if len(args) >= 2:
264
+ # Assign the second type parameter to state_class
265
+ cls.state_class = args[1]
266
+ break
267
+
268
+ @classmethod
269
+ def transform(
270
+ cls,
271
+ batch: pa.RecordBatch,
272
+ params: ProcessParams[TArgs],
273
+ state: TState | None,
274
+ ) -> pa.RecordBatch | list[pa.RecordBatch]:
275
+ """Transform a single input batch.
276
+
277
+ Override this method to implement your transformation logic. This is called
278
+ once for each input batch.
279
+
280
+ Args:
281
+ batch: Input RecordBatch to transform.
282
+ params: ProcessParams containing arguments, schemas, and settings.
283
+ state: Mutable state that should be updated and will be serialized as needed.
284
+
285
+ Returns:
286
+ Either:
287
+ - A single pa.RecordBatch: The transformed output
288
+ - A list of pa.RecordBatch: Multiple outputs (will be concatenated)
289
+
290
+ """
291
+ return batch
292
+
293
+ @classmethod
294
+ def finish(
295
+ cls,
296
+ params: ProcessParams[TArgs],
297
+ states: list[TState],
298
+ ) -> list[pa.RecordBatch]:
299
+ """Return final batches after all input is processed.
300
+
301
+ Override this method to emit results after all input batches have been
302
+ processed. This is useful for aggregations, sorting, or any operation
303
+ that needs to see all data before producing output.
304
+
305
+ Returns:
306
+ List of pa.RecordBatch to emit as final output.
307
+ Return an empty list if no finalization output is needed.
308
+
309
+ """
310
+ return []
311
+
312
+ @classmethod
313
+ def initial_state(
314
+ cls,
315
+ params: ProcessParams[TArgs],
316
+ ) -> TState | None:
317
+ """Create the initial state for processing.
318
+
319
+ Override this method to initialize the state object before processing
320
+ begins.
321
+
322
+ Args:
323
+ params: ProcessParams containing arguments, schemas, and settings.
324
+
325
+ Returns:
326
+ An instance of TState representing the initial state.
327
+
328
+ """
329
+ return None
330
+
331
+ @final
332
+ @classmethod
333
+ def process(
334
+ cls,
335
+ params: ProcessParams[TArgs],
336
+ state: TState,
337
+ batch: pa.RecordBatch,
338
+ out: OutputCollector,
339
+ ) -> None:
340
+ """Process input batches by calling transform(). Do not override.
341
+
342
+ This method implements the exchange protocol by calling your transform()
343
+ method for each input batch. State is automatically saved to storage
344
+ after each call for distributed processing.
345
+
346
+ """
347
+ result = cls.transform(batch, params, state)
348
+
349
+ # Save state for distributed processing (upsert semantics)
350
+ if state is not None:
351
+ params.storage.state_put(
352
+ FrameworkNS.TIO_STATE, BoundStorage.pack_int_key(os.getpid()), state.serialize_to_bytes()
353
+ )
354
+
355
+ # Handle single batch or list of batches — exchange must emit exactly one
356
+ if isinstance(result, list):
357
+ if not result:
358
+ out.emit(empty_batch(params.output_schema))
359
+ elif len(result) == 1:
360
+ out.emit(result[0])
361
+ else:
362
+ combined = pa.Table.from_batches(result).combine_chunks()
363
+ out.emit(combined.to_batches()[0])
364
+ else:
365
+ out.emit(result)
366
+
367
+ @final
368
+ @classmethod
369
+ def finalize(cls, params: ProcessParams[TArgs]) -> list[pa.RecordBatch]:
370
+ """Emit final batches by calling finish(). Do not override.
371
+
372
+ This method collects serialized states from all workers, deserializes
373
+ them, and passes them to your finish() method.
374
+
375
+ """
376
+ if cls.state_class is not None and cls.state_class is not TableInOutFunctionStateNoOp:
377
+ states = [
378
+ cls.state_class.deserialize_from_bytes(v) for _k, v in params.storage.state_drain(FrameworkNS.TIO_STATE)
379
+ ]
380
+ else:
381
+ states = []
382
+
383
+ return cls.finish(params, states)
@@ -0,0 +1,24 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """db-transactor — transactional database access for VGI workers.
4
+
5
+ The transactor is a long-lived subprocess that owns a single DuckDB
6
+ connection. VGI worker processes communicate with it via ``vgi_rpc``
7
+ over Unix domain sockets, using the same streaming exchange patterns
8
+ that DuckDB uses with VGI workers.
9
+
10
+ Architecture::
11
+
12
+ VGI Worker(s) ──── vgi_rpc (Unix socket) ──── db-transactor
13
+
14
+ DuckDB file
15
+
16
+ """
17
+
18
+ from vgi.transactor.client import TransactorClient
19
+ from vgi.transactor.protocol import TransactorProtocol
20
+
21
+ __all__ = [
22
+ "TransactorClient",
23
+ "TransactorProtocol",
24
+ ]
@@ -0,0 +1,27 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """Type-safe shim for VGI's ``subcursor()`` extension to duckdb-python.
4
+
5
+ The VGI fork of duckdb-python adds ``DuckDBPyConnection.subcursor()`` so
6
+ callers can issue reads inside an open write transaction. That change has
7
+ not yet been merged into haybarn or upstreamed to duckdb — only local fork
8
+ builds provide it. The upstream type stubs don't know about it, so we cast
9
+ through a small Protocol here rather than scatter ``# type: ignore``
10
+ across the codebase.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import TYPE_CHECKING, Protocol, cast
16
+
17
+ if TYPE_CHECKING:
18
+ import duckdb
19
+
20
+
21
class _SupportsSubcursor(Protocol):
    """Structural type for a connection object that provides ``subcursor()``.

    The upstream duckdb type stubs do not declare this method, so this
    Protocol serves purely as a ``cast`` target for type checkers.
    """

    # Signature only; the real implementation comes from the connection object.
    def subcursor(self) -> duckdb.DuckDBPyConnection: ...
23
+
24
+
25
def subcursor(conn: duckdb.DuckDBPyConnection) -> duckdb.DuckDBPyConnection:
    """Open a read cursor that shares ``conn``'s transaction context.

    The ``cast`` is a typing-only step; at runtime this simply calls
    ``conn.subcursor()``.

    Args:
        conn: Connection to derive the cursor from.

    Returns:
        Whatever ``conn.subcursor()`` returns.
    """
    typed_conn = cast(_SupportsSubcursor, conn)
    return typed_conn.subcursor()
@@ -0,0 +1,137 @@
1
+ # Copyright 2025, 2026 Query Farm LLC - https://query.farm
2
+
3
+ """TransactorClient — connects to a db-transactor subprocess.
4
+
5
+ Handles auto-spawning the transactor process if one isn't running,
6
+ and provides a typed ``vgi_rpc`` proxy for RPC calls.
7
+
8
+ The transactor manages multiple databases internally (one per attach_opaque_data),
9
+ so a single transactor process serves all catalog attachments.
10
+
11
+ Usage::
12
+
13
+ client = TransactorClient()
14
+ proxy = client.get_proxy()
15
+ proxy.register(attach_opaque_data)
16
+ tx_id = proxy.begin(attach_opaque_data)
17
+ # ... use proxy.insert(), proxy.scan(), etc.
18
+ proxy.commit(attach_opaque_data, tx_id)
19
+ client.close()
20
+
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import logging
26
+ import os
27
+ import subprocess
28
+ import sys
29
+ import time
30
+ from pathlib import Path
31
+ from typing import Any
32
+
33
+ from vgi_rpc.rpc import RpcConnection, UnixTransport
34
+
35
+ from vgi.transactor.protocol import TransactorProtocol
36
+
37
logger = logging.getLogger("vgi.transactor.client")

# After spawning a transactor, the client retries the connection up to
# _MAX_SPAWN_RETRIES times, sleeping _SPAWN_RETRY_DELAY before each attempt
# (50 * 0.1 s, i.e. roughly five seconds in total).
_MAX_SPAWN_RETRIES = 50
_SPAWN_RETRY_DELAY = 0.1  # seconds
# Fallbacks used when the VGI_TRANSACTOR_SOCKET / VGI_TRANSACTOR_DB_DIR
# environment variables are not set.
_DEFAULT_SOCKET_PATH = "/tmp/vgi-transactor.sock"  # noqa: S108
_DEFAULT_DB_DIR = str(Path("~/.local/state/vgi/databases").expanduser())
43
+
44
+
45
class TransactorClient:
    """Client that connects to (and optionally spawns) a db-transactor.

    The transactor process is auto-spawned on first use if not already
    running. A single transactor serves all databases.

    The socket path comes from ``VGI_TRANSACTOR_SOCKET`` and the database
    directory handed to a spawned transactor from ``VGI_TRANSACTOR_DB_DIR``;
    both fall back to module-level defaults.
    """

    def __init__(self) -> None:
        """Initialize an unconnected client; no connection is made here."""
        self._socket_path = os.environ.get("VGI_TRANSACTOR_SOCKET", _DEFAULT_SOCKET_PATH)
        self._transport: UnixTransport | None = None
        self._connection: RpcConnection[TransactorProtocol] | None = None
        self._proxy: Any = None
        self._process: subprocess.Popen | None = None  # type: ignore[type-arg]

    def get_proxy(self) -> Any:
        """Get the typed RPC proxy, spawning the transactor if needed.

        Returns:
            The proxy produced by entering the RPC connection. It is cached
            until ``close()`` is called.

        Raises:
            RuntimeError: If no connection could be established even after
                spawning a transactor.
        """
        if self._proxy is None:
            self._ensure_server()
        return self._proxy

    def _ensure_server(self) -> None:
        """Connect to an existing transactor, spawning one if none answers.

        Raises:
            RuntimeError: If the socket never becomes connectable within the
                retry budget after spawning.
        """
        if self._try_connect():
            return

        self._spawn_server()

        # Poll until the freshly spawned server starts accepting connections.
        for _ in range(_MAX_SPAWN_RETRIES):
            time.sleep(_SPAWN_RETRY_DELAY)
            if self._try_connect():
                return

        raise RuntimeError(f"Failed to connect to transactor after spawning (socket: {self._socket_path})")

    def _try_connect(self) -> bool:
        """Try to connect to an existing transactor socket.

        Client state is only updated after the whole handshake succeeds, and
        the socket is closed on any failure, so a failed attempt neither
        leaks a file descriptor nor leaves a half-initialised connection.

        Returns:
            ``True`` when connected, ``False`` when the socket is missing,
            refuses the connection, or the platform is Windows.
        """
        import socket

        if sys.platform == "win32":  # pragma: no cover - AF_UNIX transactor is POSIX-only
            return False

        if not os.path.exists(self._socket_path):
            return False

        try:
            sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        except OSError:
            return False

        try:
            sock.connect(self._socket_path)
            transport = UnixTransport(sock)
            connection = RpcConnection(TransactorProtocol, transport)  # type: ignore[type-abstract, unused-ignore]
            proxy = connection.__enter__()
        except OSError:
            # Covers ConnectionRefusedError / FileNotFoundError (stale or
            # vanished socket file); both are OSError subclasses.
            sock.close()
            return False
        except BaseException:
            # Unexpected failure: still release the descriptor, then let the
            # error propagate unchanged.
            sock.close()
            raise

        self._transport = transport
        self._connection = connection
        self._proxy = proxy
        logger.info("Connected to transactor: %s", self._socket_path)
        return True

    def _spawn_server(self) -> None:
        """Spawn a new transactor subprocess.

        The child runs ``python -m vgi.transactor.server`` with the current
        interpreter, in its own session and with stdout/stderr discarded.
        The database directory is created first if it does not exist.
        """
        db_dir = os.environ.get("VGI_TRANSACTOR_DB_DIR", _DEFAULT_DB_DIR)
        os.makedirs(db_dir, exist_ok=True)

        cmd = [
            sys.executable,
            "-m",
            "vgi.transactor.server",
            "--db-dir",
            db_dir,
            "--socket",
            self._socket_path,
        ]
        logger.info("Spawning transactor: %s", " ".join(cmd))
        self._process = subprocess.Popen(  # noqa: S603
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True,
        )

    def close(self) -> None:
        """Close the connection and forget the cached proxy.

        Teardown is best-effort: errors raised while closing are suppressed.
        The proxy is cleared as well, so a later ``get_proxy()`` reconnects
        instead of returning a proxy bound to the closed connection. A
        spawned transactor process is left running. Safe to call repeatedly.
        """
        import contextlib

        # Drop the proxy first; it is unusable once the connection is gone.
        self._proxy = None
        if self._connection is not None:
            with contextlib.suppress(Exception):
                self._connection.__exit__(None, None, None)
            self._connection = None
        if self._transport is not None:
            with contextlib.suppress(Exception):
                self._transport.close()
            self._transport = None