vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
vgi/function_storage.py
ADDED
|
@@ -0,0 +1,1636 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Storage for VGI function state.
|
|
4
|
+
|
|
5
|
+
This module provides a storage protocol and implementation for sharing state
|
|
6
|
+
across worker processes in distributed VGI function execution.
|
|
7
|
+
|
|
8
|
+
Protocol:
|
|
9
|
+
FunctionStorage: Unified protocol for all VGI state storage needs.
|
|
10
|
+
|
|
11
|
+
Implementations:
|
|
12
|
+
FunctionStorageSqlite: SQLite-backed storage (local/subprocess transport).
|
|
13
|
+
FunctionStorageAzureSql: Azure SQL Database-backed storage (cloud deployments).
|
|
14
|
+
See ``vgi.function_storage_azure_sql`` for details.
|
|
15
|
+
FunctionStorageCfDo: Cloudflare Durable Object-backed storage (edge deployments).
|
|
16
|
+
See ``vgi.function_storage_cf_do`` for details.
|
|
17
|
+
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import enum
|
|
21
|
+
import functools
|
|
22
|
+
import logging
|
|
23
|
+
import os
|
|
24
|
+
import sqlite3
|
|
25
|
+
import threading
|
|
26
|
+
import time
|
|
27
|
+
from collections.abc import Callable, Iterable
|
|
28
|
+
from typing import Any, Protocol, TypeVar
|
|
29
|
+
|
|
30
|
+
import pyarrow as pa
|
|
31
|
+
|
|
32
|
+
from vgi._storage_profile import _PROFILE_ON, _profiler, io_call_bytes
|
|
33
|
+
|
|
34
|
+
# When the parent vgi.* logger is configured at DEBUG, this emits one line
|
|
35
|
+
# per BoundStorage construction with the resolved shard_key — handy for
|
|
36
|
+
# cross-referencing storage-routing bugs with MetaWorker dispatch logs.
|
|
37
|
+
_shard_logger = logging.getLogger("vgi.storage.shard")
|
|
38
|
+
|
|
39
|
+
# Type variable bound to any callable. ``_profiled`` uses it to declare that
# its decorator returns the same callable type it was given.
_F = TypeVar("_F", bound=Callable[..., Any])
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _profiled(op: str) -> Callable[[_F], _F]:
    """Build a decorator that times a ``BoundStorage`` method under label ``op``.

    Profiling is decided once, at decoration time: when ``_PROFILE_ON`` is
    false (the default unless ``VGI_STORAGE_PROFILE=1``) the method is handed
    back untouched, so there is no wrapper and no per-call cost.

    When profiling is on, every call is timed with ``time.monotonic`` and
    reported as ``(shard_key, op, elapsed, resp_bytes)`` to the shared
    per-shard profiler. Calls whose backend advertises
    ``_profiles_at_transport=True`` (cloudflare-do) are passed straight
    through, because that backend already records at its transport layer and a
    second record would double-count. A call that raises is not recorded.
    """

    def decorate(method: _F) -> _F:
        if _PROFILE_ON:

            @functools.wraps(method)
            def timed(self: "BoundStorage", *args: Any, **kwargs: Any) -> Any:
                backend_self_profiles = getattr(self._base, "_profiles_at_transport", False)
                if not backend_self_profiles:
                    started = time.monotonic()
                    outcome = method(self, *args, **kwargs)
                    _profiler.record(
                        self._shard_key,
                        op,
                        time.monotonic() - started,
                        io_call_bytes(args, kwargs, outcome),
                    )
                    return outcome
                # The transport layer records this call itself.
                return method(self, *args, **kwargs)

            return timed  # type: ignore[return-value]

        return method

    return decorate
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
__all__ = [
|
|
71
|
+
"FrameworkNS",
|
|
72
|
+
"FunctionStorage",
|
|
73
|
+
"FunctionStorageSqlite",
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Byte prefix reserved for framework-owned namespaces (every ``FrameworkNS``
# value starts with it). ``_coerce_ns`` raises ``ValueError`` when
# caller-supplied namespace bytes begin with this prefix.
_RESERVED_NS_PREFIX = b"_vgi/"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class FrameworkNS(bytes, enum.Enum):
    """Storage namespaces owned by the framework itself.

    Each member's value begins with ``b"_vgi/"``. That prefix is off limits
    to user code: a plain ``bytes`` namespace carrying it is rejected at every
    ``BoundStorage.state_*`` entry point. Framework code passes one of these
    members instead; the wrappers take either form and hand plain bytes to
    the backend.

    Rules for new members: ASCII only, snake_case, and prefixed ``_vgi/``.
    Never rename an existing member — the values are persisted in sqlite /
    Azure SQL / CfDo rows on disk, and a rename would need an unbounded
    backfill.
    """

    BUFFERING_INIT = b"_vgi/buffering_init"
    STREAMING_FINALIZE = b"_vgi/streaming_finalize"
    TIO_STATE = b"_vgi/tio_state"
    AGGREGATE_STATE = b"_vgi/aggregate_state"
    AGGREGATE_WINDOW_PARTITION = b"_vgi/aggregate_window_partition"
    STREAMING_SESSION = b"_vgi/streaming_session"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _coerce_ns(ns: "bytes | FrameworkNS") -> bytes:
    """Check a namespace argument and normalise it to plain ``bytes``.

    Args:
        ns: Either a ``FrameworkNS`` member or caller-chosen ``bytes``
            (a ``bytearray`` is also accepted and copied).

    Returns:
        The namespace as an immutable ``bytes`` object.

    Raises:
        TypeError: ``ns`` is neither ``FrameworkNS`` nor bytes-like.
        ValueError: caller-supplied bytes start with the reserved ``_vgi/``
            prefix, which only ``FrameworkNS`` members may carry.
    """
    # Enum members carry the reserved prefix legitimately; this check must run
    # first because a ``FrameworkNS`` member is itself a ``bytes`` instance.
    if isinstance(ns, FrameworkNS):
        return bytes(ns.value)

    if isinstance(ns, (bytes, bytearray)):
        plain = bytes(ns)
        if not plain.startswith(_RESERVED_NS_PREFIX):
            return plain
        raise ValueError(
            f"namespace {plain!r} starts with the reserved prefix "
            f"{_RESERVED_NS_PREFIX!r} — use a vgi.function_storage.FrameworkNS "
            "member or choose a different prefix"
        )

    raise TypeError(f"namespace must be bytes or FrameworkNS, got {type(ns).__name__}")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# Width of the framework UUID at the head of every unwrapped attach plaintext
|
|
125
|
+
# (``uuid(16) || catalog_bytes``). Mirrors ``worker._ATTACH_UUID_LEN``.
|
|
126
|
+
_ATTACH_UUID_LEN = 16
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def attach_catalog_bytes(attach_plaintext: bytes | None) -> bytes | None:
|
|
130
|
+
"""Strip the framework shard-UUID prefix from a full attach plaintext.
|
|
131
|
+
|
|
132
|
+
The framework unwraps an attach to ``uuid(16) || catalog_bytes``; function
|
|
133
|
+
bodies see only ``catalog_bytes`` (what the catalog returned). Returns None
|
|
134
|
+
when there is no attach.
|
|
135
|
+
"""
|
|
136
|
+
return attach_plaintext[_ATTACH_UUID_LEN:] if attach_plaintext else None
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _derive_shard_key(*, attach_uuid: bytes | None, _origin: str = "?") -> str:
|
|
140
|
+
"""Return the routing key for the ``FunctionStorageCfDo`` Durable Object.
|
|
141
|
+
|
|
142
|
+
Server-derived inside the trusted worker process. The CF DO routes by
|
|
143
|
+
this key (``idFromName(shard_key)``), so one DO instance hosts every
|
|
144
|
+
storage op carrying the same shard_key.
|
|
145
|
+
|
|
146
|
+
Single rule: ``"att-" + attach_uuid.hex()`` where ``attach_uuid`` is the
|
|
147
|
+
framework-minted 16-byte UUID at the head of the **unwrapped** attach
|
|
148
|
+
(``catalog_attach`` prepends it; see ``_AttachUnwrapper`` in worker.py).
|
|
149
|
+
One DO per logical ATTACH. We shard on the UUID — not the sealed bytes —
|
|
150
|
+
because the seal uses a random nonce (re-sealing the same attach would
|
|
151
|
+
otherwise scatter its state across DOs) and the catalog-vended plaintext
|
|
152
|
+
isn't guaranteed unique (distinct attaches would otherwise collide). The
|
|
153
|
+
UUID is stable across re-seals and globally unique; 36-char key, ≤128.
|
|
154
|
+
|
|
155
|
+
``attach_uuid`` must be exactly 16 bytes: the storage path is always bound
|
|
156
|
+
to a logical ATTACH, so a missing/short value is a programming error and
|
|
157
|
+
raises rather than collapsing traffic onto a single fallback DO.
|
|
158
|
+
|
|
159
|
+
No-op for non-CfDo backends — they ignore the value.
|
|
160
|
+
|
|
161
|
+
``_origin`` labels the call site (e.g. ``"BoundStorage(InitRequest)"``).
|
|
162
|
+
Emitted as a ``vgi.storage.shard`` debug log for cross-referencing
|
|
163
|
+
storage-routing bugs with MetaWorker dispatch logs.
|
|
164
|
+
"""
|
|
165
|
+
if not attach_uuid or len(attach_uuid) != 16:
|
|
166
|
+
raise ValueError(
|
|
167
|
+
f"cannot derive shard_key without a 16-byte attach uuid (origin={_origin}, "
|
|
168
|
+
f"got {len(attach_uuid) if attach_uuid else 0} bytes); the storage path "
|
|
169
|
+
"must be bound to a logical ATTACH"
|
|
170
|
+
)
|
|
171
|
+
key = "att-" + attach_uuid.hex()
|
|
172
|
+
if _shard_logger.isEnabledFor(logging.DEBUG):
|
|
173
|
+
_shard_logger.debug("shard derived origin=%s uuid=%s key=%s", _origin, attach_uuid.hex(), key)
|
|
174
|
+
return key
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _resolve_shard_key(backend: Any, attach_plaintext: bytes | None, _origin: str) -> str:
|
|
178
|
+
"""Compute the shard_key for a ``BoundStorage`` over ``backend``.
|
|
179
|
+
|
|
180
|
+
``attach_plaintext`` is the framework-unwrapped attach, laid out as
|
|
181
|
+
``uuid(16) || catalog_bytes`` (the worker unwraps and threads it in; see
|
|
182
|
+
``_AttachUnwrapper``-free flow in worker.py), or None when there is no
|
|
183
|
+
ATTACH. We shard on the leading UUID for any backend (so the debug
|
|
184
|
+
``ShardedSqliteStorage`` partitions too). When there is no attach, only a
|
|
185
|
+
remote-sharding backend (``requires_shard_key``, i.e. CfDo) treats it as a
|
|
186
|
+
hard error; everything else gets an empty key — local / subprocess
|
|
187
|
+
executions are routinely not bound to an ATTACH and ignore the value anyway.
|
|
188
|
+
"""
|
|
189
|
+
uuid = (
|
|
190
|
+
attach_plaintext[:_ATTACH_UUID_LEN] if attach_plaintext and len(attach_plaintext) >= _ATTACH_UUID_LEN else None
|
|
191
|
+
)
|
|
192
|
+
if uuid:
|
|
193
|
+
return _derive_shard_key(attach_uuid=uuid, _origin=_origin)
|
|
194
|
+
if getattr(backend, "requires_shard_key", False):
|
|
195
|
+
# Remote-sharding backend with no attach: refuse rather than collapse
|
|
196
|
+
# onto a single hot DO.
|
|
197
|
+
return _derive_shard_key(attach_uuid=uuid, _origin=_origin)
|
|
198
|
+
return ""
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _scan_worker_stream_id() -> bytes:
    """Return raw identifier bytes for the current scan worker.

    HTTP transport: reads the per-stream id from
    ``vgi_rpc.rpc._common._current_stream_id`` and returns its raw 16-byte
    form. Per the framework, that value is set once per ``_serve_stream``
    call and carried across HTTP turns via the state token, so every tick of
    one scan worker yields the same bytes regardless of which machine or
    thread serves it.

    Stdio transport / any non-stream path (``vgi_rpc`` not importable, or no
    stream id set): returns ``struct.pack("<Q", os.getpid())``, a stable
    per-pid identifier, so storage rows still do not collide. Distinct pids
    give distinct keys; the same pid across queries overwrites (the same
    semantics as the old per-pid ``BoundStorage.put``).
    """
    import struct

    def pid_bytes() -> bytes:
        # 8-byte little-endian pid: the fallback identity when no stream id exists.
        return struct.pack("<Q", os.getpid())

    try:
        from vgi_rpc.rpc._common import _current_stream_id
    except ImportError:
        return pid_bytes()

    stream_id = _current_stream_id.get()
    if not stream_id:
        return pid_bytes()

    # Stream ids are hex-encoded 128-bit UUIDs. Store the canonical 16-byte
    # form so the storage column does not pay for the hex encoding.
    try:
        raw = bytes.fromhex(stream_id)
    except ValueError:
        # Defensive: if a future framework version uses a non-hex stream id,
        # its UTF-8 bytes still preserve uniqueness.
        return stream_id.encode("utf-8")
    return raw
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _get_default_db_path() -> str:
    """Return the default SQLite database path for VGI storage.

    The database file is ``vgi_storage.db`` inside the per-user state
    directory that ``platformdirs.user_state_dir("vgi")`` reports. That
    directory is created (including parents) if it does not exist yet.

    Returns:
        The resolved path of the database file as a string.
    """
    from pathlib import Path

    from platformdirs import user_state_dir

    db_file = Path(user_state_dir("vgi"), "vgi_storage.db")
    db_file.parent.mkdir(parents=True, exist_ok=True)
    return str(db_file.resolve())
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class FunctionStorage(Protocol):
|
|
248
|
+
"""Storage protocol for VGI distributed function execution.
|
|
249
|
+
|
|
250
|
+
Two access patterns:
|
|
251
|
+
|
|
252
|
+
**Unified state_*** - Composite-key K/V over ``(scope_id, ns, key)``.
|
|
253
|
+
The catch-all family for per-execution state, per-transaction state,
|
|
254
|
+
per-group aggregate state, and any other "this caller picks the
|
|
255
|
+
namespace" pattern. Read-modify-write singletons via
|
|
256
|
+
``state_get_many`` / ``state_put_many``; non-destructive enumeration
|
|
257
|
+
via ``state_scan``; atomic scan-and-delete via ``state_drain``;
|
|
258
|
+
targeted or namespace-wide deletion via ``state_delete``;
|
|
259
|
+
cross-namespace teardown via ``execution_clear``.
|
|
260
|
+
|
|
261
|
+
**Work Queue** - Atomic FIFO work distribution. Producer pushes,
|
|
262
|
+
workers atomically claim. Distinct from state_* (destructive consume,
|
|
263
|
+
not key-addressable).
|
|
264
|
+
|
|
265
|
+
Idempotency: a concern of the remote (HTTP) tier only. The CfDo backend
|
|
266
|
+
generates an internal ``attempt_id`` per call so a retried ``state_put_many``
|
|
267
|
+
is a silent no-op and a retried ``state_drain`` returns the prior values.
|
|
268
|
+
The local SQLite tier is a single connection per process with no network
|
|
269
|
+
retries, so it carries no replay-detection (and no idempotency columns).
|
|
270
|
+
|
|
271
|
+
Eviction / lifecycle. Every scope-keyed table (``function_state``,
|
|
272
|
+
``function_state_log``, ``function_counter``) is reclaimed for a scope by
|
|
273
|
+
``execution_clear`` — called at operator teardown for execution-scoped state
|
|
274
|
+
and on commit/rollback for transaction-scoped state. Beyond that, each
|
|
275
|
+
backend differs: the **CfDo** DO self-evicts via an orphan-horizon alarm
|
|
276
|
+
(idle DO → ``deleteAll``); **Azure SQL** relies on ``cleanup_old_entries``,
|
|
277
|
+
an age-based sweep over the ``created_at`` column that must be scheduled
|
|
278
|
+
externally (so every age-managed table needs a ``created_at``); the local
|
|
279
|
+
**SQLite** tier is durable with no auto-eviction — long-lived, attach-scoped
|
|
280
|
+
data (e.g. an accumulate collection) is the consumer's responsibility to
|
|
281
|
+
bound (``ttl`` / ``max_row_size`` / explicit clear). The
|
|
282
|
+
``test_execution_clear_covers_all_scope_keyed_tables`` audit pins that every
|
|
283
|
+
scope-keyed table is wiped by ``execution_clear``. (Follow-up: some
|
|
284
|
+
``worker.py`` teardown paths call ``execution_clear`` without a try/except,
|
|
285
|
+
so a cleanup exception there can still leak — to be hardened separately.)
|
|
286
|
+
|
|
287
|
+
"""
|
|
288
|
+
|
|
289
|
+
# Note: backends that route remotely on ``shard_key`` (CfDo) set a
|
|
290
|
+
# ``requires_shard_key = True`` class attribute; ``_resolve_shard_key`` reads
|
|
291
|
+
# it via ``getattr(.., False)``. It is intentionally NOT declared here as a
|
|
292
|
+
# Protocol member so the in-process backends (SQLite / Azure), which ignore
|
|
293
|
+
# shard_key, still structurally satisfy ``FunctionStorage`` without it.
|
|
294
|
+
|
|
295
|
+
# --- Work Queue (distributed work items) ---
|
|
296
|
+
|
|
297
|
+
def queue_push(self, execution_id: bytes, items: list[bytes], *, shard_key: str = "") -> int:
|
|
298
|
+
"""Append work items to the queue.
|
|
299
|
+
|
|
300
|
+
There is no registration step — the queue tracks only the items
|
|
301
|
+
themselves (matching the Durable Object).
|
|
302
|
+
|
|
303
|
+
Args:
|
|
304
|
+
execution_id: Unique identifier for the function invocation.
|
|
305
|
+
items: List of serialized work item bytes.
|
|
306
|
+
shard_key: Routing key for the CF DO backend; ignored by
|
|
307
|
+
SQLite / Azure backends. Set automatically by BoundStorage
|
|
308
|
+
from the caller's attach_opaque_data / auth context.
|
|
309
|
+
|
|
310
|
+
Returns:
|
|
311
|
+
Number of items added.
|
|
312
|
+
|
|
313
|
+
"""
|
|
314
|
+
...
|
|
315
|
+
|
|
316
|
+
def queue_pop(self, execution_id: bytes, *, shard_key: str = "") -> bytes | None:
|
|
317
|
+
"""Atomically claim one work item from the queue.
|
|
318
|
+
|
|
319
|
+
Args:
|
|
320
|
+
execution_id: Unique identifier for the function invocation.
|
|
321
|
+
shard_key: Routing key for the CF DO backend; ignored by
|
|
322
|
+
SQLite / Azure backends. Set automatically by BoundStorage
|
|
323
|
+
from the caller's attach_opaque_data / auth context.
|
|
324
|
+
|
|
325
|
+
Returns:
|
|
326
|
+
Serialized work item bytes, or None if the queue is empty or the
|
|
327
|
+
execution_id was never pushed. There is no registration, so the
|
|
328
|
+
backend does not distinguish a never-pushed id from a drained
|
|
329
|
+
queue — both return None (matching the Durable Object).
|
|
330
|
+
|
|
331
|
+
"""
|
|
332
|
+
...
|
|
333
|
+
|
|
334
|
+
def queue_clear(self, execution_id: bytes, *, shard_key: str = "") -> int:
|
|
335
|
+
"""Clear all remaining work items for the execution.
|
|
336
|
+
|
|
337
|
+
Args:
|
|
338
|
+
execution_id: Unique identifier for the function invocation.
|
|
339
|
+
shard_key: Routing key for the CF DO backend; ignored by
|
|
340
|
+
SQLite / Azure backends. Set automatically by BoundStorage
|
|
341
|
+
from the caller's attach_opaque_data / auth context.
|
|
342
|
+
|
|
343
|
+
Returns:
|
|
344
|
+
Number of items deleted.
|
|
345
|
+
|
|
346
|
+
"""
|
|
347
|
+
...
|
|
348
|
+
|
|
349
|
+
# ========================================================================
|
|
350
|
+
# Unified state_* API (composite-key K/V over (scope_id, ns, key))
|
|
351
|
+
# ========================================================================
|
|
352
|
+
#
|
|
353
|
+
# This API replaces the four RMW families
|
|
354
|
+
# (``worker_*``, ``stream_state_*``, ``aggregate_state_*``,
|
|
355
|
+
# ``aggregate_window_partition_*``) plus ``transaction_state_*`` with a
|
|
356
|
+
# single composite-key shape:
|
|
357
|
+
#
|
|
358
|
+
# ``(scope_id, ns, key) -> value``
|
|
359
|
+
#
|
|
360
|
+
# ``scope_id`` carries the role of today's ``execution_id`` /
|
|
361
|
+
# ``transaction_opaque_data`` — caller decides whether they're scoping by
|
|
362
|
+
# invocation or by transaction. ``ns`` is a namespace selector chosen by
|
|
363
|
+
# the caller (e.g. ``b"agg"`` for aggregate state, ``b"buf"`` for buffered
|
|
364
|
+
# accumulators); the storage doesn't interpret it.
|
|
365
|
+
#
|
|
366
|
+
# Idempotency: the public API does not expose ``attempt_id``. It is an
|
|
367
|
+
# HTTP-tier concern — the CfDo client generates one per call and splices the
|
|
368
|
+
# same id into retries within its ``_post()`` loop so the Durable Object can
|
|
369
|
+
# detect a replay across the network. The local SQLite tier has no network
|
|
370
|
+
# retries and carries no replay-detection (nor the idempotency columns).
|
|
371
|
+
|
|
372
|
+
def state_get_many(
|
|
373
|
+
self,
|
|
374
|
+
scope_id: bytes,
|
|
375
|
+
ns: bytes,
|
|
376
|
+
keys: list[bytes],
|
|
377
|
+
*,
|
|
378
|
+
shard_key: str = "",
|
|
379
|
+
) -> list[bytes | None]:
|
|
380
|
+
"""Batched non-destructive read of values keyed by ``(scope_id, ns, key)``.
|
|
381
|
+
|
|
382
|
+
Returns a list parallel to ``keys`` with the stored ``bytes`` for
|
|
383
|
+
hits and ``None`` for misses. Single-call so cloud backends (CfDo)
|
|
384
|
+
can serve a 100-key request as one HTTP roundtrip.
|
|
385
|
+
|
|
386
|
+
Args:
|
|
387
|
+
scope_id: Caller's scope identifier (typically ``execution_id`` for
|
|
388
|
+
per-query state, ``transaction_opaque_data`` for txn-scoped state).
|
|
389
|
+
ns: Caller-chosen namespace bytes; the storage doesn't interpret.
|
|
390
|
+
keys: List of binary keys to look up.
|
|
391
|
+
shard_key: CF DO routing key; ignored by SQLite/Azure backends.
|
|
392
|
+
|
|
393
|
+
Returns:
|
|
394
|
+
List parallel to ``keys`` of stored values or ``None``.
|
|
395
|
+
|
|
396
|
+
"""
|
|
397
|
+
...
|
|
398
|
+
|
|
399
|
+
def state_put_many(
|
|
400
|
+
self,
|
|
401
|
+
scope_id: bytes,
|
|
402
|
+
ns: bytes,
|
|
403
|
+
items: list[tuple[bytes, bytes]],
|
|
404
|
+
*,
|
|
405
|
+
shard_key: str = "",
|
|
406
|
+
) -> None:
|
|
407
|
+
"""Batched atomic upsert of ``(key, value)`` pairs in one namespace.
|
|
408
|
+
|
|
409
|
+
Atomic per backend's single-statement isolation: either every item
|
|
410
|
+
in the batch is written, or none are. Existing values for the same
|
|
411
|
+
``(scope_id, ns, key)`` are overwritten.
|
|
412
|
+
|
|
413
|
+
Remote backends (CfDo) carry an internal ``attempt_id`` so an HTTP
|
|
414
|
+
retry is detected as a replay and silently no-ops. Local backends
|
|
415
|
+
(SQLite) are a single connection per process with no network retries,
|
|
416
|
+
so they need no replay-detection.
|
|
417
|
+
"""
|
|
418
|
+
...
|
|
419
|
+
|
|
420
|
+
def state_scan(
|
|
421
|
+
self,
|
|
422
|
+
scope_id: bytes,
|
|
423
|
+
ns: bytes,
|
|
424
|
+
*,
|
|
425
|
+
start: bytes | None = None,
|
|
426
|
+
end: bytes | None = None,
|
|
427
|
+
reverse: bool = False,
|
|
428
|
+
limit: int | None = None,
|
|
429
|
+
shard_key: str = "",
|
|
430
|
+
) -> Iterable[tuple[bytes, bytes]]:
|
|
431
|
+
"""Non-destructive scan of ``(key, value)`` in one namespace.
|
|
432
|
+
|
|
433
|
+
Returns an iterable of ``(key, value)`` ordered by key bytes (unsigned
|
|
434
|
+
lexicographic / memcmp). ``reverse=True`` orders descending. The scan is
|
|
435
|
+
bounded to the half-open key range ``[start, end)`` (either bound
|
|
436
|
+
``None`` is open) and capped at ``limit`` rows (``None`` = unbounded).
|
|
437
|
+
Large result sets may be streamed in pages by the backend (the
|
|
438
|
+
``cloudflare-do`` backend pages under the hood), so callers should
|
|
439
|
+
iterate rather than assume a materialized list. Use when you need to
|
|
440
|
+
enumerate an unknown key set (e.g. drainer-side discovery of which sink
|
|
441
|
+
threads produced state).
|
|
442
|
+
"""
|
|
443
|
+
...
|
|
444
|
+
|
|
445
|
+
def state_drain(
|
|
446
|
+
self,
|
|
447
|
+
scope_id: bytes,
|
|
448
|
+
ns: bytes,
|
|
449
|
+
*,
|
|
450
|
+
shard_key: str = "",
|
|
451
|
+
) -> Iterable[tuple[bytes, bytes]]:
|
|
452
|
+
"""Atomically scan-and-delete every ``(key, value)`` in one namespace.
|
|
453
|
+
|
|
454
|
+
Returns an iterable of ``(key, value)`` ordered by key. Remote backends
|
|
455
|
+
(CfDo) tombstone the rows for HTTP replay-detection (a retried drain
|
|
456
|
+
returns the same values without re-deleting) and stream the result in
|
|
457
|
+
pages; local backends delete outright. The drain is atomic — beginning
|
|
458
|
+
to iterate claims the whole namespace, so always consume it fully.
|
|
459
|
+
"""
|
|
460
|
+
...
|
|
461
|
+
|
|
462
|
+
def state_delete(
|
|
463
|
+
self,
|
|
464
|
+
scope_id: bytes,
|
|
465
|
+
ns: bytes,
|
|
466
|
+
keys: list[bytes] | None = None,
|
|
467
|
+
*,
|
|
468
|
+
start: bytes | None = None,
|
|
469
|
+
end: bytes | None = None,
|
|
470
|
+
shard_key: str = "",
|
|
471
|
+
) -> int:
|
|
472
|
+
"""Delete by key list, by key range, or wipe the entire namespace.
|
|
473
|
+
|
|
474
|
+
``keys=[...]`` deletes those keys. ``keys is None`` with a ``start``
|
|
475
|
+
and/or ``end`` deletes the half-open key range ``[start, end)`` (either
|
|
476
|
+
bound ``None`` is open). ``keys is None`` with no range wipes the whole
|
|
477
|
+
namespace. ``keys`` and the range are mutually exclusive.
|
|
478
|
+
|
|
479
|
+
Naturally idempotent — deleting an already-deleted key/range is a no-op.
|
|
480
|
+
Returns the count of rows actually removed. Replaces today's
|
|
481
|
+
per-family ``*_clear`` methods.
|
|
482
|
+
"""
|
|
483
|
+
...
|
|
484
|
+
|
|
485
|
+
def execution_clear(
|
|
486
|
+
self,
|
|
487
|
+
scope_id: bytes,
|
|
488
|
+
*,
|
|
489
|
+
shard_key: str = "",
|
|
490
|
+
) -> int:
|
|
491
|
+
"""Wipe ALL state, log, and counter rows for ``scope_id`` across every namespace.
|
|
492
|
+
|
|
493
|
+
Used as a safety-sweep at end-of-execution / on crash recovery.
|
|
494
|
+
Naturally idempotent. Returns total row count deleted across the
|
|
495
|
+
``function_state``, ``function_state_log``, and ``function_counter`` tables.
|
|
496
|
+
|
|
497
|
+
Does NOT touch ``queue_*`` rows.
|
|
498
|
+
"""
|
|
499
|
+
...
|
|
500
|
+
|
|
501
|
+
# --- Append-only log ---
|
|
502
|
+
|
|
503
|
+
def state_append(
|
|
504
|
+
self,
|
|
505
|
+
scope_id: bytes,
|
|
506
|
+
ns: bytes,
|
|
507
|
+
key: bytes,
|
|
508
|
+
item: bytes,
|
|
509
|
+
*,
|
|
510
|
+
shard_key: str = "",
|
|
511
|
+
) -> int:
|
|
512
|
+
"""Append ``item`` to the log keyed by (scope_id, ns, key); return ordinal.
|
|
513
|
+
|
|
514
|
+
Ordinals are globally monotonic across all (scope, ns, key) triples
|
|
515
|
+
on a given backend (one IDENTITY/AUTOINCREMENT column for the table).
|
|
516
|
+
Per-key order is recovered via the ``(scope_id, ns, key, id)`` index;
|
|
517
|
+
``state_log_scan`` yields rows in id order, which corresponds to
|
|
518
|
+
append order. Concurrent appenders to the *same* key get distinct
|
|
519
|
+
ordinals but interleaving across writers is undefined.
|
|
520
|
+
|
|
521
|
+
**Idempotency scope.** Remote backends carry an internal
|
|
522
|
+
``attempt_id`` covering *transport-layer* retries within a single
|
|
523
|
+
backend call (an HTTP retry on CfDo replays correctly); local SQLite
|
|
524
|
+
has no retry layer. **Caller-level retries** (re-invoking
|
|
525
|
+
``state_append`` for the same logical record after the call already
|
|
526
|
+
returned) always produce duplicate rows. If you need caller-level
|
|
527
|
+
idempotency, dedupe on the caller side — e.g., check
|
|
528
|
+
``state_log_scan`` before appending, or key your namespace on a
|
|
529
|
+
stable content hash.
|
|
530
|
+
"""
|
|
531
|
+
...
|
|
532
|
+
|
|
533
|
+
def state_log_scan(
|
|
534
|
+
self,
|
|
535
|
+
scope_id: bytes,
|
|
536
|
+
ns: bytes,
|
|
537
|
+
key: bytes,
|
|
538
|
+
*,
|
|
539
|
+
after_id: int = -1,
|
|
540
|
+
limit: int | None = None,
|
|
541
|
+
shard_key: str = "",
|
|
542
|
+
) -> list[tuple[int, bytes]]:
|
|
543
|
+
"""Yield (id, value) pairs for (scope_id, ns, key) with id > after_id.
|
|
544
|
+
|
|
545
|
+
Returns rows in ascending ``id`` order. ``after_id=-1`` is the
|
|
546
|
+
before-first sentinel (returns from the start). ``limit=None`` is
|
|
547
|
+
unbounded; positive values cap the result at that many rows.
|
|
548
|
+
Use the returned ``id`` of the last row as the next ``after_id``
|
|
549
|
+
for cursor-based scrolling.
|
|
550
|
+
|
|
551
|
+
Non-destructive. Repeat calls with the same parameters return
|
|
552
|
+
identical results until ``execution_clear`` wipes the log rows.
|
|
553
|
+
"""
|
|
554
|
+
...
|
|
555
|
+
|
|
556
|
+
# --- Atomic int64 counters (separate ``function_counter`` table) ---
|
|
557
|
+
#
|
|
558
|
+
# A typed numeric facet kept apart from the opaque ``function_state`` K/V so
|
|
559
|
+
# the value column never has to carry numeric semantics. Keyed by the same
|
|
560
|
+
# ``(scope_id, ns, key)`` shape. ``state_counter_add`` is an atomic
|
|
561
|
+
# read-add-return in one statement (no caller-side CAS loop); the others are
|
|
562
|
+
# plain upsert / select / delete.
|
|
563
|
+
|
|
564
|
+
def state_counter_get(
    self,
    scope_id: bytes,
    ns: bytes,
    key: bytes,
    *,
    shard_key: str = "",
) -> int:
    """Read the int64 counter stored under ``(scope_id, ns, key)``.

    A counter that does not exist reads as ``0``.
    """
    ...
|
|
574
|
+
|
|
575
|
+
def state_counter_add(
    self,
    scope_id: bytes,
    ns: bytes,
    key: bytes,
    delta: int,
    *,
    shard_key: str = "",
) -> int:
    """Add ``delta`` to the counter atomically and return the updated value.

    A counter that does not exist yet is treated as starting from 0. The
    add is a single-statement upsert, so there is no read-modify-write
    race and callers do not need a retry/CAS loop.

    The operation is not idempotent: calling it again for the same
    logical add applies ``delta`` twice. Remote/cloud backends carry an
    internal ``attempt_id`` (as ``state_put_many`` does) so that a
    transport-level retry replays the earlier result rather than adding
    again; the local SQLite tier has no retry layer.
    """
    ...
|
|
593
|
+
|
|
594
|
+
def state_counter_set(
    self,
    scope_id: bytes,
    ns: bytes,
    key: bytes,
    value: int,
    *,
    shard_key: str = "",
) -> None:
    """Replace the counter stored under ``(scope_id, ns, key)`` with ``value``."""
    ...
|
|
605
|
+
|
|
606
|
+
def state_counter_delete(
    self,
    scope_id: bytes,
    ns: bytes,
    key: bytes,
    *,
    shard_key: str = "",
) -> None:
    """Remove the counter stored under ``(scope_id, ns, key)``.

    Deleting a counter that does not exist is a no-op.
    """
    ...
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
class TransactionBoundStorage:
|
|
619
|
+
"""Convenience wrapper bound to a single transaction_opaque_data.
|
|
620
|
+
|
|
621
|
+
Lets a function read/write transaction-scoped state without
|
|
622
|
+
threading the transaction_opaque_data through every call site. Get one via
|
|
623
|
+
``BoundStorage.transaction(transaction_opaque_data)``.
|
|
624
|
+
"""
|
|
625
|
+
|
|
626
|
+
def __init__(
|
|
627
|
+
self,
|
|
628
|
+
storage: "FunctionStorage",
|
|
629
|
+
transaction_opaque_data: bytes,
|
|
630
|
+
*,
|
|
631
|
+
request: Any = None,
|
|
632
|
+
attach_plaintext: bytes | None = None,
|
|
633
|
+
shard_key: str | None = None,
|
|
634
|
+
) -> None:
|
|
635
|
+
self._base = storage
|
|
636
|
+
self._transaction_opaque_data = transaction_opaque_data
|
|
637
|
+
# ``attach_plaintext`` is the framework-unwrapped attach
|
|
638
|
+
# (``uuid(16) || catalog_bytes``); we shard on its leading UUID. Callers
|
|
639
|
+
# may instead pass an already-resolved ``shard_key=`` (e.g. inherited
|
|
640
|
+
# from a parent BoundStorage). ``request=`` only labels the origin.
|
|
641
|
+
if shard_key is None:
|
|
642
|
+
origin = (
|
|
643
|
+
f"TransactionBoundStorage({type(request).__name__})"
|
|
644
|
+
if request is not None
|
|
645
|
+
else "TransactionBoundStorage"
|
|
646
|
+
)
|
|
647
|
+
shard_key = _resolve_shard_key(storage, attach_plaintext, origin)
|
|
648
|
+
self._shard_key = shard_key
|
|
649
|
+
|
|
650
|
+
# Backed by the unified state_* API: scope_id = transaction_opaque_data,
|
|
651
|
+
# ns = b"txn". Caller-supplied keys are user-chosen bytes (typically
|
|
652
|
+
# short ASCII like b"watermark:topic-A"); the storage doesn't interpret
|
|
653
|
+
# them. This class is preserved as a convenience wrapper so callers
|
|
654
|
+
# don't have to thread the (transaction_opaque_data, b"txn") pair on
|
|
655
|
+
# every call — the surface stays clean.
|
|
656
|
+
|
|
657
|
+
_NS = b"txn"
|
|
658
|
+
|
|
659
|
+
def get(self, keys: list[bytes]) -> list[bytes | None]:
|
|
660
|
+
"""Load values for a list of keys; parallel return list."""
|
|
661
|
+
return self._base.state_get_many(
|
|
662
|
+
self._transaction_opaque_data,
|
|
663
|
+
self._NS,
|
|
664
|
+
keys,
|
|
665
|
+
shard_key=self._shard_key,
|
|
666
|
+
)
|
|
667
|
+
|
|
668
|
+
def get_one(self, key: bytes) -> bytes | None:
|
|
669
|
+
"""Load a single value, or None if missing."""
|
|
670
|
+
return self.get([key])[0]
|
|
671
|
+
|
|
672
|
+
def put(self, items: list[tuple[bytes, bytes]]) -> None:
|
|
673
|
+
"""Write a batch of (key, value) pairs."""
|
|
674
|
+
self._base.state_put_many(
|
|
675
|
+
self._transaction_opaque_data,
|
|
676
|
+
self._NS,
|
|
677
|
+
items,
|
|
678
|
+
shard_key=self._shard_key,
|
|
679
|
+
)
|
|
680
|
+
|
|
681
|
+
def put_one(self, key: bytes, value: bytes) -> None:
|
|
682
|
+
"""Write a single (key, value) pair."""
|
|
683
|
+
self.put([(key, value)])
|
|
684
|
+
|
|
685
|
+
def clear(self) -> None:
|
|
686
|
+
"""Drop every value for this transaction (every namespace)."""
|
|
687
|
+
# execution_clear sweeps all namespaces — same effect as the old
|
|
688
|
+
# transaction_state_clear since the only namespace under txn scope
|
|
689
|
+
# is b"txn".
|
|
690
|
+
self._base.execution_clear(
|
|
691
|
+
self._transaction_opaque_data,
|
|
692
|
+
shard_key=self._shard_key,
|
|
693
|
+
)
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
class BoundStorage:
    """Facade that binds a ``FunctionStorage`` to one execution id and shard key.

    Each method forwards to the wrapped storage and fills in the bound
    ``execution_id`` (used as the queue id and as ``scope_id`` of the
    ``state_*`` calls) together with the resolved ``shard_key``, so call
    sites only pass what actually varies. For transaction-scoped state use
    ``transaction()`` to obtain a separate view bound to
    ``transaction_opaque_data``.
    """

    def __init__(
        self,
        storage: FunctionStorage,
        execution_id: bytes,
        *,
        request: Any = None,
        attach_plaintext: bytes | None = None,
    ):
        """Bind ``storage`` to ``execution_id`` and resolve the shard key.

        Args:
            storage: Backend that receives the forwarded calls.
            execution_id: Identifier supplied to every forwarded call.
            request: Optional object whose type name only labels the
                derivation origin (for debug logs).
            attach_plaintext: Framework-unwrapped attach
                (``uuid(16) || catalog_bytes``); the shard key is resolved
                from it. The worker unwraps once and threads it in.
        """
        self._base = storage
        self._execution_id = execution_id
        label = "BoundStorage"
        if request is not None:
            label = f"BoundStorage({type(request).__name__})"
        self._shard_key = _resolve_shard_key(storage, attach_plaintext, label)

    def transaction(self, transaction_opaque_data: bytes) -> TransactionBoundStorage:
        """Create a storage view scoped to a transaction.

        Intended for state that should stay stable across several
        statements of one SQL transaction (e.g. Kafka topic watermarks,
        for snapshot-isolation reads).
        """
        # Both views belong to the same logical attach, so the child reuses
        # the shard key resolved here instead of resolving it again.
        view = TransactionBoundStorage(
            self._base,
            transaction_opaque_data,
            shard_key=self._shard_key,
        )
        return view

    # --- Work queue -------------------------------------------------------

    @_profiled("queue_push")
    def queue_push(self, items: list[bytes]) -> int:
        """Add work items to the queue and register the invocation."""
        shard = self._shard_key
        return self._base.queue_push(self._execution_id, items, shard_key=shard)

    def queue_push_batches(self, batches: list[pa.RecordBatch]) -> int:
        """Serialize RecordBatches and push them as work items."""
        payloads = [self.serialize_record_batch(batch) for batch in batches]
        return self.queue_push(payloads)

    @_profiled("queue_pop")
    def queue_pop(self) -> bytes | None:
        """Atomically claim one work item from the queue."""
        shard = self._shard_key
        return self._base.queue_pop(self._execution_id, shard_key=shard)

    def queue_pop_batch(self) -> pa.RecordBatch | None:
        """Pop one work item and decode it as a RecordBatch (``None`` if nothing was popped)."""
        payload = self.queue_pop()
        return None if payload is None else self.deserialize_record_batch(payload)

    @_profiled("queue_clear")
    def queue_clear(self) -> int:
        """Clear all remaining work items and unregister the invocation."""
        shard = self._shard_key
        return self._base.queue_clear(self._execution_id, shard_key=shard)

    # ========================================================================
    # Unified state_* facade — composite-key K/V over (ns, key)
    # ========================================================================
    #
    # The FunctionStorage.state_* docstrings hold the full semantics. The
    # wrappers below bind ``scope_id = execution_id`` (the common case).

    @_profiled("state_get")
    def state_get(self, ns: "bytes | FrameworkNS", key: bytes) -> bytes | None:
        """Read the value of a single key (``None`` when absent)."""
        values = self._base.state_get_many(
            self._execution_id,
            _coerce_ns(ns),
            [key],
            shard_key=self._shard_key,
        )
        return values[0]

    @_profiled("state_get_many")
    def state_get_many(self, ns: "bytes | FrameworkNS", keys: list[bytes]) -> list[bytes | None]:
        """Read several keys at once without consuming them."""
        return self._base.state_get_many(
            self._execution_id,
            _coerce_ns(ns),
            keys,
            shard_key=self._shard_key,
        )

    @_profiled("state_put")
    def state_put(self, ns: "bytes | FrameworkNS", key: bytes, value: bytes) -> None:
        """Insert or overwrite a single ``(key, value)``."""
        self._base.state_put_many(
            self._execution_id,
            _coerce_ns(ns),
            [(key, value)],
            shard_key=self._shard_key,
        )

    @_profiled("state_put_many")
    def state_put_many(self, ns: "bytes | FrameworkNS", items: list[tuple[bytes, bytes]]) -> None:
        """Insert or overwrite a batch of ``(key, value)`` pairs atomically."""
        self._base.state_put_many(
            self._execution_id,
            _coerce_ns(ns),
            items,
            shard_key=self._shard_key,
        )

    @_profiled("state_scan")
    def state_scan(
        self,
        ns: "bytes | FrameworkNS",
        *,
        start: bytes | None = None,
        end: bytes | None = None,
        reverse: bool = False,
        limit: int | None = None,
    ) -> Iterable[tuple[bytes, bytes]]:
        """Scan the ``(key, value)`` pairs of one namespace without removing them.

        Results are ordered by key bytes (descending when ``reverse=True``),
        restricted to the half-open range ``[start, end)`` and capped at
        ``limit``. The return value is an iterable (the cloudflare-do
        backend streams it in pages).
        """
        window = {"start": start, "end": end, "reverse": reverse, "limit": limit}
        return self._base.state_scan(
            self._execution_id,
            _coerce_ns(ns),
            shard_key=self._shard_key,
            **window,
        )

    @_profiled("state_drain")
    def state_drain(self, ns: "bytes | FrameworkNS") -> Iterable[tuple[bytes, bytes]]:
        """Atomically read and delete every ``(key, value)`` of one namespace.

        The result is an iterable and should be consumed completely
        (starting to iterate claims the whole namespace on the
        cloudflare-do backend).
        """
        return self._base.state_drain(
            self._execution_id,
            _coerce_ns(ns),
            shard_key=self._shard_key,
        )

    @_profiled("state_delete")
    def state_delete(
        self,
        ns: "bytes | FrameworkNS",
        keys: list[bytes] | None = None,
        *,
        start: bytes | None = None,
        end: bytes | None = None,
    ) -> int:
        """Delete by key list, by half-open ``[start, end)`` range, or everything.

        ``keys`` cannot be combined with a range. The complete contract is
        documented on ``FunctionStorage.state_delete``.
        """
        return self._base.state_delete(
            self._execution_id,
            _coerce_ns(ns),
            keys,
            start=start,
            end=end,
            shard_key=self._shard_key,
        )

    @_profiled("execution_clear")
    def execution_clear(self) -> int:
        """Wipe ALL state and log rows of this execution, across every namespace."""
        shard = self._shard_key
        return self._base.execution_clear(self._execution_id, shard_key=shard)

    @_profiled("state_append")
    def state_append(self, ns: "bytes | FrameworkNS", key: bytes, item: bytes) -> int:
        """Append ``item`` to the ``(ns, key)`` log and return its assigned ordinal.

        Only transport-layer retries are idempotent (HTTP retry on CfDo,
        pymssql driver-level retry on Azure SQL). A caller-level retry —
        calling ``state_append`` again for the same logical record after
        the first call returned — writes a duplicate row. The underlying
        ``FunctionStorage.state_append`` documents the full contract.
        """
        return self._base.state_append(
            self._execution_id,
            _coerce_ns(ns),
            key,
            item,
            shard_key=self._shard_key,
        )

    @_profiled("state_log_scan")
    def state_log_scan(
        self,
        ns: "bytes | FrameworkNS",
        key: bytes,
        *,
        after_id: int = -1,
        limit: int | None = None,
    ) -> list[tuple[int, bytes]]:
        """Return the ``(id, value)`` log rows of ``(ns, key)`` with ``id > after_id``.

        The complete contract is documented on
        ``FunctionStorage.state_log_scan``.
        """
        return self._base.state_log_scan(
            self._execution_id,
            _coerce_ns(ns),
            key,
            after_id=after_id,
            limit=limit,
            shard_key=self._shard_key,
        )

    # --- Atomic int64 counters (function_counter table) ---

    @_profiled("state_counter_get")
    def counter_get(self, ns: "bytes | FrameworkNS", key: bytes) -> int:
        """Return the int64 counter value (0 when it does not exist)."""
        return self._base.state_counter_get(
            self._execution_id,
            _coerce_ns(ns),
            key,
            shard_key=self._shard_key,
        )

    @_profiled("state_counter_add")
    def counter_add(self, ns: "bytes | FrameworkNS", key: bytes, delta: int) -> int:
        """Add ``delta`` atomically and return the new value. See FunctionStorage."""
        return self._base.state_counter_add(
            self._execution_id,
            _coerce_ns(ns),
            key,
            delta,
            shard_key=self._shard_key,
        )

    @_profiled("state_counter_set")
    def counter_set(self, ns: "bytes | FrameworkNS", key: bytes, value: int) -> None:
        """Replace the counter with ``value``."""
        self._base.state_counter_set(
            self._execution_id,
            _coerce_ns(ns),
            key,
            value,
            shard_key=self._shard_key,
        )

    @_profiled("state_counter_delete")
    def counter_delete(self, ns: "bytes | FrameworkNS", key: bytes) -> None:
        """Remove the counter (no-op when it does not exist)."""
        self._base.state_counter_delete(
            self._execution_id,
            _coerce_ns(ns),
            key,
            shard_key=self._shard_key,
        )

    # --- Encoding helpers ---------------------------------------------------

    @staticmethod
    def pack_int_key(i: int) -> bytes:
        """Sugar: encode an int as 8 signed little-endian bytes for use as a ``state_*`` key.

        Integer ids are the common key for table_buffering state_id,
        aggregate group_id and window partition_id. Going through this
        helper keeps the encoding canonical: the same int always yields
        the same bytes, whoever produces the key.

        Note that little-endian byte strings do not sort in numeric
        order, so a key-ordered ``state_scan`` over such keys is not a
        numeric-order scan.
        """
        return i.to_bytes(length=8, byteorder="little", signed=True)

    @staticmethod
    def serialize_record_batch(batch: pa.RecordBatch) -> bytes:
        """Encode one RecordBatch as Arrow IPC stream bytes."""
        buffer = pa.BufferOutputStream()
        with pa.ipc.new_stream(buffer, batch.schema) as stream_writer:
            stream_writer.write_batch(batch)
        encoded = buffer.getvalue()
        return encoded.to_pybytes()

    @staticmethod
    def deserialize_record_batch(data: bytes) -> pa.RecordBatch:
        """Decode the first RecordBatch from Arrow IPC stream bytes."""
        with pa.ipc.open_stream(data) as stream_reader:
            first_batch = stream_reader.read_next_batch()
            return first_batch
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
class FunctionStorageSqlite:
|
|
939
|
+
"""SQLite-backed storage for VGI function state.
|
|
940
|
+
|
|
941
|
+
This implementation uses SQLite with WAL mode to allow multiple worker
|
|
942
|
+
processes to share state. It manages the three unified tables (the same
|
|
943
|
+
shape every backend uses):
|
|
944
|
+
|
|
945
|
+
- function_state: composite-key K/V over (scope_id, ns, key) — the single
|
|
946
|
+
home for per-execution / per-transaction / per-group / per-pid state
|
|
947
|
+
- function_state_log: append-only log keyed by (scope_id, ns, key)
|
|
948
|
+
- work_queue: FIFO queue of work items per execution
|
|
949
|
+
|
|
950
|
+
"""
|
|
951
|
+
|
|
952
|
+
def __init__(self, db_path: str | None = None) -> None:
|
|
953
|
+
"""Initialize SQLite storage.
|
|
954
|
+
|
|
955
|
+
Args:
|
|
956
|
+
db_path: Path to the SQLite database file. If None, uses a default
|
|
957
|
+
location in the user's state directory. Pass ``":memory:"`` to
|
|
958
|
+
use a process-local in-memory database; the storage uses a
|
|
959
|
+
shared-cache URI plus an anchor connection so the per-op
|
|
960
|
+
connections in ``_connect`` see the same DB. Suitable for
|
|
961
|
+
single-process test fixtures where commit-fsync overhead
|
|
962
|
+
dominates and persistence isn't needed.
|
|
963
|
+
|
|
964
|
+
"""
|
|
965
|
+
if db_path == ":memory:":
|
|
966
|
+
# Shared-cache in-memory: every connection to this URI sees the
|
|
967
|
+
# same database for as long as at least one connection is open.
|
|
968
|
+
# We hold ``_anchor_conn`` for the storage instance's lifetime so
|
|
969
|
+
# the DB survives between transient ``_connect`` calls. The
|
|
970
|
+
# per-instance UUID namespaces the DB so independent storage
|
|
971
|
+
# instances within a single process don't collide.
|
|
972
|
+
import uuid
|
|
973
|
+
|
|
974
|
+
self._memory_uri: str | None = f"file:vgi_storage_{uuid.uuid4().hex}?mode=memory&cache=shared"
|
|
975
|
+
self._anchor_conn: sqlite3.Connection | None = sqlite3.connect(self._memory_uri, uri=True, timeout=30.0)
|
|
976
|
+
self.db_path = ":memory:"
|
|
977
|
+
else:
|
|
978
|
+
self._memory_uri = None
|
|
979
|
+
self._anchor_conn = None
|
|
980
|
+
self.db_path = db_path if db_path is not None else _get_default_db_path()
|
|
981
|
+
self._tls = threading.local()
|
|
982
|
+
self._ensure_tables()
|
|
983
|
+
|
|
984
|
+
def _connect(self) -> sqlite3.Connection:
|
|
985
|
+
"""Create a new short-lived database connection (used for one-shot DDL)."""
|
|
986
|
+
if self._memory_uri is not None:
|
|
987
|
+
# Memory DBs use MEMORY journal mode implicitly; no WAL,
|
|
988
|
+
# no fsync — the whole point of using :memory: here.
|
|
989
|
+
return sqlite3.connect(self._memory_uri, uri=True, timeout=30.0)
|
|
990
|
+
conn = sqlite3.connect(self.db_path, timeout=30.0)
|
|
991
|
+
conn.execute("PRAGMA journal_mode=WAL")
|
|
992
|
+
return conn
|
|
993
|
+
|
|
994
|
+
def _conn(self) -> sqlite3.Connection:
|
|
995
|
+
"""Return the calling thread's persistent connection, creating it lazily.
|
|
996
|
+
|
|
997
|
+
WAL coordinates writes across processes via file locking; within a
|
|
998
|
+
process, each thread gets its own connection so SQLite's per-connection
|
|
999
|
+
locking serializes writers without a Python-level lock and without
|
|
1000
|
+
forfeiting WAL's reader-writer concurrency. Pragmas are applied once
|
|
1001
|
+
per connection — ``synchronous=NORMAL`` is the dominant win, since it
|
|
1002
|
+
skips fsync on every commit and only fsyncs at WAL checkpoint.
|
|
1003
|
+
"""
|
|
1004
|
+
conn: sqlite3.Connection | None = getattr(self._tls, "conn", None)
|
|
1005
|
+
if conn is not None:
|
|
1006
|
+
return conn
|
|
1007
|
+
if self._memory_uri is not None:
|
|
1008
|
+
conn = sqlite3.connect(self._memory_uri, uri=True, timeout=30.0)
|
|
1009
|
+
else:
|
|
1010
|
+
conn = sqlite3.connect(self.db_path, timeout=30.0)
|
|
1011
|
+
conn.execute("PRAGMA journal_mode=WAL")
|
|
1012
|
+
conn.execute("PRAGMA synchronous=NORMAL")
|
|
1013
|
+
conn.execute("PRAGMA busy_timeout=30000")
|
|
1014
|
+
conn.execute("PRAGMA temp_store=MEMORY")
|
|
1015
|
+
conn.execute("PRAGMA cache_size=-65536")
|
|
1016
|
+
self._tls.conn = conn
|
|
1017
|
+
return conn
|
|
1018
|
+
|
|
1019
|
+
def close(self) -> None:
|
|
1020
|
+
"""Close the calling thread's persistent connection, if any."""
|
|
1021
|
+
conn: sqlite3.Connection | None = getattr(self._tls, "conn", None)
|
|
1022
|
+
if conn is not None:
|
|
1023
|
+
conn.close()
|
|
1024
|
+
self._tls.conn = None
|
|
1025
|
+
|
|
1026
|
+
def _ensure_tables(self) -> None:
|
|
1027
|
+
"""Create all storage tables if they don't exist.
|
|
1028
|
+
|
|
1029
|
+
Handles schema migration from older versions (e.g. invocation_id → execution_id)
|
|
1030
|
+
by dropping and recreating tables with stale schemas. The data in these tables
|
|
1031
|
+
is ephemeral (in-progress worker state), so dropping is safe.
|
|
1032
|
+
"""
|
|
1033
|
+
conn = self._connect()
|
|
1034
|
+
try:
|
|
1035
|
+
# Self-heal an older on-disk DB to the unified minimal schema. The
|
|
1036
|
+
# local SQLite tier is single-connection-per-process with no network
|
|
1037
|
+
# retries, so it carries none of the DO's HTTP idempotency machinery
|
|
1038
|
+
# (last_attempt_id / drained_* / attempt_id / created_at). Drop any
|
|
1039
|
+
# table left over with the old idempotency columns so the CREATEs
|
|
1040
|
+
# below recreate the minimal shape. All of this state is ephemeral
|
|
1041
|
+
# in-progress worker state, so dropping + recreating is safe.
|
|
1042
|
+
for table, stale_col in [
|
|
1043
|
+
("function_state", "last_attempt_id"),
|
|
1044
|
+
("function_state_log", "attempt_id"),
|
|
1045
|
+
("work_queue", "created_at"),
|
|
1046
|
+
]:
|
|
1047
|
+
cols = {row[1] for row in conn.execute(f"PRAGMA table_info({table})").fetchall()} # noqa: S608
|
|
1048
|
+
if stale_col in cols:
|
|
1049
|
+
conn.execute(f"DROP TABLE IF EXISTS {table}") # noqa: S608
|
|
1050
|
+
# Tables eliminated by the unified schema: worker collect now rides
|
|
1051
|
+
# function_state + state_drain, and the queue carries no registration
|
|
1052
|
+
# (matching the Durable Object — pop on an unknown id returns None).
|
|
1053
|
+
for dead in ("global_state_storage", "worker_state", "invocation_registry", "init_storage"):
|
|
1054
|
+
conn.execute(f"DROP TABLE IF EXISTS {dead}") # noqa: S608
|
|
1055
|
+
|
|
1056
|
+
# ----------------------------------------------------------------
|
|
1057
|
+
# Unified schema — the same three tables every backend uses (the
|
|
1058
|
+
# Durable Object adds an HTTP-idempotency column layer on top).
|
|
1059
|
+
# work_queue — FIFO work items, destructive pop.
|
|
1060
|
+
# function_state — composite-key K/V over (scope_id, ns, key);
|
|
1061
|
+
# the single home for per-execution /
|
|
1062
|
+
# per-transaction / per-group / per-pid
|
|
1063
|
+
# state. Caller picks ``ns``; storage
|
|
1064
|
+
# doesn't interpret it.
|
|
1065
|
+
# function_state_log — append-only log keyed by (scope, ns, key);
|
|
1066
|
+
# the AUTOINCREMENT id is the scan cursor.
|
|
1067
|
+
# ----------------------------------------------------------------
|
|
1068
|
+
conn.execute("""
|
|
1069
|
+
CREATE TABLE IF NOT EXISTS work_queue (
|
|
1070
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1071
|
+
execution_id BLOB NOT NULL,
|
|
1072
|
+
work_item BLOB NOT NULL
|
|
1073
|
+
)
|
|
1074
|
+
""")
|
|
1075
|
+
conn.execute("""
|
|
1076
|
+
CREATE INDEX IF NOT EXISTS idx_work_queue_execution
|
|
1077
|
+
ON work_queue(execution_id, id)
|
|
1078
|
+
""")
|
|
1079
|
+
conn.execute("""
|
|
1080
|
+
CREATE TABLE IF NOT EXISTS function_state (
|
|
1081
|
+
scope_id BLOB NOT NULL,
|
|
1082
|
+
ns BLOB NOT NULL,
|
|
1083
|
+
key BLOB NOT NULL,
|
|
1084
|
+
value BLOB NOT NULL,
|
|
1085
|
+
PRIMARY KEY (scope_id, ns, key)
|
|
1086
|
+
)
|
|
1087
|
+
""")
|
|
1088
|
+
conn.execute("""
|
|
1089
|
+
CREATE TABLE IF NOT EXISTS function_state_log (
|
|
1090
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1091
|
+
scope_id BLOB NOT NULL,
|
|
1092
|
+
ns BLOB NOT NULL,
|
|
1093
|
+
key BLOB NOT NULL,
|
|
1094
|
+
value BLOB NOT NULL
|
|
1095
|
+
)
|
|
1096
|
+
""")
|
|
1097
|
+
conn.execute("""
|
|
1098
|
+
CREATE INDEX IF NOT EXISTS idx_function_state_log_lookup
|
|
1099
|
+
ON function_state_log(scope_id, ns, key, id)
|
|
1100
|
+
""")
|
|
1101
|
+
# function_counter — atomic int64 counters, a typed numeric facet
|
|
1102
|
+
# kept apart from the opaque function_state K/V. No idempotency
|
|
1103
|
+
# columns: the local SQLite tier has no network retry layer.
|
|
1104
|
+
conn.execute("""
|
|
1105
|
+
CREATE TABLE IF NOT EXISTS function_counter (
|
|
1106
|
+
scope_id BLOB NOT NULL,
|
|
1107
|
+
ns BLOB NOT NULL,
|
|
1108
|
+
key BLOB NOT NULL,
|
|
1109
|
+
n INTEGER NOT NULL,
|
|
1110
|
+
PRIMARY KEY (scope_id, ns, key)
|
|
1111
|
+
)
|
|
1112
|
+
""")
|
|
1113
|
+
conn.commit()
|
|
1114
|
+
finally:
|
|
1115
|
+
conn.close()
|
|
1116
|
+
|
|
1117
|
+
# --- Work Queue ---
|
|
1118
|
+
|
|
1119
|
+
def queue_push(self, execution_id: bytes, items: list[bytes], *, shard_key: str = "") -> int:
|
|
1120
|
+
"""Append work items to the queue."""
|
|
1121
|
+
conn = self._conn()
|
|
1122
|
+
if items:
|
|
1123
|
+
conn.executemany(
|
|
1124
|
+
"""
|
|
1125
|
+
INSERT INTO work_queue (execution_id, work_item)
|
|
1126
|
+
VALUES (?, ?)
|
|
1127
|
+
""",
|
|
1128
|
+
[(execution_id, item) for item in items],
|
|
1129
|
+
)
|
|
1130
|
+
conn.commit()
|
|
1131
|
+
return len(items)
|
|
1132
|
+
|
|
1133
|
+
def queue_pop(self, execution_id: bytes, *, shard_key: str = "") -> bytes | None:
|
|
1134
|
+
"""Atomically claim one work item from the queue.
|
|
1135
|
+
|
|
1136
|
+
Returns None when the queue is empty or the execution_id was never
|
|
1137
|
+
pushed — there is no registration, matching the Durable Object.
|
|
1138
|
+
"""
|
|
1139
|
+
conn = self._conn()
|
|
1140
|
+
cursor = conn.execute(
|
|
1141
|
+
"""
|
|
1142
|
+
DELETE FROM work_queue
|
|
1143
|
+
WHERE id = (
|
|
1144
|
+
SELECT id FROM work_queue
|
|
1145
|
+
WHERE execution_id = ?
|
|
1146
|
+
ORDER BY id ASC
|
|
1147
|
+
LIMIT 1
|
|
1148
|
+
)
|
|
1149
|
+
RETURNING work_item
|
|
1150
|
+
""",
|
|
1151
|
+
(execution_id,),
|
|
1152
|
+
)
|
|
1153
|
+
row = cursor.fetchone()
|
|
1154
|
+
conn.commit()
|
|
1155
|
+
return row[0] if row else None
|
|
1156
|
+
|
|
1157
|
+
def queue_clear(self, execution_id: bytes, *, shard_key: str = "") -> int:
|
|
1158
|
+
"""Clear all remaining work items for the execution."""
|
|
1159
|
+
conn = self._conn()
|
|
1160
|
+
cursor = conn.execute(
|
|
1161
|
+
"DELETE FROM work_queue WHERE execution_id = ?",
|
|
1162
|
+
(execution_id,),
|
|
1163
|
+
)
|
|
1164
|
+
conn.commit()
|
|
1165
|
+
return cursor.rowcount
|
|
1166
|
+
|
|
1167
|
+
# ========================================================================
|
|
1168
|
+
# Unified state_* implementation
|
|
1169
|
+
# ========================================================================
|
|
1170
|
+
#
|
|
1171
|
+
# See FunctionStorage protocol docstrings for contracts. No idempotency /
|
|
1172
|
+
# replay-detection here: that exists only on the HTTP tier (the Durable
|
|
1173
|
+
# Object) to dedup network retries. A local SQLite connection has no retry
|
|
1174
|
+
# layer above these methods, so mutations are plain writes.
|
|
1175
|
+
|
|
1176
|
+
def state_get_many(
|
|
1177
|
+
self,
|
|
1178
|
+
scope_id: bytes,
|
|
1179
|
+
ns: bytes,
|
|
1180
|
+
keys: list[bytes],
|
|
1181
|
+
*,
|
|
1182
|
+
shard_key: str = "",
|
|
1183
|
+
) -> list[bytes | None]:
|
|
1184
|
+
"""Batched read by key list. Returns parallel list with None for misses."""
|
|
1185
|
+
del shard_key
|
|
1186
|
+
if not keys:
|
|
1187
|
+
return []
|
|
1188
|
+
conn = self._conn()
|
|
1189
|
+
placeholders = ",".join("?" for _ in keys)
|
|
1190
|
+
rows = conn.execute(
|
|
1191
|
+
f"""
|
|
1192
|
+
SELECT key, value FROM function_state
|
|
1193
|
+
WHERE scope_id = ? AND ns = ? AND key IN ({placeholders})
|
|
1194
|
+
""",
|
|
1195
|
+
(scope_id, ns, *keys),
|
|
1196
|
+
).fetchall()
|
|
1197
|
+
found: dict[bytes, bytes] = {bytes(k): bytes(v) for k, v in rows}
|
|
1198
|
+
return [found.get(bytes(k)) for k in keys]
|
|
1199
|
+
|
|
1200
|
+
def state_put_many(
|
|
1201
|
+
self,
|
|
1202
|
+
scope_id: bytes,
|
|
1203
|
+
ns: bytes,
|
|
1204
|
+
items: list[tuple[bytes, bytes]],
|
|
1205
|
+
*,
|
|
1206
|
+
shard_key: str = "",
|
|
1207
|
+
) -> None:
|
|
1208
|
+
"""Atomic batched upsert by (scope_id, ns, key)."""
|
|
1209
|
+
del shard_key
|
|
1210
|
+
if not items:
|
|
1211
|
+
return
|
|
1212
|
+
conn = self._conn()
|
|
1213
|
+
conn.executemany(
|
|
1214
|
+
"""
|
|
1215
|
+
INSERT INTO function_state (scope_id, ns, key, value)
|
|
1216
|
+
VALUES (?, ?, ?, ?)
|
|
1217
|
+
ON CONFLICT(scope_id, ns, key) DO UPDATE SET
|
|
1218
|
+
value = excluded.value
|
|
1219
|
+
""",
|
|
1220
|
+
[(scope_id, ns, k, v) for k, v in items],
|
|
1221
|
+
)
|
|
1222
|
+
conn.commit()
|
|
1223
|
+
|
|
1224
|
+
def state_scan(
|
|
1225
|
+
self,
|
|
1226
|
+
scope_id: bytes,
|
|
1227
|
+
ns: bytes,
|
|
1228
|
+
*,
|
|
1229
|
+
start: bytes | None = None,
|
|
1230
|
+
end: bytes | None = None,
|
|
1231
|
+
reverse: bool = False,
|
|
1232
|
+
limit: int | None = None,
|
|
1233
|
+
shard_key: str = "",
|
|
1234
|
+
) -> list[tuple[bytes, bytes]]:
|
|
1235
|
+
"""Non-destructive scan of (key, value) in a namespace.
|
|
1236
|
+
|
|
1237
|
+
Ordered by key bytes (BLOB compares bytewise / memcmp), descending when
|
|
1238
|
+
``reverse``, bounded to ``[start, end)`` and capped at ``limit``.
|
|
1239
|
+
"""
|
|
1240
|
+
del shard_key
|
|
1241
|
+
conn = self._conn()
|
|
1242
|
+
params: list[object] = [scope_id, ns]
|
|
1243
|
+
clauses = ""
|
|
1244
|
+
if start is not None:
|
|
1245
|
+
clauses += " AND key >= ?"
|
|
1246
|
+
params.append(start)
|
|
1247
|
+
if end is not None:
|
|
1248
|
+
clauses += " AND key < ?"
|
|
1249
|
+
params.append(end)
|
|
1250
|
+
order = "DESC" if reverse else "ASC"
|
|
1251
|
+
params.append(-1 if limit is None else int(limit))
|
|
1252
|
+
rows = conn.execute(
|
|
1253
|
+
f"""
|
|
1254
|
+
SELECT key, value FROM function_state
|
|
1255
|
+
WHERE scope_id = ? AND ns = ?{clauses}
|
|
1256
|
+
ORDER BY key {order}
|
|
1257
|
+
LIMIT ?
|
|
1258
|
+
""", # noqa: S608 — order is a fixed ASC/DESC literal, not user input
|
|
1259
|
+
tuple(params),
|
|
1260
|
+
).fetchall()
|
|
1261
|
+
return [(bytes(k), bytes(v)) for k, v in rows]
|
|
1262
|
+
|
|
1263
|
+
def state_drain(
|
|
1264
|
+
self,
|
|
1265
|
+
scope_id: bytes,
|
|
1266
|
+
ns: bytes,
|
|
1267
|
+
*,
|
|
1268
|
+
shard_key: str = "",
|
|
1269
|
+
) -> list[tuple[bytes, bytes]]:
|
|
1270
|
+
"""Atomic destructive scan: read all (key, value) in a namespace and delete them."""
|
|
1271
|
+
del shard_key
|
|
1272
|
+
conn = self._conn()
|
|
1273
|
+
# DELETE ... RETURNING (SQLite ≥3.35) reads and removes in one
|
|
1274
|
+
# statement; the connection serializes it. No tombstone/replay layer —
|
|
1275
|
+
# that exists only on the HTTP tier (the Durable Object) for retry
|
|
1276
|
+
# safety, which a single local connection doesn't face.
|
|
1277
|
+
rows = conn.execute(
|
|
1278
|
+
"""
|
|
1279
|
+
DELETE FROM function_state
|
|
1280
|
+
WHERE scope_id = ? AND ns = ?
|
|
1281
|
+
RETURNING key, value
|
|
1282
|
+
""",
|
|
1283
|
+
(scope_id, ns),
|
|
1284
|
+
).fetchall()
|
|
1285
|
+
conn.commit()
|
|
1286
|
+
return [(bytes(k), bytes(v)) for k, v in rows]
|
|
1287
|
+
|
|
1288
|
+
def state_delete(
|
|
1289
|
+
self,
|
|
1290
|
+
scope_id: bytes,
|
|
1291
|
+
ns: bytes,
|
|
1292
|
+
keys: list[bytes] | None = None,
|
|
1293
|
+
*,
|
|
1294
|
+
start: bytes | None = None,
|
|
1295
|
+
end: bytes | None = None,
|
|
1296
|
+
shard_key: str = "",
|
|
1297
|
+
) -> int:
|
|
1298
|
+
"""Delete by key list, by ``[start, end)`` range, or whole namespace.
|
|
1299
|
+
|
|
1300
|
+
``keys`` and the range are mutually exclusive. Returns count deleted.
|
|
1301
|
+
"""
|
|
1302
|
+
del shard_key
|
|
1303
|
+
if keys is not None and (start is not None or end is not None):
|
|
1304
|
+
raise ValueError("state_delete: keys and start/end are mutually exclusive")
|
|
1305
|
+
conn = self._conn()
|
|
1306
|
+
if keys is not None:
|
|
1307
|
+
if not keys:
|
|
1308
|
+
return 0
|
|
1309
|
+
placeholders = ",".join("?" for _ in keys)
|
|
1310
|
+
cur = conn.execute(
|
|
1311
|
+
f"""
|
|
1312
|
+
DELETE FROM function_state
|
|
1313
|
+
WHERE scope_id = ? AND ns = ? AND key IN ({placeholders})
|
|
1314
|
+
""", # noqa: S608 — placeholders are bound '?' params, not interpolated values
|
|
1315
|
+
(scope_id, ns, *keys),
|
|
1316
|
+
)
|
|
1317
|
+
else:
|
|
1318
|
+
params: list[object] = [scope_id, ns]
|
|
1319
|
+
clauses = ""
|
|
1320
|
+
if start is not None:
|
|
1321
|
+
clauses += " AND key >= ?"
|
|
1322
|
+
params.append(start)
|
|
1323
|
+
if end is not None:
|
|
1324
|
+
clauses += " AND key < ?"
|
|
1325
|
+
params.append(end)
|
|
1326
|
+
cur = conn.execute(
|
|
1327
|
+
f"DELETE FROM function_state WHERE scope_id = ? AND ns = ?{clauses}", # noqa: S608
|
|
1328
|
+
tuple(params),
|
|
1329
|
+
)
|
|
1330
|
+
conn.commit()
|
|
1331
|
+
return int(cur.rowcount)
|
|
1332
|
+
|
|
1333
|
+
def execution_clear(self, scope_id: bytes, *, shard_key: str = "") -> int:
    """Drop everything stored for ``scope_id``, regardless of namespace.

    Clears the scope's rows from ``function_state``, ``function_state_log``
    and ``function_counter`` (in that order), commits once, and returns the
    combined number of rows removed. ``shard_key`` is accepted for interface
    parity and discarded.
    """
    del shard_key
    db = self._conn()
    removed = 0
    for statement in (
        "DELETE FROM function_state WHERE scope_id = ?",
        "DELETE FROM function_state_log WHERE scope_id = ?",
        "DELETE FROM function_counter WHERE scope_id = ?",
    ):
        removed += int(db.execute(statement, (scope_id,)).rowcount)
    db.commit()
    return removed
|
|
1356
|
+
|
|
1357
|
+
def state_append(self, scope_id: bytes, ns: bytes, key: bytes, item: bytes, *, shard_key: str = "") -> int:
    """Add ``item`` to the log kept for ``(scope_id, ns, key)``.

    Inserts one ``function_state_log`` row and returns the ``id`` SQLite
    assigned to it, which serves as the item's ordinal. ``shard_key`` is
    accepted for interface parity and discarded.
    """
    del shard_key
    db = self._conn()
    cursor = db.execute(
        """
        INSERT INTO function_state_log (scope_id, ns, key, value)
        VALUES (?, ?, ?, ?)
        RETURNING id
        """,
        (scope_id, ns, key, item),
    )
    # Consume the RETURNING row before committing.
    inserted = cursor.fetchone()
    db.commit()
    return int(inserted[0])
|
|
1379
|
+
|
|
1380
|
+
def state_log_scan(
|
|
1381
|
+
self,
|
|
1382
|
+
scope_id: bytes,
|
|
1383
|
+
ns: bytes,
|
|
1384
|
+
key: bytes,
|
|
1385
|
+
*,
|
|
1386
|
+
after_id: int = -1,
|
|
1387
|
+
limit: int | None = None,
|
|
1388
|
+
shard_key: str = "",
|
|
1389
|
+
) -> list[tuple[int, bytes]]:
|
|
1390
|
+
"""Yield (id, value) pairs for (scope_id, ns, key) with id > after_id."""
|
|
1391
|
+
del shard_key
|
|
1392
|
+
conn = self._conn()
|
|
1393
|
+
# SQLite supports LIMIT -1 as unbounded, but we pass NULL via
|
|
1394
|
+
# a parameter for clarity. Use LIMIT ? with -1 sentinel.
|
|
1395
|
+
sql = """
|
|
1396
|
+
SELECT id, value FROM function_state_log
|
|
1397
|
+
WHERE scope_id = ? AND ns = ? AND key = ? AND id > ?
|
|
1398
|
+
ORDER BY id
|
|
1399
|
+
LIMIT ?
|
|
1400
|
+
"""
|
|
1401
|
+
sqlite_limit = -1 if limit is None else int(limit)
|
|
1402
|
+
rows = conn.execute(
|
|
1403
|
+
sql,
|
|
1404
|
+
(scope_id, ns, key, after_id, sqlite_limit),
|
|
1405
|
+
).fetchall()
|
|
1406
|
+
return [(int(rid), bytes(v)) for (rid, v) in rows]
|
|
1407
|
+
|
|
1408
|
+
# --- Atomic int64 counters (function_counter) ---
|
|
1409
|
+
# No idempotency layer: a local single-connection backend has no retries.
|
|
1410
|
+
|
|
1411
|
+
def state_counter_get(self, scope_id: bytes, ns: bytes, key: bytes, *, shard_key: str = "") -> int:
    """Return the counter stored for ``(scope_id, ns, key)``, or 0 when none exists.

    ``shard_key`` is accepted for interface parity and discarded.
    """
    del shard_key
    cursor = self._conn().execute(
        "SELECT n FROM function_counter WHERE scope_id = ? AND ns = ? AND key = ?",
        (scope_id, ns, key),
    )
    found = cursor.fetchone()
    if not found:
        # No row: an absent counter reads as zero.
        return 0
    return int(found[0])
|
|
1423
|
+
|
|
1424
|
+
def state_counter_add(self, scope_id: bytes, ns: bytes, key: bytes, delta: int, *, shard_key: str = "") -> int:
    """Add ``delta`` to the counter in one statement and return the result.

    A counter that does not exist yet is created holding ``delta`` (i.e. it
    starts from 0). ``shard_key`` is accepted for interface parity and
    discarded.
    """
    del shard_key
    db = self._conn()
    # Upsert: insert the delta as the initial value, or add it to the existing
    # row on a primary-key conflict; RETURNING yields the post-update value.
    cursor = db.execute(
        """
        INSERT INTO function_counter (scope_id, ns, key, n)
        VALUES (?, ?, ?, ?)
        ON CONFLICT(scope_id, ns, key) DO UPDATE SET n = n + excluded.n
        RETURNING n
        """,
        (scope_id, ns, key, int(delta)),
    )
    updated = cursor.fetchone()
    db.commit()
    return int(updated[0])
|
|
1447
|
+
|
|
1448
|
+
def state_counter_set(self, scope_id: bytes, ns: bytes, key: bytes, value: int, *, shard_key: str = "") -> None:
    """Store ``value`` as the counter for ``(scope_id, ns, key)``, replacing any prior value.

    ``shard_key`` is accepted for interface parity and discarded.
    """
    del shard_key
    db = self._conn()
    upsert_args = (scope_id, ns, key, int(value))
    # Insert a fresh row, or overwrite ``n`` when the counter already exists.
    db.execute(
        """
        INSERT INTO function_counter (scope_id, ns, key, n)
        VALUES (?, ?, ?, ?)
        ON CONFLICT(scope_id, ns, key) DO UPDATE SET n = excluded.n
        """,
        upsert_args,
    )
    db.commit()
|
|
1469
|
+
|
|
1470
|
+
def state_counter_delete(self, scope_id: bytes, ns: bytes, key: bytes, *, shard_key: str = "") -> None:
    """Remove the counter for ``(scope_id, ns, key)``; does nothing when it is absent.

    ``shard_key`` is accepted for interface parity and discarded.
    """
    del shard_key
    db = self._conn()
    target = (scope_id, ns, key)
    db.execute(
        "DELETE FROM function_counter WHERE scope_id = ? AND ns = ? AND key = ?",
        target,
    )
    db.commit()
|
|
1479
|
+
|
|
1480
|
+
|
|
1481
|
+
class ShardedSqliteStorage:
|
|
1482
|
+
"""Debug-only SQLite backend that PARTITIONS storage by ``shard_key``.
|
|
1483
|
+
|
|
1484
|
+
The normal SQLite backend ignores ``shard_key`` (one shared DB), masking
|
|
1485
|
+
shard-routing bugs that only bite ``cloudflare-do`` (which truly shards per
|
|
1486
|
+
Durable Object). This wrapper isolates shards by PREFIXING the scope_id /
|
|
1487
|
+
execution_id with the shard_key, so an op under shard A can't see state
|
|
1488
|
+
written under shard B — reproducing cloudflare-do isolation locally — while
|
|
1489
|
+
using ONE inner store, so concurrency behaves exactly like the normal sqlite
|
|
1490
|
+
backend. (Per-shard databases instead exploded connections and deadlocked
|
|
1491
|
+
the shared-cache :memory: DB under load.) Enabled via ``VGI_SQLITE_SHARD=1``
|
|
1492
|
+
(see ``vgi/function.py:_resolve_storage``). Not for production.
|
|
1493
|
+
|
|
1494
|
+
With ``VGI_SQLITE_SHARD_LOG=1`` it logs every op's (op, shard_key, scope) so
|
|
1495
|
+
a write and a read for one execution can be compared without a remote tail.
|
|
1496
|
+
"""
|
|
1497
|
+
|
|
1498
|
+
_SEP = b"\x1f" # unit separator — absent from attach/execution id bytes
|
|
1499
|
+
|
|
1500
|
+
def __init__(self, db_path: str | None = None) -> None:
|
|
1501
|
+
self._inner = FunctionStorageSqlite(db_path=db_path or ":memory:")
|
|
1502
|
+
self._log = logging.getLogger("vgi.storage.sqlite_shard")
|
|
1503
|
+
self._dbg_on = os.environ.get("VGI_SQLITE_SHARD_LOG") == "1"
|
|
1504
|
+
|
|
1505
|
+
def _p(self, shard_key: str, id_bytes: bytes) -> bytes:
|
|
1506
|
+
"""Namespace an execution_id / scope_id by shard_key.
|
|
1507
|
+
|
|
1508
|
+
Transparent to the worker — only the sqlite row key changes, never the
|
|
1509
|
+
returned data.
|
|
1510
|
+
"""
|
|
1511
|
+
return shard_key.encode("utf-8") + self._SEP + id_bytes
|
|
1512
|
+
|
|
1513
|
+
def _dbg(self, op: str, shard_key: str, scope: bytes) -> None:
|
|
1514
|
+
if self._dbg_on:
|
|
1515
|
+
self._log.warning("op=%s shard=%s scope=%s", op, shard_key, scope.hex()[:16])
|
|
1516
|
+
|
|
1517
|
+
# --- Work Queue ---
|
|
1518
|
+
def queue_push(self, execution_id: bytes, items: list[bytes], *, shard_key: str = "") -> int:
|
|
1519
|
+
self._dbg("queue_push", shard_key, execution_id)
|
|
1520
|
+
return self._inner.queue_push(self._p(shard_key, execution_id), items)
|
|
1521
|
+
|
|
1522
|
+
def queue_pop(self, execution_id: bytes, *, shard_key: str = "") -> bytes | None:
|
|
1523
|
+
self._dbg("queue_pop", shard_key, execution_id)
|
|
1524
|
+
return self._inner.queue_pop(self._p(shard_key, execution_id))
|
|
1525
|
+
|
|
1526
|
+
def queue_clear(self, execution_id: bytes, *, shard_key: str = "") -> int:
|
|
1527
|
+
self._dbg("queue_clear", shard_key, execution_id)
|
|
1528
|
+
return self._inner.queue_clear(self._p(shard_key, execution_id))
|
|
1529
|
+
|
|
1530
|
+
# --- Unified state (scope_id namespaced by shard_key) ---
|
|
1531
|
+
def state_get_many(
|
|
1532
|
+
self, scope_id: bytes, ns: bytes, keys: list[bytes], *, shard_key: str = ""
|
|
1533
|
+
) -> list[bytes | None]:
|
|
1534
|
+
self._dbg("state_get_many", shard_key, scope_id)
|
|
1535
|
+
return self._inner.state_get_many(self._p(shard_key, scope_id), ns, keys)
|
|
1536
|
+
|
|
1537
|
+
def state_put_many(
|
|
1538
|
+
self, scope_id: bytes, ns: bytes, items: list[tuple[bytes, bytes]], *, shard_key: str = ""
|
|
1539
|
+
) -> None:
|
|
1540
|
+
self._dbg("state_put_many", shard_key, scope_id)
|
|
1541
|
+
self._inner.state_put_many(self._p(shard_key, scope_id), ns, items)
|
|
1542
|
+
|
|
1543
|
+
def state_scan(
|
|
1544
|
+
self,
|
|
1545
|
+
scope_id: bytes,
|
|
1546
|
+
ns: bytes,
|
|
1547
|
+
*,
|
|
1548
|
+
start: bytes | None = None,
|
|
1549
|
+
end: bytes | None = None,
|
|
1550
|
+
reverse: bool = False,
|
|
1551
|
+
limit: int | None = None,
|
|
1552
|
+
shard_key: str = "",
|
|
1553
|
+
) -> list[tuple[bytes, bytes]]:
|
|
1554
|
+
self._dbg("state_scan", shard_key, scope_id)
|
|
1555
|
+
return self._inner.state_scan(
|
|
1556
|
+
self._p(shard_key, scope_id),
|
|
1557
|
+
ns,
|
|
1558
|
+
start=start,
|
|
1559
|
+
end=end,
|
|
1560
|
+
reverse=reverse,
|
|
1561
|
+
limit=limit,
|
|
1562
|
+
)
|
|
1563
|
+
|
|
1564
|
+
def state_drain(self, scope_id: bytes, ns: bytes, *, shard_key: str = "") -> list[tuple[bytes, bytes]]:
|
|
1565
|
+
self._dbg("state_drain", shard_key, scope_id)
|
|
1566
|
+
return self._inner.state_drain(self._p(shard_key, scope_id), ns)
|
|
1567
|
+
|
|
1568
|
+
def state_delete(
|
|
1569
|
+
self,
|
|
1570
|
+
scope_id: bytes,
|
|
1571
|
+
ns: bytes,
|
|
1572
|
+
keys: list[bytes] | None = None,
|
|
1573
|
+
*,
|
|
1574
|
+
start: bytes | None = None,
|
|
1575
|
+
end: bytes | None = None,
|
|
1576
|
+
shard_key: str = "",
|
|
1577
|
+
) -> int:
|
|
1578
|
+
self._dbg("state_delete", shard_key, scope_id)
|
|
1579
|
+
return self._inner.state_delete(self._p(shard_key, scope_id), ns, keys, start=start, end=end)
|
|
1580
|
+
|
|
1581
|
+
def execution_clear(self, scope_id: bytes, *, shard_key: str = "") -> int:
|
|
1582
|
+
self._dbg("execution_clear", shard_key, scope_id)
|
|
1583
|
+
return self._inner.execution_clear(self._p(shard_key, scope_id))
|
|
1584
|
+
|
|
1585
|
+
def state_append(self, scope_id: bytes, ns: bytes, key: bytes, item: bytes, *, shard_key: str = "") -> int:
|
|
1586
|
+
self._dbg("state_append", shard_key, scope_id)
|
|
1587
|
+
return self._inner.state_append(self._p(shard_key, scope_id), ns, key, item)
|
|
1588
|
+
|
|
1589
|
+
def state_log_scan(
|
|
1590
|
+
self,
|
|
1591
|
+
scope_id: bytes,
|
|
1592
|
+
ns: bytes,
|
|
1593
|
+
key: bytes,
|
|
1594
|
+
*,
|
|
1595
|
+
after_id: int = -1,
|
|
1596
|
+
limit: int | None = None,
|
|
1597
|
+
shard_key: str = "",
|
|
1598
|
+
) -> list[tuple[int, bytes]]:
|
|
1599
|
+
self._dbg("state_log_scan", shard_key, scope_id)
|
|
1600
|
+
return self._inner.state_log_scan(self._p(shard_key, scope_id), ns, key, after_id=after_id, limit=limit)
|
|
1601
|
+
|
|
1602
|
+
# --- Atomic int64 counters ---
|
|
1603
|
+
def state_counter_get(self, scope_id: bytes, ns: bytes, key: bytes, *, shard_key: str = "") -> int:
|
|
1604
|
+
self._dbg("state_counter_get", shard_key, scope_id)
|
|
1605
|
+
return self._inner.state_counter_get(self._p(shard_key, scope_id), ns, key)
|
|
1606
|
+
|
|
1607
|
+
def state_counter_add(
|
|
1608
|
+
self,
|
|
1609
|
+
scope_id: bytes,
|
|
1610
|
+
ns: bytes,
|
|
1611
|
+
key: bytes,
|
|
1612
|
+
delta: int,
|
|
1613
|
+
*,
|
|
1614
|
+
shard_key: str = "",
|
|
1615
|
+
) -> int:
|
|
1616
|
+
self._dbg("state_counter_add", shard_key, scope_id)
|
|
1617
|
+
return self._inner.state_counter_add(self._p(shard_key, scope_id), ns, key, delta)
|
|
1618
|
+
|
|
1619
|
+
def state_counter_set(
|
|
1620
|
+
self,
|
|
1621
|
+
scope_id: bytes,
|
|
1622
|
+
ns: bytes,
|
|
1623
|
+
key: bytes,
|
|
1624
|
+
value: int,
|
|
1625
|
+
*,
|
|
1626
|
+
shard_key: str = "",
|
|
1627
|
+
) -> None:
|
|
1628
|
+
self._dbg("state_counter_set", shard_key, scope_id)
|
|
1629
|
+
self._inner.state_counter_set(self._p(shard_key, scope_id), ns, key, value)
|
|
1630
|
+
|
|
1631
|
+
def state_counter_delete(self, scope_id: bytes, ns: bytes, key: bytes, *, shard_key: str = "") -> None:
|
|
1632
|
+
self._dbg("state_counter_delete", shard_key, scope_id)
|
|
1633
|
+
self._inner.state_counter_delete(self._p(shard_key, scope_id), ns, key)
|
|
1634
|
+
|
|
1635
|
+
def close(self) -> None:
|
|
1636
|
+
self._inner.close()
|