vgi-python 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vgi/__init__.py +152 -0
- vgi/_duckdb.py +62 -0
- vgi/_storage_profile.py +132 -0
- vgi/_test_fixtures/__init__.py +20 -0
- vgi/_test_fixtures/accumulate/__init__.py +19 -0
- vgi/_test_fixtures/accumulate/worker.py +762 -0
- vgi/_test_fixtures/aggregate/__init__.py +62 -0
- vgi/_test_fixtures/aggregate/_common.py +21 -0
- vgi/_test_fixtures/aggregate/basic.py +232 -0
- vgi/_test_fixtures/aggregate/dynamic.py +409 -0
- vgi/_test_fixtures/aggregate/generic.py +86 -0
- vgi/_test_fixtures/aggregate/listagg.py +71 -0
- vgi/_test_fixtures/aggregate/percentile.py +107 -0
- vgi/_test_fixtures/aggregate/streaming.py +192 -0
- vgi/_test_fixtures/aggregate/varargs.py +75 -0
- vgi/_test_fixtures/aggregate/window.py +380 -0
- vgi/_test_fixtures/attach_options.py +308 -0
- vgi/_test_fixtures/bad_protocol.py +62 -0
- vgi/_test_fixtures/cancellable.py +336 -0
- vgi/_test_fixtures/catalog.py +813 -0
- vgi/_test_fixtures/http_server.py +394 -0
- vgi/_test_fixtures/nest_tensor.py +614 -0
- vgi/_test_fixtures/orchard_catalog.py +47 -0
- vgi/_test_fixtures/projection_repro/__init__.py +6 -0
- vgi/_test_fixtures/projection_repro/worker.py +454 -0
- vgi/_test_fixtures/scalar/__init__.py +116 -0
- vgi/_test_fixtures/scalar/_common.py +69 -0
- vgi/_test_fixtures/scalar/arithmetic.py +321 -0
- vgi/_test_fixtures/scalar/binary.py +120 -0
- vgi/_test_fixtures/scalar/formatting.py +176 -0
- vgi/_test_fixtures/scalar/geo.py +300 -0
- vgi/_test_fixtures/scalar/null_handling.py +107 -0
- vgi/_test_fixtures/scalar/random_demo.py +171 -0
- vgi/_test_fixtures/scalar/settings_secrets.py +102 -0
- vgi/_test_fixtures/scalar/type_info.py +219 -0
- vgi/_test_fixtures/schema_reconcile/__init__.py +29 -0
- vgi/_test_fixtures/schema_reconcile/worker.py +653 -0
- vgi/_test_fixtures/simple_writable.py +793 -0
- vgi/_test_fixtures/table/__init__.py +221 -0
- vgi/_test_fixtures/table/_common.py +162 -0
- vgi/_test_fixtures/table/batch_index.py +283 -0
- vgi/_test_fixtures/table/batch_index_broken.py +200 -0
- vgi/_test_fixtures/table/catalog_scans.py +162 -0
- vgi/_test_fixtures/table/filters.py +1005 -0
- vgi/_test_fixtures/table/late_materialization.py +249 -0
- vgi/_test_fixtures/table/make_series.py +273 -0
- vgi/_test_fixtures/table/misc.py +499 -0
- vgi/_test_fixtures/table/order_modes.py +164 -0
- vgi/_test_fixtures/table/pairs.py +437 -0
- vgi/_test_fixtures/table/partition_columns.py +472 -0
- vgi/_test_fixtures/table/partition_columns_broken.py +304 -0
- vgi/_test_fixtures/table/profiling_example.py +195 -0
- vgi/_test_fixtures/table/required_filters.py +234 -0
- vgi/_test_fixtures/table/sequence.py +710 -0
- vgi/_test_fixtures/table/settings.py +426 -0
- vgi/_test_fixtures/table/transaction_storage.py +162 -0
- vgi/_test_fixtures/table/tt_pushdown.py +191 -0
- vgi/_test_fixtures/table/versioned.py +230 -0
- vgi/_test_fixtures/table_in_out.py +1392 -0
- vgi/_test_fixtures/versioned.py +155 -0
- vgi/_test_fixtures/versioned_tables.py +595 -0
- vgi/_test_fixtures/worker.py +1631 -0
- vgi/_test_fixtures/writable/__init__.py +8 -0
- vgi/_test_fixtures/writable/generic.py +236 -0
- vgi/_test_fixtures/writable/table.py +149 -0
- vgi/_test_fixtures/writable/worker.py +1148 -0
- vgi/aggregate_function.py +607 -0
- vgi/argument_spec.py +472 -0
- vgi/arguments.py +1747 -0
- vgi/auth.py +55 -0
- vgi/catalog/__init__.py +88 -0
- vgi/catalog/attach_option.py +206 -0
- vgi/catalog/catalog_interface.py +2767 -0
- vgi/catalog/descriptors.py +870 -0
- vgi/catalog/duckdb_statistics.py +377 -0
- vgi/catalog/secret_type.py +96 -0
- vgi/catalog/setting.py +253 -0
- vgi/catalog/storage.py +372 -0
- vgi/client/__init__.py +67 -0
- vgi/client/catalog_mixin.py +1251 -0
- vgi/client/cli.py +582 -0
- vgi/client/cli_catalog.py +182 -0
- vgi/client/cli_schema.py +270 -0
- vgi/client/cli_table.py +907 -0
- vgi/client/cli_transaction.py +97 -0
- vgi/client/cli_utils.py +441 -0
- vgi/client/cli_view.py +303 -0
- vgi/client/client.py +2183 -0
- vgi/exceptions.py +205 -0
- vgi/function.py +245 -0
- vgi/function_storage.py +1636 -0
- vgi/function_storage_azure_sql.py +922 -0
- vgi/function_storage_cf_do.py +740 -0
- vgi/http/__init__.py +25 -0
- vgi/http/demo_storage.py +212 -0
- vgi/http/worker_page.py +1252 -0
- vgi/invocation.py +154 -0
- vgi/logging_config.py +93 -0
- vgi/meta_worker.py +661 -0
- vgi/metadata.py +1403 -0
- vgi/otel.py +406 -0
- vgi/protocol.py +2418 -0
- vgi/protocol_version.txt +1 -0
- vgi/py.typed +0 -0
- vgi/scalar_function.py +1211 -0
- vgi/schema_utils.py +234 -0
- vgi/secret_protocol.py +124 -0
- vgi/secret_service.py +238 -0
- vgi/serve.py +769 -0
- vgi/table_buffering_function.py +443 -0
- vgi/table_filter_pushdown.py +1528 -0
- vgi/table_function.py +1130 -0
- vgi/table_in_out_function.py +383 -0
- vgi/transactor/__init__.py +24 -0
- vgi/transactor/_duckdb_compat.py +27 -0
- vgi/transactor/client.py +137 -0
- vgi/transactor/protocol.py +149 -0
- vgi/transactor/server.py +740 -0
- vgi/worker.py +4761 -0
- vgi_python-0.8.0.dist-info/METADATA +735 -0
- vgi_python-0.8.0.dist-info/RECORD +124 -0
- vgi_python-0.8.0.dist-info/WHEEL +4 -0
- vgi_python-0.8.0.dist-info/entry_points.txt +5 -0
- vgi_python-0.8.0.dist-info/licenses/LICENSE +134 -0
|
@@ -0,0 +1,922 @@
|
|
|
1
|
+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
|
|
2
|
+
|
|
3
|
+
"""Azure SQL Database storage for VGI function state.
|
|
4
|
+
|
|
5
|
+
This module provides a FunctionStorage implementation backed by Azure SQL
|
|
6
|
+
Database (Serverless). It is a near-direct port of FunctionStorageSqlite
|
|
7
|
+
to T-SQL via pymssql.
|
|
8
|
+
|
|
9
|
+
Implementation:
|
|
10
|
+
FunctionStorageAzureSql: Azure SQL-backed storage implementation.
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
Set ``VGI_WORKER_SHARED_STORAGE=azure-sql`` plus ``VGI_AZURE_SQL_SERVER``
|
|
14
|
+
and ``VGI_AZURE_SQL_DATABASE`` environment variables to enable. Provide
|
|
15
|
+
``VGI_AZURE_SQL_USER`` / ``VGI_AZURE_SQL_PASSWORD`` for SQL auth, or
|
|
16
|
+
omit them to use ``DefaultAzureCredential`` (managed identity).
|
|
17
|
+
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import contextlib
|
|
21
|
+
import logging
|
|
22
|
+
import os
|
|
23
|
+
import struct
|
|
24
|
+
import time
|
|
25
|
+
from collections.abc import Callable
|
|
26
|
+
from typing import Any, cast
|
|
27
|
+
|
|
28
|
+
import pymssql
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"FunctionStorageAzureSql",
|
|
32
|
+
"MissingTablesError",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
# SQL Server error codes
|
|
36
|
+
_ERR_INVALID_OBJECT_NAME = 208
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class MissingTablesError(Exception):
    """Signal that the storage tables have not been created in the database.

    Raised by ``FunctionStorageAzureSql._check_missing_tables`` when a driver
    error carries SQL Server error code 208 ("Invalid object name").
    """
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Module logger shared by every storage operation in this file.
_logger = logging.getLogger("vgi.storage.azure_sql")

# Optional dedicated debug log: when VGI_AZURE_SQL_DEBUG_LOG names a file,
# attach a DEBUG-level file handler directly to this logger and lower the
# logger's own level, so the records are written no matter how the root
# logger is configured.
_debug_log_path = os.environ.get("VGI_AZURE_SQL_DEBUG_LOG")
if _debug_log_path:
    _fh = logging.FileHandler(_debug_log_path)
    _fh.setFormatter(logging.Formatter("%(asctime)s %(process)d %(message)s"))
    _fh.setLevel(logging.DEBUG)
    _logger.setLevel(logging.DEBUG)
    _logger.addHandler(_fh)
|
|
54
|
+
|
|
55
|
+
# Azure AD resource for SQL Database token auth
|
|
56
|
+
_SQL_AZURE_RESOURCE = "https://database.windows.net/"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class FunctionStorageAzureSql:
|
|
60
|
+
"""Azure SQL Database-backed storage for VGI function state.
|
|
61
|
+
|
|
62
|
+
This implementation uses Azure SQL Database with the same table schema
|
|
63
|
+
as FunctionStorageSqlite. It manages five tables:

- work_queue: FIFO queue of work items per invocation
- invocation_registry: Tracks valid invocation IDs for queue operations
- function_state: Key/value state keyed by (scope_id, ns, key)
- function_state_log: Append-only log of values per (scope_id, ns, key)
- function_counter: Atomic int64 counters keyed by (scope_id, ns, key)
|
|
68
|
+
|
|
69
|
+
Connection modes:
|
|
70
|
+
- SQL auth: provide ``user`` and ``password``
|
|
71
|
+
- Managed identity: omit ``user``/``password``, optionally pass a
|
|
72
|
+
``credential`` (falls back to ``DefaultAzureCredential``)
|
|
73
|
+
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
def __init__(
|
|
77
|
+
self,
|
|
78
|
+
*,
|
|
79
|
+
server: str,
|
|
80
|
+
database: str,
|
|
81
|
+
user: str | None = None,
|
|
82
|
+
password: str | None = None,
|
|
83
|
+
credential: object | None = None,
|
|
84
|
+
) -> None:
|
|
85
|
+
"""Initialize Azure SQL storage.
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
server: Azure SQL server hostname.
|
|
89
|
+
database: Database name.
|
|
90
|
+
user: SQL auth username. If None, token-based auth is used.
|
|
91
|
+
password: SQL auth password.
|
|
92
|
+
credential: Optional TokenCredential for Azure AD auth.
|
|
93
|
+
Falls back to DefaultAzureCredential if omitted.
|
|
94
|
+
|
|
95
|
+
"""
|
|
96
|
+
self._server = server
|
|
97
|
+
self._database = database
|
|
98
|
+
self._user = user
|
|
99
|
+
self._password = password
|
|
100
|
+
self._credential = credential
|
|
101
|
+
self._conn: pymssql.Connection | None = None
|
|
102
|
+
|
|
103
|
+
def _new_connection(self) -> pymssql.Connection:
    """Open a brand-new pymssql connection and log how long it took.

    SQL authentication is used only when both a user and a password were
    configured; otherwise an Azure AD access token is acquired and passed
    to the driver in encoded form.

    Returns:
        The newly opened connection (rows are returned as tuples).

    """
    started = time.monotonic()
    if self._user is None or self._password is None:
        # Token-based auth (managed identity / DefaultAzureCredential)
        encoded_token = _encode_access_token(self._get_access_token())
        conn = pymssql.connect(  # type: ignore[call-overload]
            server=self._server,
            password=encoded_token,
            database=self._database,
            login_timeout=30,
            as_dict=False,
        )
    else:
        conn = pymssql.connect(
            server=self._server,
            user=self._user,
            password=self._password,
            database=self._database,
            login_timeout=30,
            as_dict=False,
        )
    _logger.debug(
        "connect server=%s elapsed_ms=%.1f",
        self._server,
        (time.monotonic() - started) * 1000,
    )
    return conn
|
|
129
|
+
|
|
130
|
+
def _connect(self) -> pymssql.Connection:
|
|
131
|
+
"""Return a persistent connection, creating one if needed.
|
|
132
|
+
|
|
133
|
+
Callers that catch exceptions should call ``_reconnect()``
|
|
134
|
+
before retrying so the dead connection is replaced.
|
|
135
|
+
"""
|
|
136
|
+
if self._conn is None:
|
|
137
|
+
self._conn = self._new_connection()
|
|
138
|
+
return self._conn
|
|
139
|
+
|
|
140
|
+
def _reconnect(self) -> None:
|
|
141
|
+
"""Drop the current connection so the next ``_connect()`` creates a fresh one."""
|
|
142
|
+
if self._conn is not None:
|
|
143
|
+
with contextlib.suppress(Exception):
|
|
144
|
+
self._conn.close()
|
|
145
|
+
self._conn = None
|
|
146
|
+
|
|
147
|
+
def _execute_with_retry[T](self, fn: "Callable[[pymssql.Connection], T]") -> T:
|
|
148
|
+
"""Execute a function with the persistent connection, retrying once on failure.
|
|
149
|
+
|
|
150
|
+
On the first failure, the connection is dropped and a fresh one
|
|
151
|
+
is created for the retry. ``Invalid object name`` errors are
|
|
152
|
+
translated to :class:`MissingTablesError` with a helpful message.
|
|
153
|
+
"""
|
|
154
|
+
for attempt in range(2):
|
|
155
|
+
try:
|
|
156
|
+
return fn(self._connect())
|
|
157
|
+
except pymssql.OperationalError as exc:
|
|
158
|
+
self._check_missing_tables(exc)
|
|
159
|
+
if attempt == 0:
|
|
160
|
+
_logger.debug("retry after OperationalError: %s", exc)
|
|
161
|
+
self._reconnect()
|
|
162
|
+
else:
|
|
163
|
+
raise
|
|
164
|
+
except pymssql.InterfaceError as exc:
|
|
165
|
+
if attempt == 0:
|
|
166
|
+
_logger.debug("retry after InterfaceError: %s", exc)
|
|
167
|
+
self._reconnect()
|
|
168
|
+
else:
|
|
169
|
+
raise
|
|
170
|
+
raise RuntimeError("unreachable") # pragma: no cover
|
|
171
|
+
|
|
172
|
+
@staticmethod
|
|
173
|
+
def _check_missing_tables(exc: Exception) -> None:
|
|
174
|
+
"""Raise MissingTablesError if the exception indicates missing tables."""
|
|
175
|
+
if hasattr(exc, "args") and exc.args and exc.args[0] == _ERR_INVALID_OBJECT_NAME:
|
|
176
|
+
raise MissingTablesError(
|
|
177
|
+
"Storage tables do not exist in the database. "
|
|
178
|
+
"Run FunctionStorageAzureSql.ensure_tables() during deployment "
|
|
179
|
+
"to create them."
|
|
180
|
+
) from exc
|
|
181
|
+
|
|
182
|
+
def _get_access_token(self) -> str:
    """Fetch an Azure AD access token for SQL Database as a string.

    If no credential was supplied, a ``DefaultAzureCredential`` is created
    on first use and stored on the instance for later calls.  The import
    is local so ``azure.identity`` is only needed for token-based auth.
    """
    credential = self._credential
    if credential is None:
        from azure.identity import DefaultAzureCredential

        credential = self._credential = DefaultAzureCredential()
    access_token = credential.get_token(_SQL_AZURE_RESOURCE)  # type: ignore[attr-defined]
    return str(access_token.token)
|
|
190
|
+
|
|
191
|
+
def ensure_tables(self) -> None:
    """Create all storage tables and indexes if they don't exist.

    Call this once during deployment or migration — not on every worker
    start. All DDL is sent as a single batch to minimize round-trips, and
    every statement is guarded by ``IF NOT EXISTS`` so re-running the
    batch is harmless.

    The batch runs through ``_execute_with_retry`` like every other
    database operation in this class, so a dead cached connection is
    replaced and the batch is attempted once more instead of failing
    outright.

    Raises:
        pymssql.OperationalError: If the batch fails on both attempts.
        pymssql.InterfaceError: If the batch fails on both attempts.

    """
    t0 = time.monotonic()

    def _do(conn: pymssql.Connection) -> None:
        cursor = conn.cursor()
        cursor.execute("""
            IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'work_queue')
            CREATE TABLE work_queue (
                id BIGINT IDENTITY(1,1) PRIMARY KEY,
                execution_id VARBINARY(16) NOT NULL,
                work_item VARBINARY(MAX) NOT NULL,
                created_at DATETIME2 DEFAULT GETUTCDATE()
            );

            IF NOT EXISTS (SELECT * FROM sys.indexes WHERE name = 'idx_work_queue_execution')
            CREATE INDEX idx_work_queue_execution ON work_queue(execution_id);

            IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'invocation_registry')
            CREATE TABLE invocation_registry (
                execution_id VARBINARY(16) PRIMARY KEY,
                created_at DATETIME2 DEFAULT GETUTCDATE()
            );

            -- Unified state_* tables.  scope_id holds either execution_id
            -- or transaction_opaque_data; ns is a caller-chosen namespace
            -- (b"agg", b"win", b"buf", b"txn", etc.).  last_attempt_id +
            -- drained_at/drained_by_attempt power internal replay-detection
            -- (silent no-op for state_put_many retries; read-back for
            -- state_drain retries).  VARBINARY(255) because scope_id /
            -- ns / key shapes vary across callers (16-byte UUIDs for
            -- execution_id, ASCII for transaction-state keys).
            IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'function_state')
            CREATE TABLE function_state (
                scope_id VARBINARY(255) NOT NULL,
                ns VARBINARY(255) NOT NULL,
                [key] VARBINARY(255) NOT NULL,
                value VARBINARY(MAX) NOT NULL,
                last_attempt_id VARBINARY(16) NOT NULL,
                drained_at DATETIME2 DEFAULT NULL,
                drained_by_attempt VARBINARY(16) DEFAULT NULL,
                created_at DATETIME2 DEFAULT GETUTCDATE(),
                PRIMARY KEY (scope_id, ns, [key])
            );

            -- function_state_log: append-only log keyed by (scope, ns, key).
            -- IDENTITY column gives a global monotonic ordinal per row;
            -- (scope, ns, key, attempt_id) is unique so a retried
            -- state_append maps back to its original ordinal.
            IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'function_state_log')
            CREATE TABLE function_state_log (
                id BIGINT IDENTITY(1,1) PRIMARY KEY,
                scope_id VARBINARY(255) NOT NULL,
                ns VARBINARY(255) NOT NULL,
                [key] VARBINARY(255) NOT NULL,
                value VARBINARY(MAX) NOT NULL,
                attempt_id VARBINARY(16) NOT NULL,
                created_at DATETIME2 DEFAULT GETUTCDATE()
            );

            IF NOT EXISTS (SELECT * FROM sys.indexes WHERE name = 'idx_function_state_log_lookup')
            CREATE INDEX idx_function_state_log_lookup
                ON function_state_log(scope_id, ns, [key], id);

            IF NOT EXISTS (SELECT * FROM sys.indexes WHERE name = 'idx_function_state_log_replay')
            CREATE UNIQUE INDEX idx_function_state_log_replay
                ON function_state_log(scope_id, ns, [key], attempt_id);

            -- function_counter: atomic int64 counters, a typed numeric facet
            -- kept apart from the opaque function_state K/V (VARBINARY(MAX)
            -- can't do arithmetic). last_attempt_id powers replay-detection so
            -- a connection-level retry of state_counter_add doesn't double-add.
            IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'function_counter')
            CREATE TABLE function_counter (
                scope_id VARBINARY(255) NOT NULL,
                ns VARBINARY(255) NOT NULL,
                [key] VARBINARY(255) NOT NULL,
                n BIGINT NOT NULL,
                last_attempt_id VARBINARY(16) DEFAULT NULL,
                created_at DATETIME2 DEFAULT GETUTCDATE(),
                PRIMARY KEY (scope_id, ns, [key])
            );
        """)
        conn.commit()

    self._execute_with_retry(_do)
    elapsed_ms = (time.monotonic() - t0) * 1000
    _logger.debug("ensure_tables elapsed_ms=%.1f", elapsed_ms)
|
|
280
|
+
|
|
281
|
+
# --- Work Queue ---
|
|
282
|
+
|
|
283
|
+
def queue_push(self, execution_id: bytes, items: list[bytes], *, shard_key: str = "") -> int:
    """Register the invocation and append its work items to the queue.

    The invocation row is inserted only if it is not already registered,
    and every item is inserted into ``work_queue``; both happen before a
    single commit.

    Args:
        execution_id: Invocation identifier the items belong to.
        items: Work item payloads; may be empty (registration only).
        shard_key: Accepted for interface compatibility; not used here.

    Returns:
        The number of items passed in.

    """
    register_sql = """
        MERGE invocation_registry AS t
        USING (VALUES (CAST(%s AS VARBINARY(16)))) AS s(execution_id)
        ON t.execution_id = s.execution_id
        WHEN NOT MATCHED THEN
            INSERT (execution_id) VALUES (s.execution_id);
    """
    insert_sql = """
        INSERT INTO work_queue (execution_id, work_item)
        VALUES (CAST(%s AS VARBINARY(16)), CAST(%s AS VARBINARY(MAX)))
    """

    def _do(conn: pymssql.Connection) -> int:
        started = time.monotonic()
        cur = conn.cursor()
        cur.execute(register_sql, (execution_id,))
        if items:
            cur.executemany(insert_sql, [(execution_id, payload) for payload in items])
        conn.commit()
        pushed = len(items)
        _logger.debug(
            "queue_push eid=%s items=%d elapsed_ms=%.1f",
            execution_id.hex()[:8],
            pushed,
            (time.monotonic() - started) * 1000,
        )
        return pushed

    return self._execute_with_retry(_do)
|
|
317
|
+
|
|
318
|
+
def queue_pop(self, execution_id: bytes, *, shard_key: str = "") -> bytes | None:
    """Claim and remove the oldest queued work item for ``execution_id``.

    Args:
        execution_id: Invocation identifier whose queue is popped.
        shard_key: Accepted for interface compatibility; not used here.

    Returns:
        The claimed work item, or None when no row was claimed (the queue
        is empty or the execution_id was never pushed).

    """
    # One statement deletes the lowest-id row for this eid and returns its
    # payload through OUTPUT; READPAST skips rows locked by other sessions.
    claim_sql = """
        DECLARE @eid VARBINARY(16) = CAST(%s AS VARBINARY(16));
        ;WITH cte AS (
            SELECT TOP (1) *
            FROM work_queue WITH (ROWLOCK, UPDLOCK, READPAST)
            WHERE execution_id = @eid
            ORDER BY id ASC
        )
        DELETE FROM cte
        OUTPUT deleted.work_item;
    """

    def _do(conn: pymssql.Connection) -> bytes | None:
        started = time.monotonic()
        cur = conn.cursor()
        cur.execute(claim_sql, (execution_id,))
        claimed = cur.fetchone()
        conn.commit()
        elapsed_ms = (time.monotonic() - started) * 1000
        payload: bytes | None = None
        if claimed is not None:
            payload = claimed[0]  # type: ignore[index, assignment]
        _logger.debug(
            "queue_pop eid=%s result=%s elapsed_ms=%.1f",
            execution_id.hex()[:8],
            "empty" if payload is None else "item",
            elapsed_ms,
        )
        return payload

    return self._execute_with_retry(_do)
|
|
362
|
+
|
|
363
|
+
def queue_clear(self, execution_id: bytes, *, shard_key: str = "") -> int:
    """Drop every remaining work item and the registry row for an invocation.

    Args:
        execution_id: Invocation identifier to clear.
        shard_key: Accepted for interface compatibility; not used here.

    Returns:
        The number of ``work_queue`` rows deleted (the registry row is
        not counted).

    """
    eid_param = (execution_id,)

    def _do(conn: pymssql.Connection) -> int:
        started = time.monotonic()
        cur = conn.cursor()
        cur.execute(
            "DELETE FROM work_queue WHERE execution_id = CAST(%s AS VARBINARY(16))",
            eid_param,
        )
        # Capture the count before the next statement overwrites rowcount.
        removed = cur.rowcount
        cur.execute(
            "DELETE FROM invocation_registry WHERE execution_id = CAST(%s AS VARBINARY(16))",
            eid_param,
        )
        conn.commit()
        _logger.debug(
            "queue_clear eid=%s cleared=%d elapsed_ms=%.1f",
            execution_id.hex()[:8],
            removed,
            (time.monotonic() - started) * 1000,
        )
        return removed

    return self._execute_with_retry(_do)
|
|
388
|
+
|
|
389
|
+
# --- Maintenance ---
|
|
390
|
+
|
|
391
|
+
def cleanup_old_entries(self, max_age_days: float = 1.0) -> int:
    """Purge rows whose ``created_at`` is older than ``max_age_days``.

    Applies to ``work_queue``, ``invocation_registry``, ``function_state``,
    ``function_state_log`` and ``function_counter``; all deletes are
    committed together.

    Args:
        max_age_days: Age threshold in days; converted to whole seconds.

    Returns:
        Total number of rows deleted across all tables.

    """
    # Fixed, trusted identifiers — safe to interpolate into the statement.
    table_names = (
        "work_queue",
        "invocation_registry",
        "function_state",
        "function_state_log",
        "function_counter",
    )

    def _do(conn: pymssql.Connection) -> int:
        started = time.monotonic()
        age_limit_seconds = int(max_age_days * 86400)
        cur = conn.cursor()
        deleted = 0
        for name in table_names:
            cur.execute(
                f"DELETE FROM {name} WHERE DATEDIFF(SECOND, created_at, GETUTCDATE()) > %s",  # noqa: S608
                (age_limit_seconds,),
            )
            deleted += cur.rowcount
        conn.commit()
        _logger.debug(
            "cleanup_old_entries max_age_days=%.1f deleted=%d elapsed_ms=%.1f",
            max_age_days,
            deleted,
            (time.monotonic() - started) * 1000,
        )
        return deleted

    return self._execute_with_retry(_do)
|
|
421
|
+
|
|
422
|
+
# ========================================================================
|
|
423
|
+
# Unified state_* implementation
|
|
424
|
+
# ========================================================================
|
|
425
|
+
#
|
|
426
|
+
# Mirrors the SQLite backend's contract. Every mutating call generates
|
|
427
|
+
# an internal attempt_id (UUID4 bytes); replay-detection in
|
|
428
|
+
# state_put_many checks whether the FIRST item's last_attempt_id
|
|
429
|
+
# matches; state_drain checks drained_by_attempt and returns prior
|
|
430
|
+
# tombstoned values on retry. T-SQL MERGE for upsert; IDENTITY column
|
|
431
|
+
# on function_state_log gives global ordinals.
|
|
432
|
+
|
|
433
|
+
def state_get_many(
|
|
434
|
+
self,
|
|
435
|
+
scope_id: bytes,
|
|
436
|
+
ns: bytes,
|
|
437
|
+
keys: list[bytes],
|
|
438
|
+
*,
|
|
439
|
+
shard_key: str = "",
|
|
440
|
+
) -> list[bytes | None]:
|
|
441
|
+
"""Batched read by key list. Returns parallel list with None for misses."""
|
|
442
|
+
del shard_key
|
|
443
|
+
if not keys:
|
|
444
|
+
return []
|
|
445
|
+
|
|
446
|
+
def _do(conn: pymssql.Connection) -> list[bytes | None]:
|
|
447
|
+
cursor = conn.cursor()
|
|
448
|
+
placeholders = ",".join("%s" for _ in keys)
|
|
449
|
+
cursor.execute(
|
|
450
|
+
f"""
|
|
451
|
+
SELECT [key], value FROM function_state
|
|
452
|
+
WHERE scope_id = %s AND ns = %s AND [key] IN ({placeholders})
|
|
453
|
+
AND drained_at IS NULL
|
|
454
|
+
""", # noqa: S608
|
|
455
|
+
(scope_id, ns, *keys),
|
|
456
|
+
)
|
|
457
|
+
# pymssql cursor values are typed as a broad Any-equivalent
|
|
458
|
+
# union; the columns we SELECT are VARBINARY (function_state.key,
|
|
459
|
+
# function_state.value) so the runtime value is always bytes.
|
|
460
|
+
found: dict[bytes, bytes] = {bytes(cast(Any, k)): bytes(cast(Any, v)) for k, v in cursor.fetchall()}
|
|
461
|
+
return [found.get(bytes(k)) for k in keys]
|
|
462
|
+
|
|
463
|
+
return self._execute_with_retry(_do)
|
|
464
|
+
|
|
465
|
+
def state_put_many(
    self,
    scope_id: bytes,
    ns: bytes,
    items: list[tuple[bytes, bytes]],
    *,
    shard_key: str = "",
) -> None:
    """Upsert a batch of (key, value) pairs under one commit.

    A fresh attempt id is generated per call and stamped on every row.
    When the operation is re-run by the retry helper, a first-key row
    already carrying that attempt id means the batch landed, and the
    re-run is a silent no-op.  Upserting a key also resets its
    ``created_at`` and clears any drain tombstone.

    Args:
        scope_id: Scope the keys live under.
        ns: Namespace within the scope.
        items: (key, value) pairs; an empty list does nothing.
        shard_key: Accepted for interface compatibility; not used here.

    """
    del shard_key
    if not items:
        return
    import uuid

    attempt_id = uuid.uuid4().bytes

    replay_probe_sql = """
        SELECT 1 FROM function_state
        WHERE scope_id = %s AND ns = %s AND [key] = %s AND last_attempt_id = %s
    """
    upsert_sql = """
        MERGE function_state AS t
        USING (VALUES (CAST(%s AS VARBINARY(255)),
                       CAST(%s AS VARBINARY(255)),
                       CAST(%s AS VARBINARY(255)),
                       CAST(%s AS VARBINARY(MAX)),
                       CAST(%s AS VARBINARY(16))))
              AS s(scope_id, ns, [key], value, last_attempt_id)
        ON t.scope_id = s.scope_id AND t.ns = s.ns AND t.[key] = s.[key]
        WHEN MATCHED THEN
            UPDATE SET value = s.value,
                       last_attempt_id = s.last_attempt_id,
                       created_at = GETUTCDATE(),
                       drained_at = NULL,
                       drained_by_attempt = NULL
        WHEN NOT MATCHED THEN
            INSERT (scope_id, ns, [key], value, last_attempt_id)
            VALUES (s.scope_id, s.ns, s.[key], s.value, s.last_attempt_id);
    """

    def _do(conn: pymssql.Connection) -> None:
        cur = conn.cursor()
        # Replay-detection on the first key only: the whole batch is
        # committed together, so one stamped row implies all of them.
        probe_key, _ = items[0]
        cur.execute(replay_probe_sql, (scope_id, ns, probe_key, attempt_id))
        already_applied = cur.fetchone() is not None
        if already_applied:
            return  # Replay — silent no-op.
        for item_key, item_value in items:
            cur.execute(upsert_sql, (scope_id, ns, item_key, item_value, attempt_id))
        conn.commit()

    self._execute_with_retry(_do)
|
|
523
|
+
|
|
524
|
+
def state_scan(
    self,
    scope_id: bytes,
    ns: bytes,
    *,
    start: bytes | None = None,
    end: bytes | None = None,
    reverse: bool = False,
    limit: int | None = None,
    shard_key: str = "",
) -> list[tuple[bytes, bytes]]:
    """Read live (key, value) pairs of a namespace without modifying them.

    Rows are ordered by key bytes (VARBINARY compares bytewise), descending
    when ``reverse`` is set.

    Args:
        scope_id: Scope to scan.
        ns: Namespace within the scope.
        start: Inclusive lower key bound, if given.
        end: Exclusive upper key bound, if given.
        reverse: Return keys in descending order.
        limit: Maximum number of rows to return, if given.
        shard_key: Accepted for interface compatibility; not used here.

    Returns:
        The matching (key, value) pairs in scan order.

    """
    del shard_key

    def _do(conn: pymssql.Connection) -> list[tuple[bytes, bytes]]:
        cur = conn.cursor()
        bind_values: list[Any] = [scope_id, ns]
        bound_parts: list[str] = []
        if start is not None:
            bound_parts.append(" AND [key] >= %s")
            bind_values.append(start)
        if end is not None:
            bound_parts.append(" AND [key] < %s")
            bind_values.append(end)
        bounds_sql = "".join(bound_parts)
        direction = "DESC" if reverse else "ASC"
        paging_sql = ""
        if limit is not None:
            paging_sql = "OFFSET 0 ROWS FETCH NEXT %s ROWS ONLY"
            bind_values.append(int(limit))
        # Only the fixed ASC/DESC literal and clause fragments built above
        # are interpolated; every value is a bound parameter.
        query = f"""
            SELECT [key], value FROM function_state
            WHERE scope_id = %s AND ns = %s AND drained_at IS NULL{bounds_sql}
            ORDER BY [key] {direction}
            {paging_sql}
        """  # noqa: S608
        cur.execute(query, tuple(bind_values))
        pairs: list[tuple[bytes, bytes]] = []
        for raw_key, raw_value in cur.fetchall():
            pairs.append((bytes(cast(Any, raw_key)), bytes(cast(Any, raw_value))))
        return pairs

    return self._execute_with_retry(_do)
|
|
569
|
+
|
|
570
|
+
def state_drain(
    self,
    scope_id: bytes,
    ns: bytes,
    *,
    shard_key: str = "",
) -> list[tuple[bytes, bytes]]:
    """Tombstone every live row of a namespace and return what was drained.

    A fresh attempt id is generated per call.  If rows already tombstoned
    with that id are found (the operation is being re-run by the retry
    helper), those rows are returned instead of draining again.

    Args:
        scope_id: Scope to drain.
        ns: Namespace within the scope.
        shard_key: Accepted for interface compatibility; not used here.

    Returns:
        The drained (key, value) pairs.

    """
    del shard_key
    import uuid

    attempt_id = uuid.uuid4().bytes

    def _as_pairs(rows: Any) -> list[tuple[bytes, bytes]]:
        # Both columns are VARBINARY; normalise them to bytes.
        return [(bytes(cast(Any, k)), bytes(cast(Any, v))) for k, v in rows]

    def _do(conn: pymssql.Connection) -> list[tuple[bytes, bytes]]:
        cur = conn.cursor()
        # Read-back replay: rows already tombstoned by this attempt.
        cur.execute(
            """
            SELECT [key], value FROM function_state
            WHERE scope_id = %s AND ns = %s AND drained_by_attempt = %s
            ORDER BY [key]
            """,
            (scope_id, ns, attempt_id),
        )
        previously_drained = cur.fetchall()
        if previously_drained:
            return _as_pairs(previously_drained)
        # Fresh drain: tombstone the live rows and get them back in the
        # same statement via the OUTPUT clause.
        cur.execute(
            """
            UPDATE function_state
            SET drained_at = GETUTCDATE(),
                drained_by_attempt = %s
            OUTPUT inserted.[key], inserted.value
            WHERE scope_id = %s AND ns = %s AND drained_at IS NULL
            """,
            (attempt_id, scope_id, ns),
        )
        drained_now = cur.fetchall()
        conn.commit()
        return _as_pairs(drained_now)

    return self._execute_with_retry(_do)
|
|
616
|
+
|
|
617
|
+
def state_delete(
|
|
618
|
+
self,
|
|
619
|
+
scope_id: bytes,
|
|
620
|
+
ns: bytes,
|
|
621
|
+
keys: list[bytes] | None = None,
|
|
622
|
+
*,
|
|
623
|
+
start: bytes | None = None,
|
|
624
|
+
end: bytes | None = None,
|
|
625
|
+
shard_key: str = "",
|
|
626
|
+
) -> int:
|
|
627
|
+
"""Delete by key list, by ``[start, end)`` range, or whole namespace.
|
|
628
|
+
|
|
629
|
+
``keys`` and the range are mutually exclusive. Returns count deleted.
|
|
630
|
+
"""
|
|
631
|
+
del shard_key
|
|
632
|
+
if keys is not None and (start is not None or end is not None):
|
|
633
|
+
raise ValueError("state_delete: keys and start/end are mutually exclusive")
|
|
634
|
+
|
|
635
|
+
def _do(conn: pymssql.Connection) -> int:
|
|
636
|
+
cursor = conn.cursor()
|
|
637
|
+
if keys is not None:
|
|
638
|
+
if not keys:
|
|
639
|
+
return 0
|
|
640
|
+
placeholders = ",".join("%s" for _ in keys)
|
|
641
|
+
cursor.execute(
|
|
642
|
+
f"""
|
|
643
|
+
DELETE FROM function_state
|
|
644
|
+
WHERE scope_id = %s AND ns = %s AND [key] IN ({placeholders})
|
|
645
|
+
""", # noqa: S608
|
|
646
|
+
(scope_id, ns, *keys),
|
|
647
|
+
)
|
|
648
|
+
else:
|
|
649
|
+
params: list[Any] = [scope_id, ns]
|
|
650
|
+
clauses = ""
|
|
651
|
+
if start is not None:
|
|
652
|
+
clauses += " AND [key] >= %s"
|
|
653
|
+
params.append(start)
|
|
654
|
+
if end is not None:
|
|
655
|
+
clauses += " AND [key] < %s"
|
|
656
|
+
params.append(end)
|
|
657
|
+
cursor.execute(
|
|
658
|
+
f"DELETE FROM function_state WHERE scope_id = %s AND ns = %s{clauses}", # noqa: S608
|
|
659
|
+
tuple(params),
|
|
660
|
+
)
|
|
661
|
+
count = int(cursor.rowcount)
|
|
662
|
+
conn.commit()
|
|
663
|
+
return count
|
|
664
|
+
|
|
665
|
+
return self._execute_with_retry(_do)
|
|
666
|
+
|
|
667
|
+
def execution_clear(
    self,
    scope_id: bytes,
    *,
    shard_key: str = "",
) -> int:
    """Remove every state, log, and counter row belonging to ``scope_id``.

    The three tables ``function_state``, ``function_state_log`` and
    ``function_counter`` are purged in that order and committed once.

    Args:
        scope_id: Value matched against each table's ``scope_id`` column.
        shard_key: Accepted for signature compatibility; discarded here.

    Returns:
        Sum of the row counts reported for the three DELETE statements.
    """
    del shard_key

    def _do(conn: pymssql.Connection) -> int:
        cur = conn.cursor()
        removed = 0
        for statement in (
            "DELETE FROM function_state WHERE scope_id = %s",
            "DELETE FROM function_state_log WHERE scope_id = %s",
            "DELETE FROM function_counter WHERE scope_id = %s",
        ):
            cur.execute(statement, (scope_id,))
            removed += int(cur.rowcount)
        conn.commit()
        return removed

    return self._execute_with_retry(_do)
|
|
688
|
+
|
|
689
|
+
def state_append(
    self,
    scope_id: bytes,
    ns: bytes,
    key: bytes,
    item: bytes,
    *,
    shard_key: str = "",
) -> int:
    """Append ``item`` to the log for ``(scope_id, ns, key)``; return its id.

    A fresh random ``attempt_id`` is minted once per call and shared by every
    invocation of the inner callback. If a row carrying that ``attempt_id``
    already exists, its id is returned instead of inserting again.

    Args:
        scope_id: Value stored in / matched against ``scope_id``.
        ns: Value stored in / matched against ``ns``.
        key: Value stored in / matched against ``[key]``.
        item: Payload written to the ``value`` column.
        shard_key: Accepted for signature compatibility; discarded here.

    Returns:
        The ``function_state_log.id`` of the appended (or previously
        appended) row.
    """
    del shard_key
    import uuid

    attempt_id = uuid.uuid4().bytes

    lookup_sql = """
        SELECT id FROM function_state_log
        WHERE scope_id = %s AND ns = %s AND [key] = %s AND attempt_id = %s
    """
    insert_sql = """
        INSERT INTO function_state_log
        (scope_id, ns, [key], value, attempt_id)
        OUTPUT inserted.id
        VALUES (%s, %s, %s, %s, %s)
    """

    def _do(conn: pymssql.Connection) -> int:
        cur = conn.cursor()
        # Replay check first (see idx_function_state_log_replay, UNIQUE on
        # scope_id, ns, key, attempt_id): an earlier run of this callback
        # with the same attempt_id may already have landed the row.
        cur.execute(lookup_sql, (scope_id, ns, key, attempt_id))
        prior = cur.fetchone()
        if prior is None:
            cur.execute(insert_sql, (scope_id, ns, key, item, attempt_id))
            created = cur.fetchone()
            assert created is not None, "INSERT ... OUTPUT inserted.id returned no row"
            # id is BIGINT IDENTITY, i.e. an int at runtime; the cast only
            # works around pymssql's row typing.
            ordinal = int(cast(Any, created[0]))
            conn.commit()
            return ordinal
        return int(cast(Any, prior[0]))

    return self._execute_with_retry(_do)
|
|
737
|
+
|
|
738
|
+
def state_log_scan(
|
|
739
|
+
self,
|
|
740
|
+
scope_id: bytes,
|
|
741
|
+
ns: bytes,
|
|
742
|
+
key: bytes,
|
|
743
|
+
*,
|
|
744
|
+
after_id: int = -1,
|
|
745
|
+
limit: int | None = None,
|
|
746
|
+
shard_key: str = "",
|
|
747
|
+
) -> list[tuple[int, bytes]]:
|
|
748
|
+
"""Yield (id, value) pairs for (scope_id, ns, key) with id > after_id."""
|
|
749
|
+
del shard_key
|
|
750
|
+
|
|
751
|
+
def _do(conn: pymssql.Connection) -> list[tuple[int, bytes]]:
|
|
752
|
+
cursor = conn.cursor()
|
|
753
|
+
# T-SQL: ORDER BY ... OFFSET 0 ROWS FETCH NEXT ? ROWS ONLY
|
|
754
|
+
# is the canonical paging form. When limit is None we omit
|
|
755
|
+
# the FETCH clause to get all rows.
|
|
756
|
+
if limit is None:
|
|
757
|
+
cursor.execute(
|
|
758
|
+
"""
|
|
759
|
+
SELECT id, value FROM function_state_log
|
|
760
|
+
WHERE scope_id = %s AND ns = %s AND [key] = %s AND id > %s
|
|
761
|
+
ORDER BY id
|
|
762
|
+
""",
|
|
763
|
+
(scope_id, ns, key, after_id),
|
|
764
|
+
)
|
|
765
|
+
else:
|
|
766
|
+
cursor.execute(
|
|
767
|
+
"""
|
|
768
|
+
SELECT id, value FROM function_state_log
|
|
769
|
+
WHERE scope_id = %s AND ns = %s AND [key] = %s AND id > %s
|
|
770
|
+
ORDER BY id
|
|
771
|
+
OFFSET 0 ROWS FETCH NEXT %s ROWS ONLY
|
|
772
|
+
""",
|
|
773
|
+
(scope_id, ns, key, after_id, int(limit)),
|
|
774
|
+
)
|
|
775
|
+
return [(int(cast(Any, rid)), bytes(cast(Any, v))) for (rid, v) in cursor.fetchall()]
|
|
776
|
+
|
|
777
|
+
return self._execute_with_retry(_do)
|
|
778
|
+
|
|
779
|
+
# --- Atomic int64 counters (function_counter) ---
|
|
780
|
+
|
|
781
|
+
def state_counter_get(self, scope_id: bytes, ns: bytes, key: bytes, *, shard_key: str = "") -> int:
    """Return the stored counter for ``(scope_id, ns, key)``, or 0 when no row exists.

    ``shard_key`` is accepted for signature compatibility and discarded.
    """
    del shard_key

    def _do(conn: pymssql.Connection) -> int:
        cur = conn.cursor()
        cur.execute(
            "SELECT n FROM function_counter WHERE scope_id = %s AND ns = %s AND [key] = %s",
            (scope_id, ns, key),
        )
        hit = cur.fetchone()
        if hit is None:
            # Absent counter reads as zero.
            return 0
        return int(cast(Any, hit[0]))

    return self._execute_with_retry(_do)
|
|
795
|
+
|
|
796
|
+
def state_counter_add(self, scope_id: bytes, ns: bytes, key: bytes, delta: int, *, shard_key: str = "") -> int:
    """Add ``delta`` to the counter via a single MERGE and return the new value.

    A random ``attempt_id`` is minted once per call and shared by every
    invocation of the inner callback. If the counter row already carries it
    in ``last_attempt_id``, the stored value is returned without adding
    again. A missing counter row is created with ``n = delta``.

    ``shard_key`` is accepted for signature compatibility and discarded.
    """
    del shard_key
    import uuid

    attempt_id = uuid.uuid4().bytes

    replay_sql = """
        SELECT n FROM function_counter
        WHERE scope_id = %s AND ns = %s AND [key] = %s AND last_attempt_id = %s
    """
    merge_sql = """
        MERGE function_counter AS t
        USING (VALUES (CAST(%s AS VARBINARY(255)),
                       CAST(%s AS VARBINARY(255)),
                       CAST(%s AS VARBINARY(255)),
                       CAST(%s AS BIGINT),
                       CAST(%s AS VARBINARY(16))))
            AS s(scope_id, ns, [key], delta, attempt_id)
        ON t.scope_id = s.scope_id AND t.ns = s.ns AND t.[key] = s.[key]
        WHEN MATCHED THEN
            UPDATE SET n = t.n + s.delta, last_attempt_id = s.attempt_id
        WHEN NOT MATCHED THEN
            INSERT (scope_id, ns, [key], n, last_attempt_id)
            VALUES (s.scope_id, s.ns, s.[key], s.delta, s.attempt_id)
        OUTPUT inserted.n;
    """

    def _do(conn: pymssql.Connection) -> int:
        cur = conn.cursor()
        # Replay check: a re-run of this same logical add carries the same
        # attempt_id; if it already landed, hand back the stored value
        # rather than adding twice.
        cur.execute(replay_sql, (scope_id, ns, key, attempt_id))
        already = cur.fetchone()
        if already is not None:
            return int(cast(Any, already[0]))

        cur.execute(merge_sql, (scope_id, ns, key, int(delta), attempt_id))
        merged = cur.fetchone()
        assert merged is not None, "MERGE ... OUTPUT inserted.n returned no row"
        conn.commit()
        return int(cast(Any, merged[0]))

    return self._execute_with_retry(_do)
|
|
843
|
+
|
|
844
|
+
def state_counter_set(self, scope_id: bytes, ns: bytes, key: bytes, value: int, *, shard_key: str = "") -> None:
    """Force the counter for ``(scope_id, ns, key)`` to ``value``.

    An existing row has ``n`` overwritten and ``last_attempt_id`` reset to
    NULL; a missing row is inserted. No attempt id is recorded because
    writing the same value again yields the same result.

    ``shard_key`` is accepted for signature compatibility and discarded.
    """
    del shard_key

    upsert_sql = """
        MERGE function_counter AS t
        USING (VALUES (CAST(%s AS VARBINARY(255)),
                       CAST(%s AS VARBINARY(255)),
                       CAST(%s AS VARBINARY(255)),
                       CAST(%s AS BIGINT)))
            AS s(scope_id, ns, [key], n)
        ON t.scope_id = s.scope_id AND t.ns = s.ns AND t.[key] = s.[key]
        WHEN MATCHED THEN UPDATE SET n = s.n, last_attempt_id = NULL
        WHEN NOT MATCHED THEN
            INSERT (scope_id, ns, [key], n) VALUES (s.scope_id, s.ns, s.[key], s.n);
    """

    def _do(conn: pymssql.Connection) -> None:
        cur = conn.cursor()
        cur.execute(upsert_sql, (scope_id, ns, key, int(value)))
        conn.commit()

    self._execute_with_retry(_do)
|
|
868
|
+
|
|
869
|
+
def state_counter_delete(self, scope_id: bytes, ns: bytes, key: bytes, *, shard_key: str = "") -> None:
    """Remove the counter row for ``(scope_id, ns, key)``; a missing row is not an error.

    ``shard_key`` is accepted for signature compatibility and discarded.
    """
    del shard_key

    target = (scope_id, ns, key)

    def _do(conn: pymssql.Connection) -> None:
        cur = conn.cursor()
        cur.execute(
            "DELETE FROM function_counter WHERE scope_id = %s AND ns = %s AND [key] = %s",
            target,
        )
        conn.commit()

    self._execute_with_retry(_do)
|
|
882
|
+
|
|
883
|
+
# --- Factory ---
|
|
884
|
+
|
|
885
|
+
@classmethod
def from_env(cls) -> "FunctionStorageAzureSql":
    """Build an instance from ``VGI_AZURE_SQL_*`` environment variables.

    Required:
        VGI_AZURE_SQL_SERVER: Azure SQL server hostname.
        VGI_AZURE_SQL_DATABASE: Database name.

    Optional (SQL auth):
        VGI_AZURE_SQL_USER: SQL auth username.
        VGI_AZURE_SQL_PASSWORD: SQL auth password.

    Unset or empty user/password values are passed to the constructor as
    ``None``; the constructor is then expected to fall back to
    DefaultAzureCredential (that logic lives outside this method).

    Raises:
        ValueError: If the server or database variable is unset or empty.
    """
    env = os.environ
    server = env.get("VGI_AZURE_SQL_SERVER")
    database = env.get("VGI_AZURE_SQL_DATABASE")
    if not (server and database):
        raise ValueError(
            "VGI_AZURE_SQL_SERVER and VGI_AZURE_SQL_DATABASE environment "
            "variables are required when VGI_WORKER_SHARED_STORAGE=azure-sql"
        )
    # Normalise "" to None so an empty variable behaves like an unset one.
    user = env.get("VGI_AZURE_SQL_USER") or None
    password = env.get("VGI_AZURE_SQL_PASSWORD") or None
    return cls(server=server, database=database, user=user, password=password)
|
|
913
|
+
|
|
914
|
+
|
|
915
|
+
def _encode_access_token(token: str) -> bytes:
    """Pack an Azure AD access token into the byte layout used for token auth.

    The result is the token encoded as UTF-16-LE, preceded by the encoded
    byte length as a 4-byte little-endian unsigned integer.
    """
    payload = token.encode("UTF-16-LE")
    # The prefix counts encoded bytes, not characters.
    length_prefix = struct.pack("<I", len(payload))
    return b"".join((length_prefix, payload))
|