withcache 0.4.0__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {withcache-0.4.0 → withcache-0.4.2}/PKG-INFO +1 -1
- {withcache-0.4.0 → withcache-0.4.2}/shim/build.zig.zon +1 -1
- {withcache-0.4.0 → withcache-0.4.2}/src/withcache/__init__.py +1 -1
- {withcache-0.4.0 → withcache-0.4.2}/src/withcache/server.py +112 -23
- {withcache-0.4.0 → withcache-0.4.2}/tests/test_withcache.py +203 -0
- {withcache-0.4.0 → withcache-0.4.2}/.gitignore +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/LICENSE +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/README.md +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/deploy/Containerfile +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/deploy/compose.yml +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/hatch_build.py +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/pyproject.toml +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/shim/build.zig +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/shim/shim.zig +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/src/withcache/_shim.py +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/src/withcache/client.py +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/src/withcache/curlwithcache.py +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/src/withcache/static/htmx.min.js +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/src/withcache/static/pico.min.css +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/src/withcache/wgetwithcache.py +0 -0
- {withcache-0.4.0 → withcache-0.4.2}/tests/test_differential.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: withcache
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: Operator-curated, URL-keyed artifact cache for a small lab (CUDA/ROCm/DOCA/firmware)
|
|
5
5
|
Project-URL: Homepage, https://github.com/safl/withcache
|
|
6
6
|
Author-email: "Simon A. F. Lund" <safl@safl.dk>
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
.name = .withcache_shim,
|
|
3
3
|
// Zig requires a literal here; keep it in lockstep with the project's
|
|
4
4
|
// single source (src/withcache/__init__.py) via `make bump` / `make version-check`.
|
|
5
|
-
.version = "0.4.0",
|
|
5
|
+
.version = "0.4.2",
|
|
6
6
|
.fingerprint = 0xd7d96c5ed212ccaa,
|
|
7
7
|
.minimum_zig_version = "0.16.0",
|
|
8
8
|
.paths = .{
|
|
@@ -43,6 +43,16 @@ from datetime import datetime, timezone
|
|
|
43
43
|
|
|
44
44
|
CHUNK = 64 * 1024
|
|
45
45
|
USER_AGENT = "withcache-cache/0.1"
|
|
46
|
+
# Resume budget for a single store_from_origin call. A truncated
|
|
47
|
+
# upstream stream re-fetches with ``Range: bytes=<got>-`` so the
|
|
48
|
+
# next attempt picks up where the cut happened. Five tries cover
|
|
49
|
+
# the realistic failure mode (e.g. ghcr.io serves blobs via Azure
|
|
50
|
+
# Blob Storage SAS URLs with a ~10 minute expiry; a >2 GiB image
|
|
51
|
+
# at modest bandwidth blows past one window and the connection is
|
|
52
|
+
# cut server-side, but a fresh redirect through ghcr yields a new
|
|
53
|
+
# SAS URL each retry). The cap is the give-up gate, not a normal
|
|
54
|
+
# operating depth.
|
|
55
|
+
RESUME_MAX_ATTEMPTS = 5
|
|
46
56
|
STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
|
|
47
57
|
MIME_TYPES = {".css": "text/css; charset=utf-8", ".js": "application/javascript; charset=utf-8"}
|
|
48
58
|
_DB_WRITE_LOCK = threading.Lock()
|
|
@@ -285,7 +295,14 @@ class Store:
|
|
|
285
295
|
except FileNotFoundError:
|
|
286
296
|
pass
|
|
287
297
|
|
|
288
|
-
def store_from_origin(self, url: str, progress=None, cancel=None, headers=None) -> sqlite3.Row:
|
|
298
|
+
def store_from_origin(
|
|
299
|
+
self,
|
|
300
|
+
url: str,
|
|
301
|
+
progress=None,
|
|
302
|
+
cancel=None,
|
|
303
|
+
headers=None,
|
|
304
|
+
max_resume_attempts: int = RESUME_MAX_ATTEMPTS,
|
|
305
|
+
) -> sqlite3.Row:
|
|
289
306
|
"""Operator-triggered: pull the artifact from origin and store it.
|
|
290
307
|
|
|
291
308
|
``progress(done, total)`` is called as bytes arrive (total may be None);
|
|
@@ -294,41 +311,105 @@ class Store:
|
|
|
294
311
|
``headers`` adds request headers to the origin fetch (e.g. a registry
|
|
295
312
|
bearer token bty pre-resolved for an oras blob). Raises :class:`CacheFull`
|
|
296
313
|
if the cache is already at --max-bytes.
|
|
314
|
+
|
|
315
|
+
Resume-on-truncation: if the upstream stream ends before its
|
|
316
|
+
declared Content-Length, the partial bytes are kept and the
|
|
317
|
+
next attempt requests ``Range: bytes=<got>-`` so the fetch
|
|
318
|
+
picks up where the connection died. Up to
|
|
319
|
+
``max_resume_attempts`` attempts are made before
|
|
320
|
+
:class:`TruncatedDownload` is raised; on giving up the
|
|
321
|
+
partial file is removed. A 200 response to a Range request
|
|
322
|
+
(the origin chose to ignore the header, common on naive
|
|
323
|
+
upstreams) is handled by restarting from byte 0 and counts
|
|
324
|
+
against the same attempt budget. Re-issuing the request also
|
|
325
|
+
re-resolves any 30x redirect chain, which matters for
|
|
326
|
+
ghcr.io: each ghcr request hands back a fresh Azure Blob
|
|
327
|
+
Storage SAS URL valid only for a short window, and the
|
|
328
|
+
prior cut almost certainly was that SAS expiring mid-stream.
|
|
297
329
|
"""
|
|
298
330
|
if not self.has_capacity():
|
|
299
331
|
raise CacheFull(f"cache full (>= {self.max_bytes} bytes); refusing to fetch {url}")
|
|
300
332
|
normalized = self.normalize(url)
|
|
301
333
|
key = self.key_of(normalized)
|
|
302
334
|
tmp = os.path.join(self.tmp_dir, key + ".part")
|
|
303
|
-
req_headers = {"User-Agent": USER_AGENT}
|
335
|
+
base_headers = {"User-Agent": USER_AGENT}
|
|
304
336
|
if headers:
|
|
305
|
-
req_headers.update(headers)
|
306
|
-
req = urllib.request.Request(url, headers=req_headers)
|
|
337
|
+
base_headers.update(headers)
|
|
307
338
|
sha = hashlib.sha256()
|
|
308
339
|
size = 0
|
|
340
|
+
total: int | None = None
|
|
341
|
+
content_type: str | None = None
|
|
309
342
|
try:
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
343
|
+
for _ in range(max_resume_attempts):
|
|
344
|
+
req_headers = dict(base_headers)
|
|
345
|
+
if size > 0:
|
|
346
|
+
# Resume from where the previous attempt cut.
|
|
347
|
+
# A 206 response continues the stream; a 200
|
|
348
|
+
# means the origin ignored Range (e.g. a dumb
|
|
349
|
+
# static server) and we restart from 0.
|
|
350
|
+
req_headers["Range"] = f"bytes={size}-"
|
|
351
|
+
req = urllib.request.Request(url, headers=req_headers)
|
|
352
|
+
with urllib.request.urlopen(req, timeout=120) as resp:
|
|
353
|
+
status = getattr(resp, "status", None) or resp.getcode()
|
|
354
|
+
if content_type is None:
|
|
355
|
+
content_type = resp.headers.get_content_type()
|
|
356
|
+
if size > 0 and status == 200:
|
|
357
|
+
# Range ignored by origin: discard the partial
|
|
358
|
+
# and start a fresh full-stream attempt.
|
|
359
|
+
size = 0
|
|
360
|
+
sha = hashlib.sha256()
|
|
361
|
+
if os.path.exists(tmp):
|
|
362
|
+
os.remove(tmp)
|
|
363
|
+
if size > 0 and status == 206:
|
|
364
|
+
# ``Content-Range: bytes <start>-<end>/<total>``;
|
|
365
|
+
# use the total declared there as the contract,
|
|
366
|
+
# not Content-Length (which on 206 is the size
|
|
367
|
+
# of the partial response, not the whole blob).
|
|
368
|
+
cr = resp.headers.get("Content-Range") or ""
|
|
369
|
+
if "/" in cr:
|
|
370
|
+
tail = cr.rsplit("/", 1)[1].strip()
|
|
371
|
+
if tail.isdigit():
|
|
372
|
+
total = int(tail)
|
|
373
|
+
else:
|
|
374
|
+
cl = resp.headers.get("Content-Length")
|
|
375
|
+
if cl and cl.isdigit():
|
|
376
|
+
total = int(cl)
|
|
377
|
+
if progress:
|
|
378
|
+
progress(size, total)
|
|
379
|
+
mode = "ab" if size > 0 else "wb"
|
|
380
|
+
with open(tmp, mode) as f:
|
|
381
|
+
while True:
|
|
382
|
+
if cancel and cancel():
|
|
383
|
+
raise DownloadCancelled()
|
|
384
|
+
chunk = resp.read(CHUNK)
|
|
385
|
+
if not chunk:
|
|
386
|
+
break
|
|
387
|
+
f.write(chunk)
|
|
388
|
+
sha.update(chunk)
|
|
389
|
+
size += len(chunk)
|
|
390
|
+
if progress:
|
|
391
|
+
progress(size, total)
|
|
392
|
+
# urllib's read loop exits on clean EOF AND on transport-
|
|
393
|
+
# aborted close; HTTPResponse only raises IncompleteRead
|
|
394
|
+
# in some configurations. When the origin declared a
|
|
395
|
+
# total (either via Content-Length on a 200 or via
|
|
396
|
+
# Content-Range on a 206), treat that as the contract:
|
|
397
|
+
# try to resume from the cut, give up after the budget
|
|
398
|
+
# is exhausted. Without a declared total there is no
|
|
399
|
+
# truncation signal, so a single attempt is the whole
|
|
400
|
+
# story.
|
|
401
|
+
if total is None or size >= total:
|
|
402
|
+
break
|
|
403
|
+
else:
|
|
404
|
+
# for/else: ran out of attempts before reaching total
|
|
405
|
+
raise TruncatedDownload(
|
|
406
|
+
f"upstream truncated for {url}: declared {total} bytes, got {size}"
|
|
407
|
+
f" after {max_resume_attempts} attempts"
|
|
408
|
+
)
|
|
328
409
|
os.replace(tmp, self.blob_path(key))
|
|
329
410
|
except BaseException:
|
|
330
411
|
if os.path.exists(tmp):
|
|
331
|
-
os.remove(tmp) # no half-written blob on cancel/error
|
|
412
|
+
os.remove(tmp) # no half-written blob on cancel/error/give-up
|
|
332
413
|
raise
|
|
333
414
|
ts = now_iso()
|
|
334
415
|
with _DB_WRITE_LOCK, self.conn() as c:
|
|
@@ -369,6 +450,14 @@ class CacheFull(Exception):
|
|
|
369
450
|
"""Raised when --max-bytes is reached; the fill is refused, not evicted."""
|
|
370
451
|
|
|
371
452
|
|
|
453
|
+
class TruncatedDownload(Exception):
|
|
454
|
+
"""Raised when the upstream stream ended before the declared
|
|
455
|
+
Content-Length. The temp file is removed and no blob row is
|
|
456
|
+
written, so the same URL re-enqueues cleanly on the next request
|
|
457
|
+
instead of permanently serving a malformed file.
|
|
458
|
+
"""
|
|
459
|
+
|
|
460
|
+
|
|
372
461
|
@dataclass
|
|
373
462
|
class Job:
|
|
374
463
|
id: int
|
|
@@ -7,6 +7,7 @@ without an install.
|
|
|
7
7
|
import http.server
|
|
8
8
|
import os
|
|
9
9
|
import shutil
|
|
10
|
+
import socket
|
|
10
11
|
import socketserver
|
|
11
12
|
import sys
|
|
12
13
|
import tempfile
|
|
@@ -167,6 +168,208 @@ class TestStoreFromOrigin(unittest.TestCase):
|
|
|
167
168
|
store.store_from_origin(f"http://127.0.0.1:{self.port}/b.bin")
|
|
168
169
|
|
|
169
170
|
|
|
171
|
+
class _TruncatingOrigin(http.server.BaseHTTPRequestHandler):
|
|
172
|
+
"""Declare a full Content-Length, then send half the payload and
|
|
173
|
+
close the socket. Mirrors the real-world failure mode where the
|
|
174
|
+
upstream drops the connection mid-stream (lab-box fedora-44-desktop
|
|
175
|
+
flash that surfaced this bug)."""
|
|
176
|
+
|
|
177
|
+
PAYLOAD = b"abcdefghij" * 100 # 1000 bytes; will write half then close
|
|
178
|
+
|
|
179
|
+
def do_GET(self):
|
|
180
|
+
self.send_response(200)
|
|
181
|
+
self.send_header("Content-Type", "application/octet-stream")
|
|
182
|
+
self.send_header("Content-Length", str(len(self.PAYLOAD)))
|
|
183
|
+
self.end_headers()
|
|
184
|
+
half = len(self.PAYLOAD) // 2
|
|
185
|
+
self.wfile.write(self.PAYLOAD[:half])
|
|
186
|
+
# close the underlying socket so urllib observes EOF before
|
|
187
|
+
# Content-Length bytes arrive
|
|
188
|
+
self.wfile.flush()
|
|
189
|
+
try:
|
|
190
|
+
self.connection.shutdown(socket.SHUT_RDWR)
|
|
191
|
+
except OSError:
|
|
192
|
+
pass
|
|
193
|
+
|
|
194
|
+
def log_message(self, format, *args):
|
|
195
|
+
pass
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class TestTruncatedDownloadRejected(unittest.TestCase):
|
|
199
|
+
"""Regression for the lab-spotted bug where a transport-aborted
|
|
200
|
+
upstream stream silently became a permanent cached blob: future
|
|
201
|
+
HEADs returned 200 with the partial bytes, every consumer got a
|
|
202
|
+
malformed file, and the only escape was hand-deleting the blob.
|
|
203
|
+
Content-Length mismatches now fail loudly and leave no entry."""
|
|
204
|
+
|
|
205
|
+
def setUp(self):
|
|
206
|
+
self.httpd = socketserver.TCPServer(("127.0.0.1", 0), _TruncatingOrigin)
|
|
207
|
+
self.port = self.httpd.server_address[1]
|
|
208
|
+
self.t = threading.Thread(target=self.httpd.serve_forever, daemon=True)
|
|
209
|
+
self.t.start()
|
|
210
|
+
self.store = server.Store(tempfile.mkdtemp(), keep_query=False)
|
|
211
|
+
|
|
212
|
+
def tearDown(self):
|
|
213
|
+
self.httpd.shutdown()
|
|
214
|
+
self.httpd.server_close()
|
|
215
|
+
|
|
216
|
+
def test_truncated_upstream_raises_and_leaves_no_blob(self):
|
|
217
|
+
url = f"http://127.0.0.1:{self.port}/truncated.bin"
|
|
218
|
+
# _TruncatingOrigin truncates EVERY response (including
|
|
219
|
+
# ranged retries) so capping max_resume_attempts at 1 keeps
|
|
220
|
+
# the test fast: the single attempt cuts at 500 bytes,
|
|
221
|
+
# exhausts the budget, and the TruncatedDownload fires.
|
|
222
|
+
with self.assertRaises(server.TruncatedDownload) as cm:
|
|
223
|
+
self.store.store_from_origin(url, max_resume_attempts=1)
|
|
224
|
+
# the message must name both totals so the operator can see
|
|
225
|
+
# how short the upstream came
|
|
226
|
+
msg = str(cm.exception)
|
|
227
|
+
self.assertIn("1000", msg) # declared
|
|
228
|
+
self.assertIn("500", msg) # got
|
|
229
|
+
# no row was written; no blob file lingers on disk
|
|
230
|
+
self.assertIsNone(self.store.get_blob(url))
|
|
231
|
+
blobs = list(self.store.blob_path("").rsplit("/", 1)[0:1])
|
|
232
|
+
if os.path.isdir(blobs[0]):
|
|
233
|
+
self.assertEqual(os.listdir(blobs[0]), [])
|
|
234
|
+
|
|
235
|
+
def test_repeat_request_after_truncation_can_retry_cleanly(self):
|
|
236
|
+
url = f"http://127.0.0.1:{self.port}/truncated.bin"
|
|
237
|
+
with self.assertRaises(server.TruncatedDownload):
|
|
238
|
+
self.store.store_from_origin(url, max_resume_attempts=1)
|
|
239
|
+
# second attempt against the same URL would have hit the
|
|
240
|
+
# poisoned cache before the fix; now it must repeat the
|
|
241
|
+
# failure mode (no sticky blob blocking the retry) so a
|
|
242
|
+
# later origin recovery can re-fill the entry cleanly.
|
|
243
|
+
with self.assertRaises(server.TruncatedDownload):
|
|
244
|
+
self.store.store_from_origin(url, max_resume_attempts=1)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
# --------------------------------------------------------------------------
|
|
248
|
+
# Range-resume: a flaky upstream that cuts mid-stream MUST be retried with
|
|
249
|
+
# ``Range: bytes=<got>-`` so the partial is filled rather than discarded.
|
|
250
|
+
# This is the lab-spotted ghcr.io failure mode where Azure Blob Storage
|
|
251
|
+
# SAS URLs expire mid-download for any blob bigger than a few minutes of
|
|
252
|
+
# bandwidth: a single attempt always loses, but a retried Range request
|
|
253
|
+
# starts a fresh SAS window and the second leg finishes the blob.
|
|
254
|
+
# --------------------------------------------------------------------------
|
|
255
|
+
class _ResumableTruncatingOrigin(http.server.BaseHTTPRequestHandler):
|
|
256
|
+
"""Cut the FIRST GET in half; honor ``Range: bytes=<n>-`` on retries
|
|
257
|
+
by serving from offset n to end. Mirrors the ghcr -> Azure Blob
|
|
258
|
+
pattern: each connection has a hard wall-clock limit but the bytes
|
|
259
|
+
themselves are available on re-fetch.
|
|
260
|
+
|
|
261
|
+
Shared class-level counter so multiple instances (the threaded server
|
|
262
|
+
spawns one handler per request) all see the same call count and the
|
|
263
|
+
first GET truncates regardless of which thread services it.
|
|
264
|
+
"""
|
|
265
|
+
|
|
266
|
+
PAYLOAD = b"abcdefghij" * 100 # 1000 bytes
|
|
267
|
+
_lock = threading.Lock()
|
|
268
|
+
_calls = 0
|
|
269
|
+
|
|
270
|
+
@classmethod
|
|
271
|
+
def reset(cls) -> None:
|
|
272
|
+
with cls._lock:
|
|
273
|
+
cls._calls = 0
|
|
274
|
+
|
|
275
|
+
def do_GET(self):
|
|
276
|
+
with self._lock:
|
|
277
|
+
self.__class__._calls += 1
|
|
278
|
+
call = self._calls
|
|
279
|
+
rng = self.headers.get("Range") or ""
|
|
280
|
+
start = 0
|
|
281
|
+
if rng.startswith("bytes="):
|
|
282
|
+
try:
|
|
283
|
+
start = int(rng[len("bytes=") :].split("-", 1)[0])
|
|
284
|
+
except ValueError:
|
|
285
|
+
start = 0
|
|
286
|
+
full = len(self.PAYLOAD)
|
|
287
|
+
if start > 0:
|
|
288
|
+
# ranged retry: serve the rest cleanly
|
|
289
|
+
body = self.PAYLOAD[start:]
|
|
290
|
+
self.send_response(206)
|
|
291
|
+
self.send_header("Content-Type", "application/octet-stream")
|
|
292
|
+
self.send_header("Content-Length", str(len(body)))
|
|
293
|
+
self.send_header(
|
|
294
|
+
"Content-Range",
|
|
295
|
+
f"bytes {start}-{full - 1}/{full}",
|
|
296
|
+
)
|
|
297
|
+
self.end_headers()
|
|
298
|
+
self.wfile.write(body)
|
|
299
|
+
return
|
|
300
|
+
# first attempt: declare full length but cut at half
|
|
301
|
+
self.send_response(200)
|
|
302
|
+
self.send_header("Content-Type", "application/octet-stream")
|
|
303
|
+
self.send_header("Content-Length", str(full))
|
|
304
|
+
self.end_headers()
|
|
305
|
+
if call == 1:
|
|
306
|
+
half = full // 2
|
|
307
|
+
self.wfile.write(self.PAYLOAD[:half])
|
|
308
|
+
self.wfile.flush()
|
|
309
|
+
try:
|
|
310
|
+
self.connection.shutdown(socket.SHUT_RDWR)
|
|
311
|
+
except OSError:
|
|
312
|
+
pass
|
|
313
|
+
else:
|
|
314
|
+
# any non-ranged retry serves the whole thing (covers the
|
|
315
|
+
# 200-on-Range fallback path: origin ignored Range, we
|
|
316
|
+
# restart from 0)
|
|
317
|
+
self.wfile.write(self.PAYLOAD)
|
|
318
|
+
|
|
319
|
+
def log_message(self, format, *args):
|
|
320
|
+
pass
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
class TestRangeResumeOnTruncation(unittest.TestCase):
|
|
324
|
+
def setUp(self):
|
|
325
|
+
_ResumableTruncatingOrigin.reset()
|
|
326
|
+
self.httpd = socketserver.ThreadingTCPServer(("127.0.0.1", 0), _ResumableTruncatingOrigin)
|
|
327
|
+
self.port = self.httpd.server_address[1]
|
|
328
|
+
self.t = threading.Thread(target=self.httpd.serve_forever, daemon=True)
|
|
329
|
+
self.t.start()
|
|
330
|
+
self.store = server.Store(tempfile.mkdtemp(), keep_query=False)
|
|
331
|
+
|
|
332
|
+
def tearDown(self):
|
|
333
|
+
self.httpd.shutdown()
|
|
334
|
+
self.httpd.server_close()
|
|
335
|
+
|
|
336
|
+
def test_truncated_stream_resumes_via_range(self):
|
|
337
|
+
"""First GET cuts at byte 500; second GET (with
|
|
338
|
+
``Range: bytes=500-``) returns 206 and the remaining 500.
|
|
339
|
+
Result: a complete 1000-byte blob in the cache, sha256 matches
|
|
340
|
+
the upstream's full payload, no TruncatedDownload raised."""
|
|
341
|
+
import hashlib
|
|
342
|
+
|
|
343
|
+
url = f"http://127.0.0.1:{self.port}/resumable.bin"
|
|
344
|
+
row = self.store.store_from_origin(url)
|
|
345
|
+
self.assertEqual(row["size"], len(_ResumableTruncatingOrigin.PAYLOAD))
|
|
346
|
+
self.assertEqual(
|
|
347
|
+
row["sha256"],
|
|
348
|
+
hashlib.sha256(_ResumableTruncatingOrigin.PAYLOAD).hexdigest(),
|
|
349
|
+
)
|
|
350
|
+
with open(self.store.blob_path(row["key"]), "rb") as f:
|
|
351
|
+
self.assertEqual(f.read(), _ResumableTruncatingOrigin.PAYLOAD)
|
|
352
|
+
|
|
353
|
+
def test_progress_callback_reports_continuing_offset_on_resume(self):
|
|
354
|
+
"""Progress reports must be monotonic across the resume: the
|
|
355
|
+
second leg's reads start at 500 (the partial-so-far) and walk
|
|
356
|
+
up to 1000, NOT restart at 0. An operator dashboard watching
|
|
357
|
+
``progress`` for a stuck job needs to see the bytes climb."""
|
|
358
|
+
observed: list[tuple[int, int | None]] = []
|
|
359
|
+
url = f"http://127.0.0.1:{self.port}/resumable.bin"
|
|
360
|
+
self.store.store_from_origin(url, progress=lambda d, t: observed.append((d, t)))
|
|
361
|
+
# final report should be the full payload
|
|
362
|
+
self.assertEqual(observed[-1][0], len(_ResumableTruncatingOrigin.PAYLOAD))
|
|
363
|
+
# at no point did the byte counter regress
|
|
364
|
+
for prev, curr in zip(observed, observed[1:], strict=False):
|
|
365
|
+
self.assertGreaterEqual(curr[0], prev[0])
|
|
366
|
+
# the resume actually crossed the cut point: at least one
|
|
367
|
+
# progress call lands above the half-mark (otherwise we
|
|
368
|
+
# would have stalled at 500)
|
|
369
|
+
half = len(_ResumableTruncatingOrigin.PAYLOAD) // 2
|
|
370
|
+
self.assertTrue(any(d > half for d, _ in observed))
|
|
371
|
+
|
|
372
|
+
|
|
170
373
|
# --------------------------------------------------------------------------
|
|
171
374
|
# _shim: URL detection, rewrite, real-tool resolution, env, path-encoding
|
|
172
375
|
# --------------------------------------------------------------------------
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|