withcache 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: withcache
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Operator-curated, URL-keyed artifact cache for a small lab (CUDA/ROCm/DOCA/firmware)
5
5
  Project-URL: Homepage, https://github.com/safl/withcache
6
6
  Author-email: "Simon A. F. Lund" <safl@safl.dk>
@@ -2,7 +2,7 @@
2
2
  .name = .withcache_shim,
3
3
  // Zig requires a literal here; keep it in lockstep with the project's
4
4
  // single source (src/withcache/__init__.py) via `make bump` / `make version-check`.
5
- .version = "0.4.0",
5
+ .version = "0.4.1",
6
6
  .fingerprint = 0xd7d96c5ed212ccaa,
7
7
  .minimum_zig_version = "0.16.0",
8
8
  .paths = .{
@@ -12,6 +12,6 @@ All modules are stdlib-only and self-contained.
12
12
 
13
13
  from .client import blob_url, cache_base, is_cached, serve_url
14
14
 
15
- __version__ = "0.4.0"
15
+ __version__ = "0.4.1"
16
16
 
17
17
  __all__ = ["__version__", "blob_url", "cache_base", "is_cached", "serve_url"]
@@ -325,6 +325,17 @@ class Store:
325
325
  size += len(chunk)
326
326
  if progress:
327
327
  progress(size, total)
328
+ # urllib's read loop exits on clean EOF AND on transport-
329
+ # aborted close; HTTPResponse only raises IncompleteRead
330
+ # in some configurations. When the origin declared
331
+ # Content-Length, treat that as the contract and refuse
332
+ # to promote a short blob. A silent partial-promotion
333
+ # would serve malformed bytes to every future consumer
334
+ # with no way for them to invalidate the entry.
335
+ if total is not None and size != total:
336
+ raise TruncatedDownload(
337
+ f"upstream truncated for {url}: declared {total} bytes, got {size}"
338
+ )
328
339
  os.replace(tmp, self.blob_path(key))
329
340
  except BaseException:
330
341
  if os.path.exists(tmp):
@@ -369,6 +380,14 @@ class CacheFull(Exception):
369
380
  """Raised when --max-bytes is reached; the fill is refused, not evicted."""
370
381
 
371
382
 
383
class TruncatedDownload(Exception):
    """Upstream closed the stream short of its declared Content-Length.

    When this is raised the temp file has been removed and no blob row
    has been written, so the next request for the same URL re-enqueues
    cleanly rather than being served a malformed file forever.
    """
389
+
390
+
372
391
  @dataclass
373
392
  class Job:
374
393
  id: int
@@ -7,6 +7,7 @@ without an install.
7
7
  import http.server
8
8
  import os
9
9
  import shutil
10
+ import socket
10
11
  import socketserver
11
12
  import sys
12
13
  import tempfile
@@ -167,6 +168,78 @@ class TestStoreFromOrigin(unittest.TestCase):
167
168
  store.store_from_origin(f"http://127.0.0.1:{self.port}/b.bin")
168
169
 
169
170
 
171
class _TruncatingOrigin(http.server.BaseHTTPRequestHandler):
    """Origin that advertises the whole payload but delivers only half.

    Every GET is answered with a Content-Length covering all of
    ``PAYLOAD``; the handler then writes the first half and shuts the
    socket down. This reproduces the real-world failure where the
    upstream drops the connection mid-stream (the lab-box
    fedora-44-desktop flash that surfaced this bug).
    """

    PAYLOAD = b"abcdefghij" * 100  # 1000 bytes; will write half then close

    def do_GET(self):
        """Send full-length headers, half of the body, then hang up."""
        body = self.PAYLOAD
        self.send_response(200)
        for field, value in (
            ("Content-Type", "application/octet-stream"),
            ("Content-Length", str(len(body))),
        ):
            self.send_header(field, value)
        self.end_headers()
        self.wfile.write(body[: len(body) // 2])
        # Flush before the shutdown so the partial body really goes out;
        # the client must then see EOF before Content-Length bytes arrive.
        self.wfile.flush()
        try:
            self.connection.shutdown(socket.SHUT_RDWR)
        except OSError:
            # Best effort: the peer may already have torn the socket down.
            pass

    def log_message(self, format, *args):
        """Discard per-request logging so test output stays quiet."""
196
+
197
+
198
class TestTruncatedDownloadRejected(unittest.TestCase):
    """Regression test for truncated upstream downloads.

    A transport-aborted upstream stream used to become a permanent
    cached blob: later HEADs returned 200 with the partial bytes, every
    consumer got a malformed file, and the only escape was deleting the
    blob by hand. A Content-Length mismatch must now fail loudly and
    leave no entry behind.
    """

    def setUp(self):
        # Build the store first so a failure here cannot leak a running
        # server (tearDown is not called when setUp raises).
        self.root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)
        self.store = server.Store(self.root, keep_query=False)

        self.httpd = socketserver.TCPServer(("127.0.0.1", 0), _TruncatingOrigin)
        self.port = self.httpd.server_address[1]
        self.t = threading.Thread(target=self.httpd.serve_forever, daemon=True)
        self.t.start()

    def tearDown(self):
        self.httpd.shutdown()
        self.httpd.server_close()
        # serve_forever() has returned once shutdown() completes; join so
        # no thread outlives the test that started it.
        self.t.join(timeout=5)

    def test_truncated_upstream_raises_and_leaves_no_blob(self):
        url = f"http://127.0.0.1:{self.port}/truncated.bin"
        with self.assertRaises(server.TruncatedDownload) as cm:
            self.store.store_from_origin(url)

        # The message must name both totals so the operator can see how
        # short the upstream came. Match each number together with its
        # label: the message also embeds the URL, and an ephemeral port
        # such as 50012 or 41000 would satisfy a bare "500" / "1000"
        # substring check even if the counts were missing.
        declared = len(_TruncatingOrigin.PAYLOAD)
        msg = str(cm.exception)
        self.assertIn(f"declared {declared} bytes", msg)
        self.assertIn(f"got {declared // 2}", msg)

        # No row was written; no blob file lingers on disk.
        self.assertIsNone(self.store.get_blob(url))
        blob_dir = os.path.dirname(self.store.blob_path(""))
        if os.path.isdir(blob_dir):
            self.assertEqual(os.listdir(blob_dir), [])

    def test_repeat_request_after_truncation_can_retry_cleanly(self):
        url = f"http://127.0.0.1:{self.port}/truncated.bin"
        with self.assertRaises(server.TruncatedDownload):
            self.store.store_from_origin(url)
        # Before the fix the second attempt would have hit the poisoned
        # cache. Now it must reach the origin again and fail the same
        # way (no sticky blob blocking the retry), so a later origin
        # recovery can re-fill the entry cleanly.
        with self.assertRaises(server.TruncatedDownload):
            self.store.store_from_origin(url)
241
+
242
+
170
243
  # --------------------------------------------------------------------------
171
244
  # _shim: URL detection, rewrite, real-tool resolution, env, path-encoding
172
245
  # --------------------------------------------------------------------------
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes