withcache 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {withcache-0.3.0 → withcache-0.4.0}/PKG-INFO +2 -2
- {withcache-0.3.0 → withcache-0.4.0}/README.md +1 -1
- {withcache-0.3.0 → withcache-0.4.0}/shim/build.zig.zon +1 -1
- {withcache-0.3.0 → withcache-0.4.0}/src/withcache/__init__.py +1 -1
- {withcache-0.3.0 → withcache-0.4.0}/src/withcache/client.py +31 -4
- {withcache-0.3.0 → withcache-0.4.0}/src/withcache/server.py +22 -7
- {withcache-0.3.0 → withcache-0.4.0}/tests/test_withcache.py +111 -0
- {withcache-0.3.0 → withcache-0.4.0}/.gitignore +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/LICENSE +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/deploy/Containerfile +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/deploy/compose.yml +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/hatch_build.py +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/pyproject.toml +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/shim/build.zig +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/shim/shim.zig +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/src/withcache/_shim.py +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/src/withcache/curlwithcache.py +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/src/withcache/static/htmx.min.js +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/src/withcache/static/pico.min.css +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/src/withcache/wgetwithcache.py +0 -0
- {withcache-0.3.0 → withcache-0.4.0}/tests/test_differential.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: withcache
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Operator-curated, URL-keyed artifact cache for a small lab (CUDA/ROCm/DOCA/firmware)
|
|
5
5
|
Project-URL: Homepage, https://github.com/safl/withcache
|
|
6
6
|
Author-email: "Simon A. F. Lund" <safl@safl.dk>
|
|
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
|
|
|
18
18
|
|
|
19
19
|
# withcache
|
|
20
20
|
|
|
21
|
-
[](https://github.com/safl/withcache/actions/workflows/ci.yml)
|
|
21
|
+
[](https://github.com/safl/withcache/actions/workflows/ci-cd.yml)
|
|
22
22
|
[](https://pypi.org/project/withcache/)
|
|
23
23
|
[](LICENSE)
|
|
24
24
|
[](https://ziglang.org)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# withcache
|
|
2
2
|
|
|
3
|
-
[](https://github.com/safl/withcache/actions/workflows/ci.yml)
|
|
3
|
+
[](https://github.com/safl/withcache/actions/workflows/ci-cd.yml)
|
|
4
4
|
[](https://pypi.org/project/withcache/)
|
|
5
5
|
[](LICENSE)
|
|
6
6
|
[](https://ziglang.org)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
.name = .withcache_shim,
|
|
3
3
|
// Zig requires a literal here; keep it in lockstep with the project's
|
|
4
4
|
// single source (src/withcache/__init__.py) via `make bump` / `make version-check`.
|
|
5
|
-
.version = "0.3.0",
|
|
5
|
+
.version = "0.4.0",
|
|
6
6
|
.fingerprint = 0xd7d96c5ed212ccaa,
|
|
7
7
|
.minimum_zig_version = "0.16.0",
|
|
8
8
|
.paths = .{
|
|
@@ -37,13 +37,31 @@ def blob_url(server: str, origin: str) -> str:
|
|
|
37
37
|
return _shim.blob_url(_shim.cache_base(server), origin)
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
def is_cached(server: str, origin: str, timeout: float = PROBE_TIMEOUT) -> bool:
|
|
40
|
+
def is_cached(
|
|
41
|
+
server: str,
|
|
42
|
+
origin: str,
|
|
43
|
+
timeout: float = PROBE_TIMEOUT,
|
|
44
|
+
headers: dict[str, str] | None = None,
|
|
45
|
+
) -> bool:
|
|
41
46
|
"""True if the cache-host already holds ``origin`` (a ``HEAD`` on ``/b/``
|
|
42
47
|
returns 200). A miss (404), an unreachable host, a timeout, or any error
|
|
43
48
|
returns False, so a caller can safely fall back to the origin. The HEAD
|
|
44
49
|
also *warms* an auto-fetch cache-host: the miss is recorded and the
|
|
45
|
-
background fill enqueued, so a later probe flips to cached.
|
|
50
|
+
background fill enqueued, so a later probe flips to cached.
|
|
51
|
+
|
|
52
|
+
``headers`` (optional) attaches request headers to the HEAD. The
|
|
53
|
+
cache-host forwards a client-supplied ``Authorization`` into its
|
|
54
|
+
background-fetch worker, so a consumer that has just minted an OCI
|
|
55
|
+
bearer (the typical use case: bty resolving an ``oras://`` catalog
|
|
56
|
+
entry to a ``ghcr.io`` blob URL at import time) can warm the cache
|
|
57
|
+
against that token-gated origin in one probe. Other entries in
|
|
58
|
+
``headers`` round-trip the same way; only ``Authorization`` is
|
|
59
|
+
forwarded into the fetch on the server side.
|
|
60
|
+
"""
|
|
46
61
|
req = urllib.request.Request(blob_url(server, origin), method="HEAD")
|
|
62
|
+
if headers:
|
|
63
|
+
for k, v in headers.items():
|
|
64
|
+
req.add_header(k, v)
|
|
47
65
|
try:
|
|
48
66
|
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
|
49
67
|
return bool(resp.status == 200)
|
|
@@ -53,10 +71,19 @@ def is_cached(server: str, origin: str, timeout: float = PROBE_TIMEOUT) -> bool:
|
|
|
53
71
|
return False # unreachable / timeout -> caller serves the origin itself
|
|
54
72
|
|
|
55
73
|
|
|
56
|
-
def serve_url(
|
|
74
|
+
def serve_url(
|
|
75
|
+
server: str,
|
|
76
|
+
origin: str,
|
|
77
|
+
timeout: float = PROBE_TIMEOUT,
|
|
78
|
+
headers: dict[str, str] | None = None,
|
|
79
|
+
) -> str | None:
|
|
57
80
|
"""The cache-host serve URL for ``origin`` if the cache holds it, else
|
|
58
81
|
``None`` -- the convenience form of "use the cache when warm":
|
|
59
82
|
|
|
60
83
|
url = client.serve_url(cache, origin) or origin
|
|
84
|
+
|
|
85
|
+
``headers`` is passed through to :func:`is_cached` for the HEAD probe;
|
|
86
|
+
the returned serve URL never carries auth (cached bytes are served
|
|
87
|
+
without revisiting the origin).
|
|
61
88
|
"""
|
|
62
|
-
return blob_url(server, origin) if is_cached(server, origin, timeout) else None
|
|
89
|
+
return blob_url(server, origin) if is_cached(server, origin, timeout, headers=headers) else None
|
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
"""withcache cache-host —
|
|
2
|
+
"""withcache cache-host — a URL-keyed artifact cache.
|
|
3
3
|
|
|
4
4
|
Stdlib only (http.server + sqlite3 + urllib). Serves cached blobs keyed by
|
|
5
|
-
their origin URL.
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
their origin URL. By default a cache miss is auto-fetched: it is recorded in the
|
|
6
|
+
miss table and pulled from origin in the background, so the next request hits
|
|
7
|
+
(the client falls through to origin on the first miss). Run with `--curate` to
|
|
8
|
+
require an operator to approve each pull via a small web UI instead; either way
|
|
9
|
+
you can pre-seed an artifact with the "Add from URI" form.
|
|
9
10
|
|
|
10
11
|
This is the only component that needs internet egress (and any vendor creds).
|
|
11
12
|
Clients never write to it.
|
|
12
13
|
|
|
13
|
-
Auth (
|
|
14
|
+
Auth (single-tenant: env password + signed cookie): the read path
|
|
14
15
|
(`/blob`, `/healthz`) is open so clients never log in; the operator surface
|
|
15
16
|
(`/` and `/admin/*`) is gated behind a server-signed session cookie. Login at
|
|
16
17
|
`POST /ui/login` checks the password in $WITHCACHE_ADMIN_PASSWORD and flips the
|
|
@@ -663,7 +664,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
663
664
|
# falls through on a miss). In --curate mode an operator triggers
|
|
664
665
|
# the pull instead; when the cache is full we record the miss but
|
|
665
666
|
# schedule nothing (delete something first).
|
|
666
|
-
|
|
667
|
+
#
|
|
668
|
+
# Forward the client's ``Authorization`` header into the worker
|
|
669
|
+
# job so a token-gated origin (typical use case: a fresh OCI
|
|
670
|
+
# bearer on a ghcr.io blob URL minted by bty-web at catalog
|
|
671
|
+
# import time) can be fetched. Without this the worker runs
|
|
672
|
+
# anonymous and 401s; the URL stays uncached forever. Keep the
|
|
673
|
+
# allowlist narrow on purpose: ``Authorization`` is the only
|
|
674
|
+
# request header we proxy onto the worker. The ``/admin/fetch``
|
|
675
|
+
# operator endpoint still carries its own ``headers=`` payload
|
|
676
|
+
# for the curated path.
|
|
677
|
+
fwd_headers = None
|
|
678
|
+
auth = self.headers.get("Authorization")
|
|
679
|
+
if auth:
|
|
680
|
+
fwd_headers = {"Authorization": auth}
|
|
681
|
+
self.mgr.enqueue(url, headers=fwd_headers)
|
|
667
682
|
self.send_text(404, "cache miss (recorded)\n")
|
|
668
683
|
return
|
|
669
684
|
path = self.store.blob_path(row["key"])
|
|
@@ -11,6 +11,7 @@ import socketserver
|
|
|
11
11
|
import sys
|
|
12
12
|
import tempfile
|
|
13
13
|
import threading
|
|
14
|
+
import time
|
|
14
15
|
import unittest
|
|
15
16
|
|
|
16
17
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
|
|
@@ -482,6 +483,60 @@ class TestFetchWithHeaders(unittest.TestCase):
|
|
|
482
483
|
self.assertEqual(row["size"], len(PAYLOAD))
|
|
483
484
|
|
|
484
485
|
|
|
486
|
+
# --------------------------------------------------------------------------
|
|
487
|
+
# HEAD with an Authorization header should propagate that header into the
|
|
488
|
+
# auto-fetch worker so a 401-gated origin (e.g. a ghcr.io blob URL behind a
|
|
489
|
+
# bty-minted OCI bearer) actually fills. Without this propagation the worker
|
|
490
|
+
# pulls anonymous, the origin 401s, and the URL stays uncached forever.
|
|
491
|
+
# --------------------------------------------------------------------------
|
|
492
|
+
class TestHeadForwardsAuthorizationToAutoFetch(unittest.TestCase):
|
|
493
|
+
def setUp(self):
|
|
494
|
+
self.origin = socketserver.TCPServer(("127.0.0.1", 0), _AuthOrigin)
|
|
495
|
+
threading.Thread(target=self.origin.serve_forever, daemon=True).start()
|
|
496
|
+
self.origin_url = f"http://127.0.0.1:{self.origin.server_address[1]}/blob.bin"
|
|
497
|
+
self.httpd, self.store = _start_withcache(auto_fetch=True)
|
|
498
|
+
self.base = f"http://127.0.0.1:{self.httpd.server_address[1]}"
|
|
499
|
+
|
|
500
|
+
def tearDown(self):
|
|
501
|
+
for s in (self.origin, self.httpd):
|
|
502
|
+
s.shutdown()
|
|
503
|
+
s.server_close()
|
|
504
|
+
|
|
505
|
+
def _wait_for_fill(self, timeout_s=2.0):
|
|
506
|
+
deadline = time.monotonic() + timeout_s
|
|
507
|
+
while time.monotonic() < deadline:
|
|
508
|
+
if self.store.get_blob(self.origin_url) is not None:
|
|
509
|
+
return True
|
|
510
|
+
time.sleep(0.02)
|
|
511
|
+
return False
|
|
512
|
+
|
|
513
|
+
def test_head_with_authorization_triggers_authed_fetch(self):
|
|
514
|
+
bu = _shim.blob_url(self.base, self.origin_url)
|
|
515
|
+
req = urllib.request.Request(bu, method="HEAD")
|
|
516
|
+
req.add_header("Authorization", _AuthOrigin.TOKEN)
|
|
517
|
+
with self.assertRaises(urllib.error.HTTPError) as cm:
|
|
518
|
+
urllib.request.urlopen(req)
|
|
519
|
+
self.assertEqual(cm.exception.code, 404) # miss; recorded + enqueued
|
|
520
|
+
# The worker should have fetched in the background using the header.
|
|
521
|
+
self.assertTrue(
|
|
522
|
+
self._wait_for_fill(),
|
|
523
|
+
"expected blob to be cached after auth-bearing HEAD",
|
|
524
|
+
)
|
|
525
|
+
|
|
526
|
+
def test_head_without_authorization_leaves_origin_401_and_cache_empty(self):
|
|
527
|
+
# Negative: no Authorization on the HEAD means the worker is enqueued
|
|
528
|
+
# anonymous, the origin 401s, nothing lands. Verifies the new code
|
|
529
|
+
# path is genuinely opt-in (HEAD without auth keeps the old behaviour).
|
|
530
|
+
bu = _shim.blob_url(self.base, self.origin_url)
|
|
531
|
+
with self.assertRaises(urllib.error.HTTPError) as cm:
|
|
532
|
+
urllib.request.urlopen(urllib.request.Request(bu, method="HEAD"))
|
|
533
|
+
self.assertEqual(cm.exception.code, 404)
|
|
534
|
+
self.assertFalse(
|
|
535
|
+
self._wait_for_fill(timeout_s=0.5),
|
|
536
|
+
"expected no blob without forwarded auth",
|
|
537
|
+
)
|
|
538
|
+
|
|
539
|
+
|
|
485
540
|
# --------------------------------------------------------------------------
|
|
486
541
|
# Pure helpers
|
|
487
542
|
# --------------------------------------------------------------------------
|
|
@@ -539,5 +594,61 @@ class TestClientLibrary(unittest.TestCase):
|
|
|
539
594
|
self.assertFalse(client.is_cached("http://127.0.0.1:9", self.origin_url, timeout=0.5))
|
|
540
595
|
|
|
541
596
|
|
|
597
|
+
# --------------------------------------------------------------------------
|
|
598
|
+
# Client + server end-to-end: a HEAD with ``headers={"Authorization": ...}``
|
|
599
|
+
# warms the cache against a 401-gated origin. Mirrors the bty oras case
|
|
600
|
+
# (resolved ghcr.io blob URL + freshly-minted OCI bearer).
|
|
601
|
+
# --------------------------------------------------------------------------
|
|
602
|
+
class TestClientLibraryAuthForwarding(unittest.TestCase):
|
|
603
|
+
def setUp(self):
|
|
604
|
+
self.origin = socketserver.TCPServer(("127.0.0.1", 0), _AuthOrigin)
|
|
605
|
+
threading.Thread(target=self.origin.serve_forever, daemon=True).start()
|
|
606
|
+
self.origin_url = f"http://127.0.0.1:{self.origin.server_address[1]}/blob.bin"
|
|
607
|
+
self.httpd, self.store = _start_withcache(auto_fetch=True)
|
|
608
|
+
self.base = f"http://127.0.0.1:{self.httpd.server_address[1]}"
|
|
609
|
+
|
|
610
|
+
def tearDown(self):
|
|
611
|
+
for s in (self.origin, self.httpd):
|
|
612
|
+
s.shutdown()
|
|
613
|
+
s.server_close()
|
|
614
|
+
|
|
615
|
+
def test_is_cached_with_authorization_warms_auth_gated_origin(self):
|
|
616
|
+
# Cold: no auth -> background fetch goes anonymous, 401s, cache empty.
|
|
617
|
+
self.assertFalse(client.is_cached(self.base, self.origin_url))
|
|
618
|
+
deadline = time.monotonic() + 0.5
|
|
619
|
+
while time.monotonic() < deadline:
|
|
620
|
+
if self.store.get_blob(self.origin_url) is not None:
|
|
621
|
+
break
|
|
622
|
+
time.sleep(0.02)
|
|
623
|
+
self.assertIsNone(self.store.get_blob(self.origin_url))
|
|
624
|
+
|
|
625
|
+
# Warm-with-token: HEAD carries Authorization; server forwards it
|
|
626
|
+
# into the fetch worker; the auth-gated origin returns the bytes.
|
|
627
|
+
self.assertFalse(
|
|
628
|
+
client.is_cached(
|
|
629
|
+
self.base,
|
|
630
|
+
self.origin_url,
|
|
631
|
+
headers={"Authorization": _AuthOrigin.TOKEN},
|
|
632
|
+
)
|
|
633
|
+
)
|
|
634
|
+
deadline = time.monotonic() + 2.0
|
|
635
|
+
while time.monotonic() < deadline:
|
|
636
|
+
if self.store.get_blob(self.origin_url) is not None:
|
|
637
|
+
break
|
|
638
|
+
time.sleep(0.02)
|
|
639
|
+
self.assertIsNotNone(
|
|
640
|
+
self.store.get_blob(self.origin_url),
|
|
641
|
+
"expected auth-bearing HEAD to fill the cache via forwarded Authorization",
|
|
642
|
+
)
|
|
643
|
+
|
|
644
|
+
# And once cached, the auth header is no longer needed: a plain HEAD
|
|
645
|
+
# hits 200, serve_url returns the blob URL without auth.
|
|
646
|
+
self.assertTrue(client.is_cached(self.base, self.origin_url))
|
|
647
|
+
self.assertEqual(
|
|
648
|
+
client.serve_url(self.base, self.origin_url),
|
|
649
|
+
client.blob_url(self.base, self.origin_url),
|
|
650
|
+
)
|
|
651
|
+
|
|
652
|
+
|
|
542
653
|
if __name__ == "__main__":
|
|
543
654
|
unittest.main(verbosity=2)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|