withcache 0.3.0__tar.gz → 0.4.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: withcache
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Operator-curated, URL-keyed artifact cache for a small lab (CUDA/ROCm/DOCA/firmware)
5
5
  Project-URL: Homepage, https://github.com/safl/withcache
6
6
  Author-email: "Simon A. F. Lund" <safl@safl.dk>
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
18
18
 
19
19
  # withcache
20
20
 
21
- [![ci](https://github.com/safl/withcache/actions/workflows/ci.yml/badge.svg)](https://github.com/safl/withcache/actions/workflows/ci.yml)
21
+ [![ci](https://github.com/safl/withcache/actions/workflows/ci-cd.yml/badge.svg)](https://github.com/safl/withcache/actions/workflows/ci-cd.yml)
22
22
  [![PyPI](https://img.shields.io/pypi/v/withcache.svg)](https://pypi.org/project/withcache/)
23
23
  [![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](LICENSE)
24
24
  [![built with Zig](https://img.shields.io/badge/built%20with-Zig%200.16.0-f7a41d.svg)](https://ziglang.org)
@@ -1,6 +1,6 @@
1
1
  # withcache
2
2
 
3
- [![ci](https://github.com/safl/withcache/actions/workflows/ci.yml/badge.svg)](https://github.com/safl/withcache/actions/workflows/ci.yml)
3
+ [![ci](https://github.com/safl/withcache/actions/workflows/ci-cd.yml/badge.svg)](https://github.com/safl/withcache/actions/workflows/ci-cd.yml)
4
4
  [![PyPI](https://img.shields.io/pypi/v/withcache.svg)](https://pypi.org/project/withcache/)
5
5
  [![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](LICENSE)
6
6
  [![built with Zig](https://img.shields.io/badge/built%20with-Zig%200.16.0-f7a41d.svg)](https://ziglang.org)
@@ -2,7 +2,7 @@
2
2
  .name = .withcache_shim,
3
3
  // Zig requires a literal here; keep it in lockstep with the project's
4
4
  // single source (src/withcache/__init__.py) via `make bump` / `make version-check`.
5
- .version = "0.3.0",
5
+ .version = "0.4.0",
6
6
  .fingerprint = 0xd7d96c5ed212ccaa,
7
7
  .minimum_zig_version = "0.16.0",
8
8
  .paths = .{
@@ -12,6 +12,6 @@ All modules are stdlib-only and self-contained.
12
12
 
13
13
  from .client import blob_url, cache_base, is_cached, serve_url
14
14
 
15
- __version__ = "0.3.0"
15
+ __version__ = "0.4.0"
16
16
 
17
17
  __all__ = ["__version__", "blob_url", "cache_base", "is_cached", "serve_url"]
@@ -37,13 +37,31 @@ def blob_url(server: str, origin: str) -> str:
37
37
  return _shim.blob_url(_shim.cache_base(server), origin)
38
38
 
39
39
 
40
- def is_cached(server: str, origin: str, timeout: float = PROBE_TIMEOUT) -> bool:
40
+ def is_cached(
41
+ server: str,
42
+ origin: str,
43
+ timeout: float = PROBE_TIMEOUT,
44
+ headers: dict[str, str] | None = None,
45
+ ) -> bool:
41
46
  """True if the cache-host already holds ``origin`` (a ``HEAD`` on ``/b/``
42
47
  returns 200). A miss (404), an unreachable host, a timeout, or any error
43
48
  returns False, so a caller can safely fall back to the origin. The HEAD
44
49
  also *warms* an auto-fetch cache-host: the miss is recorded and the
45
- background fill enqueued, so a later probe flips to cached."""
50
+ background fill enqueued, so a later probe flips to cached.
51
+
52
+ ``headers`` (optional) attaches request headers to the HEAD. The
53
+ cache-host forwards a client-supplied ``Authorization`` into its
54
+ background-fetch worker, so a consumer that has just minted an OCI
55
+ bearer (the typical use case: bty resolving an ``oras://`` catalog
56
+ entry to a ``ghcr.io`` blob URL at import time) can warm the cache
57
+ against that token-gated origin in one probe. Other entries in
58
+ ``headers`` are sent on the HEAD as well, but only ``Authorization`` is
59
+ forwarded into the fetch on the server side.
60
+ """
46
61
  req = urllib.request.Request(blob_url(server, origin), method="HEAD")
62
+ if headers:
63
+ for k, v in headers.items():
64
+ req.add_header(k, v)
47
65
  try:
48
66
  with urllib.request.urlopen(req, timeout=timeout) as resp:
49
67
  return bool(resp.status == 200)
@@ -53,10 +71,19 @@ def is_cached(server: str, origin: str, timeout: float = PROBE_TIMEOUT) -> bool:
53
71
  return False # unreachable / timeout -> caller serves the origin itself
54
72
 
55
73
 
56
- def serve_url(server: str, origin: str, timeout: float = PROBE_TIMEOUT) -> str | None:
74
+ def serve_url(
75
+ server: str,
76
+ origin: str,
77
+ timeout: float = PROBE_TIMEOUT,
78
+ headers: dict[str, str] | None = None,
79
+ ) -> str | None:
57
80
  """The cache-host serve URL for ``origin`` if the cache holds it, else
58
81
  ``None`` -- the convenience form of "use the cache when warm":
59
82
 
60
83
  url = client.serve_url(cache, origin) or origin
84
+
85
+ ``headers`` is passed through to :func:`is_cached` for the HEAD probe;
86
+ the returned serve URL never carries auth (cached bytes are served
87
+ without revisiting the origin).
61
88
  """
62
- return blob_url(server, origin) if is_cached(server, origin, timeout) else None
89
+ return blob_url(server, origin) if is_cached(server, origin, timeout, headers=headers) else None
@@ -1,16 +1,17 @@
1
1
  #!/usr/bin/env python3
2
- """withcache cache-host — an operator-curated artifact cache.
2
+ """withcache cache-host — a URL-keyed artifact cache.
3
3
 
4
4
  Stdlib only (http.server + sqlite3 + urllib). Serves cached blobs keyed by
5
- their origin URL. A cache miss is *not* fetched automatically: it is recorded
6
- in a miss table so an operator can review it and press "Download", at which
7
- point the cache-host pulls the artifact from origin and stores it. There is
8
- also an "add from URI" form to pre-seed an artifact before anyone misses it.
5
+ their origin URL. By default a cache miss is auto-fetched: it is recorded in the
6
+ miss table and pulled from origin in the background, so the next request hits
7
+ (the client falls through to origin on the first miss). Run with `--curate` to
8
+ require an operator to approve each pull via a small web UI instead; either way
9
+ you can pre-seed an artifact with the "Add from URI" form.
9
10
 
10
11
  This is the only component that needs internet egress (and any vendor creds).
11
12
  Clients never write to it.
12
13
 
13
- Auth (modelled on bty's single-tenant approach, minus PAM): the read path
14
+ Auth (single-tenant: env password + signed cookie): the read path
14
15
  (`/blob`, `/healthz`) is open so clients never log in; the operator surface
15
16
  (`/` and `/admin/*`) is gated behind a server-signed session cookie. Login at
16
17
  `POST /ui/login` checks the password in $WITHCACHE_ADMIN_PASSWORD and flips the
@@ -663,7 +664,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
663
664
  # falls through on a miss). In --curate mode an operator triggers
664
665
  # the pull instead; when the cache is full we record the miss but
665
666
  # schedule nothing (delete something first).
666
- self.mgr.enqueue(url)
667
+ #
668
+ # Forward the client's ``Authorization`` header into the worker
669
+ # job so a token-gated origin (typical use case: a fresh OCI
670
+ # bearer on a ghcr.io blob URL minted by bty-web at catalog
671
+ # import time) can be fetched. Without this the worker runs
672
+ # anonymous and the origin 401s; the URL stays uncached forever. Keep the
673
+ # allowlist narrow on purpose: ``Authorization`` is the only
674
+ # request header we proxy onto the worker. The ``/admin/fetch``
675
+ # operator endpoint still carries its own ``headers=`` payload
676
+ # for the curated path.
677
+ fwd_headers = None
678
+ auth = self.headers.get("Authorization")
679
+ if auth:
680
+ fwd_headers = {"Authorization": auth}
681
+ self.mgr.enqueue(url, headers=fwd_headers)
667
682
  self.send_text(404, "cache miss (recorded)\n")
668
683
  return
669
684
  path = self.store.blob_path(row["key"])
@@ -11,6 +11,7 @@ import socketserver
11
11
  import sys
12
12
  import tempfile
13
13
  import threading
14
+ import time
14
15
  import unittest
15
16
 
16
17
  sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
@@ -482,6 +483,60 @@ class TestFetchWithHeaders(unittest.TestCase):
482
483
  self.assertEqual(row["size"], len(PAYLOAD))
483
484
 
484
485
 
486
+ # --------------------------------------------------------------------------
487
+ # HEAD with an Authorization header should propagate that header into the
488
+ # auto-fetch worker so a 401-gated origin (e.g. a ghcr.io blob URL behind a
489
+ # bty-minted OCI bearer) actually fills. Without this propagation the worker
490
+ # pulls anonymous, the origin 401s, and the URL stays uncached forever.
491
+ # --------------------------------------------------------------------------
492
+ class TestHeadForwardsAuthorizationToAutoFetch(unittest.TestCase):
493
+ def setUp(self):
494
+ self.origin = socketserver.TCPServer(("127.0.0.1", 0), _AuthOrigin)
495
+ threading.Thread(target=self.origin.serve_forever, daemon=True).start()
496
+ self.origin_url = f"http://127.0.0.1:{self.origin.server_address[1]}/blob.bin"
497
+ self.httpd, self.store = _start_withcache(auto_fetch=True)
498
+ self.base = f"http://127.0.0.1:{self.httpd.server_address[1]}"
499
+
500
+ def tearDown(self):
501
+ for s in (self.origin, self.httpd):
502
+ s.shutdown()
503
+ s.server_close()
504
+
505
+ def _wait_for_fill(self, timeout_s=2.0):
506
+ deadline = time.monotonic() + timeout_s
507
+ while time.monotonic() < deadline:
508
+ if self.store.get_blob(self.origin_url) is not None:
509
+ return True
510
+ time.sleep(0.02)
511
+ return False
512
+
513
+ def test_head_with_authorization_triggers_authed_fetch(self):
514
+ bu = _shim.blob_url(self.base, self.origin_url)
515
+ req = urllib.request.Request(bu, method="HEAD")
516
+ req.add_header("Authorization", _AuthOrigin.TOKEN)
517
+ with self.assertRaises(urllib.error.HTTPError) as cm:
518
+ urllib.request.urlopen(req)
519
+ self.assertEqual(cm.exception.code, 404) # miss; recorded + enqueued
520
+ # The worker should have fetched in the background using the header.
521
+ self.assertTrue(
522
+ self._wait_for_fill(),
523
+ "expected blob to be cached after auth-bearing HEAD",
524
+ )
525
+
526
+ def test_head_without_authorization_leaves_origin_401_and_cache_empty(self):
527
+ # Negative: no Authorization on the HEAD means the worker is enqueued
528
+ # anonymous, the origin 401s, nothing lands. Verifies the new code
529
+ # path is genuinely opt-in (HEAD without auth keeps the old behaviour).
530
+ bu = _shim.blob_url(self.base, self.origin_url)
531
+ with self.assertRaises(urllib.error.HTTPError) as cm:
532
+ urllib.request.urlopen(urllib.request.Request(bu, method="HEAD"))
533
+ self.assertEqual(cm.exception.code, 404)
534
+ self.assertFalse(
535
+ self._wait_for_fill(timeout_s=0.5),
536
+ "expected no blob without forwarded auth",
537
+ )
538
+
539
+
485
540
  # --------------------------------------------------------------------------
486
541
  # Pure helpers
487
542
  # --------------------------------------------------------------------------
@@ -539,5 +594,61 @@ class TestClientLibrary(unittest.TestCase):
539
594
  self.assertFalse(client.is_cached("http://127.0.0.1:9", self.origin_url, timeout=0.5))
540
595
 
541
596
 
597
+ # --------------------------------------------------------------------------
598
+ # Client + server end-to-end: a HEAD with ``headers={"Authorization": ...}``
599
+ # warms the cache against a 401-gated origin. Mirrors the bty oras case
600
+ # (resolved ghcr.io blob URL + freshly-minted OCI bearer).
601
+ # --------------------------------------------------------------------------
602
+ class TestClientLibraryAuthForwarding(unittest.TestCase):
603
+ def setUp(self):
604
+ self.origin = socketserver.TCPServer(("127.0.0.1", 0), _AuthOrigin)
605
+ threading.Thread(target=self.origin.serve_forever, daemon=True).start()
606
+ self.origin_url = f"http://127.0.0.1:{self.origin.server_address[1]}/blob.bin"
607
+ self.httpd, self.store = _start_withcache(auto_fetch=True)
608
+ self.base = f"http://127.0.0.1:{self.httpd.server_address[1]}"
609
+
610
+ def tearDown(self):
611
+ for s in (self.origin, self.httpd):
612
+ s.shutdown()
613
+ s.server_close()
614
+
615
+ def test_is_cached_with_authorization_warms_auth_gated_origin(self):
616
+ # Cold: no auth -> background fetch goes anonymous, 401s, cache empty.
617
+ self.assertFalse(client.is_cached(self.base, self.origin_url))
618
+ deadline = time.monotonic() + 0.5
619
+ while time.monotonic() < deadline:
620
+ if self.store.get_blob(self.origin_url) is not None:
621
+ break
622
+ time.sleep(0.02)
623
+ self.assertIsNone(self.store.get_blob(self.origin_url))
624
+
625
+ # Warm-with-token: HEAD carries Authorization; server forwards it
626
+ # into the fetch worker; the auth-gated origin returns the bytes.
627
+ self.assertFalse(
628
+ client.is_cached(
629
+ self.base,
630
+ self.origin_url,
631
+ headers={"Authorization": _AuthOrigin.TOKEN},
632
+ )
633
+ )
634
+ deadline = time.monotonic() + 2.0
635
+ while time.monotonic() < deadline:
636
+ if self.store.get_blob(self.origin_url) is not None:
637
+ break
638
+ time.sleep(0.02)
639
+ self.assertIsNotNone(
640
+ self.store.get_blob(self.origin_url),
641
+ "expected auth-bearing HEAD to fill the cache via forwarded Authorization",
642
+ )
643
+
644
+ # And once cached, the auth header is no longer needed: a plain HEAD
645
+ # hits 200, serve_url returns the blob URL without auth.
646
+ self.assertTrue(client.is_cached(self.base, self.origin_url))
647
+ self.assertEqual(
648
+ client.serve_url(self.base, self.origin_url),
649
+ client.blob_url(self.base, self.origin_url),
650
+ )
651
+
652
+
542
653
  if __name__ == "__main__":
543
654
  unittest.main(verbosity=2)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes