withcache 0.2.0__tar.gz → 0.4.0__tar.gz
This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {withcache-0.2.0 → withcache-0.4.0}/PKG-INFO +26 -5
- {withcache-0.2.0 → withcache-0.4.0}/README.md +25 -4
- {withcache-0.2.0 → withcache-0.4.0}/deploy/Containerfile +4 -1
- {withcache-0.2.0 → withcache-0.4.0}/shim/build.zig.zon +1 -1
- withcache-0.4.0/src/withcache/__init__.py +17 -0
- withcache-0.4.0/src/withcache/client.py +89 -0
- {withcache-0.2.0 → withcache-0.4.0}/src/withcache/server.py +120 -22
- {withcache-0.2.0 → withcache-0.4.0}/tests/test_withcache.py +229 -1
- withcache-0.2.0/src/withcache/__init__.py +0 -11
- {withcache-0.2.0 → withcache-0.4.0}/.gitignore +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/LICENSE +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/deploy/compose.yml +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/hatch_build.py +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/pyproject.toml +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/shim/build.zig +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/shim/shim.zig +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/src/withcache/_shim.py +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/src/withcache/curlwithcache.py +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/src/withcache/static/htmx.min.js +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/src/withcache/static/pico.min.css +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/src/withcache/wgetwithcache.py +0 -0
- {withcache-0.2.0 → withcache-0.4.0}/tests/test_differential.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: withcache
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Operator-curated, URL-keyed artifact cache for a small lab (CUDA/ROCm/DOCA/firmware)
|
|
5
5
|
Project-URL: Homepage, https://github.com/safl/withcache
|
|
6
6
|
Author-email: "Simon A. F. Lund" <safl@safl.dk>
|
|
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
|
|
|
18
18
|
|
|
19
19
|
# withcache
|
|
20
20
|
|
|
21
|
-
[](https://github.com/safl/withcache/actions/workflows/ci.yml)
|
|
21
|
+
[](https://github.com/safl/withcache/actions/workflows/ci-cd.yml)
|
|
22
22
|
[](https://pypi.org/project/withcache/)
|
|
23
23
|
[](LICENSE)
|
|
24
24
|
[](https://ziglang.org)
|
|
@@ -111,8 +111,10 @@ WITHCACHE_ADMIN_PASSWORD=change-me withcache-server --data-dir ./data --port 300
|
|
|
111
111
|
|
|
112
112
|
Data (blobs + `cache.db` + `session-secret`) lives in the `/data` volume (or
|
|
113
113
|
`--data-dir`). Artifacts are immutable per version, so there's no cache
|
|
114
|
-
invalidation. `--workers N` sets the number of concurrent download workers,
|
|
115
|
-
`--curate` switches from auto-fetch to operator-approved pulls
|
|
114
|
+
invalidation. `--workers N` sets the number of concurrent download workers,
|
|
115
|
+
`--curate` switches from auto-fetch to operator-approved pulls, and `--max-bytes`
|
|
116
|
+
(e.g. `50G`) caps the cache: when full it refuses new fills (no auto-eviction),
|
|
117
|
+
and you free space by deleting artifacts in the UI.
|
|
116
118
|
|
|
117
119
|
## Use the shims (transparent `curl` / `wget`)
|
|
118
120
|
|
|
@@ -238,7 +240,7 @@ Notes & limits (all degrade gracefully; worst case is "no caching, curl still wo
|
|
|
238
240
|
`http://withcache-server:3000/` (Pico.css + HTMX, bundled offline) shows:
|
|
239
241
|
- **Misses**: auto-fetched by default, or (under `--curate`) each with **Download** (queues a background pull) and **Dismiss**.
|
|
240
242
|
- **Downloads**: live progress bars, `queued/running/completed/cancelled/failed`, **Cancel**, and **Clear finished**. Downloads run in a background worker pool, not in the request, so large pulls never block, modelled on [bty]'s job managers.
|
|
241
|
-
- **Cached artifacts**: URL, size, **hits** (times served) and **misses** (times requested before it was cached), SHA-256, fetched-at.
|
|
243
|
+
- **Cached artifacts**: URL, size, **hits** (times served) and **misses** (times requested before it was cached), SHA-256, fetched-at, each with **Delete** to free space.
|
|
242
244
|
- **Add from URI**: pre-seed an artifact before anyone misses it.
|
|
243
245
|
|
|
244
246
|
## Auth
|
|
@@ -264,6 +266,25 @@ CDN/presigned URLs (whose tokens change every request) still match by path. Pass
|
|
|
264
266
|
(`.deb`/`.rpm`) are GPG-signed and verified by the client regardless of
|
|
265
267
|
transport, so caching them this way is safe.
|
|
266
268
|
|
|
269
|
+
## Consume from another tool (the client library)
|
|
270
|
+
|
|
271
|
+
A tool that already knows its download URLs (e.g. an installer or a provisioner)
|
|
272
|
+
can prefer the cache without shelling out to a shim or re-implementing the `/b/`
|
|
273
|
+
scheme. `withcache.client` is stdlib-only, so importing it adds no dependencies:
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
from withcache import client
|
|
277
|
+
|
|
278
|
+
# "use the cache when it's warm, the origin otherwise"
|
|
279
|
+
url = client.serve_url("http://cache:3000", origin) or origin
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
`is_cached()` is a graceful `HEAD` (a miss, timeout, or unreachable cache all
|
|
283
|
+
return `False`, so you fall back to the origin), and it doubles as a warm-up:
|
|
284
|
+
the probe records the miss and, in auto-fetch mode, enqueues the fill, so the
|
|
285
|
+
next call flips to the cache. The encoding is shared with the shims and server,
|
|
286
|
+
so consumers stay in lockstep with the cache-host.
|
|
287
|
+
|
|
267
288
|
## Tests
|
|
268
289
|
|
|
269
290
|
```sh
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# withcache
|
|
2
2
|
|
|
3
|
-
[](https://github.com/safl/withcache/actions/workflows/ci.yml)
|
|
3
|
+
[](https://github.com/safl/withcache/actions/workflows/ci-cd.yml)
|
|
4
4
|
[](https://pypi.org/project/withcache/)
|
|
5
5
|
[](LICENSE)
|
|
6
6
|
[](https://ziglang.org)
|
|
@@ -93,8 +93,10 @@ WITHCACHE_ADMIN_PASSWORD=change-me withcache-server --data-dir ./data --port 300
|
|
|
93
93
|
|
|
94
94
|
Data (blobs + `cache.db` + `session-secret`) lives in the `/data` volume (or
|
|
95
95
|
`--data-dir`). Artifacts are immutable per version, so there's no cache
|
|
96
|
-
invalidation. `--workers N` sets the number of concurrent download workers,
|
|
97
|
-
`--curate` switches from auto-fetch to operator-approved pulls
|
|
96
|
+
invalidation. `--workers N` sets the number of concurrent download workers,
|
|
97
|
+
`--curate` switches from auto-fetch to operator-approved pulls, and `--max-bytes`
|
|
98
|
+
(e.g. `50G`) caps the cache: when full it refuses new fills (no auto-eviction),
|
|
99
|
+
and you free space by deleting artifacts in the UI.
|
|
98
100
|
|
|
99
101
|
## Use the shims (transparent `curl` / `wget`)
|
|
100
102
|
|
|
@@ -220,7 +222,7 @@ Notes & limits (all degrade gracefully; worst case is "no caching, curl still wo
|
|
|
220
222
|
`http://withcache-server:3000/` (Pico.css + HTMX, bundled offline) shows:
|
|
221
223
|
- **Misses**: auto-fetched by default, or (under `--curate`) each with **Download** (queues a background pull) and **Dismiss**.
|
|
222
224
|
- **Downloads**: live progress bars, `queued/running/completed/cancelled/failed`, **Cancel**, and **Clear finished**. Downloads run in a background worker pool, not in the request, so large pulls never block, modelled on [bty]'s job managers.
|
|
223
|
-
- **Cached artifacts**: URL, size, **hits** (times served) and **misses** (times requested before it was cached), SHA-256, fetched-at.
|
|
225
|
+
- **Cached artifacts**: URL, size, **hits** (times served) and **misses** (times requested before it was cached), SHA-256, fetched-at, each with **Delete** to free space.
|
|
224
226
|
- **Add from URI**: pre-seed an artifact before anyone misses it.
|
|
225
227
|
|
|
226
228
|
## Auth
|
|
@@ -246,6 +248,25 @@ CDN/presigned URLs (whose tokens change every request) still match by path. Pass
|
|
|
246
248
|
(`.deb`/`.rpm`) are GPG-signed and verified by the client regardless of
|
|
247
249
|
transport, so caching them this way is safe.
|
|
248
250
|
|
|
251
|
+
## Consume from another tool (the client library)
|
|
252
|
+
|
|
253
|
+
A tool that already knows its download URLs (e.g. an installer or a provisioner)
|
|
254
|
+
can prefer the cache without shelling out to a shim or re-implementing the `/b/`
|
|
255
|
+
scheme. `withcache.client` is stdlib-only, so importing it adds no dependencies:
|
|
256
|
+
|
|
257
|
+
```python
|
|
258
|
+
from withcache import client
|
|
259
|
+
|
|
260
|
+
# "use the cache when it's warm, the origin otherwise"
|
|
261
|
+
url = client.serve_url("http://cache:3000", origin) or origin
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
`is_cached()` is a graceful `HEAD` (a miss, timeout, or unreachable cache all
|
|
265
|
+
return `False`, so you fall back to the origin), and it doubles as a warm-up:
|
|
266
|
+
the probe records the miss and, in auto-fetch mode, enqueues the fill, so the
|
|
267
|
+
next call flips to the cache. The encoding is shared with the shims and server,
|
|
268
|
+
so consumers stay in lockstep with the cache-host.
|
|
269
|
+
|
|
249
270
|
## Tests
|
|
250
271
|
|
|
251
272
|
```sh
|
|
@@ -4,8 +4,11 @@
|
|
|
4
4
|
FROM python:3.12-slim
|
|
5
5
|
|
|
6
6
|
# Install the package (no third-party deps) to get the withcache-server command.
|
|
7
|
+
# hatch_build.py is the wheel build hook (ships the shims); without it the build
|
|
8
|
+
# fails. No zig in this image, so the shims install as Python launchers, which
|
|
9
|
+
# is fine -- the container only runs withcache-server.
|
|
7
10
|
WORKDIR /app
|
|
8
|
-
COPY pyproject.toml README.md /app/
|
|
11
|
+
COPY pyproject.toml README.md hatch_build.py /app/
|
|
9
12
|
COPY src /app/src
|
|
10
13
|
RUN pip install --no-cache-dir /app
|
|
11
14
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
.name = .withcache_shim,
|
|
3
3
|
// Zig requires a literal here; keep it in lockstep with the project's
|
|
4
4
|
// single source (src/withcache/__init__.py) via `make bump` / `make version-check`.
|
|
5
|
-
.version = "0.2.0",
|
|
5
|
+
.version = "0.4.0",
|
|
6
6
|
.fingerprint = 0xd7d96c5ed212ccaa,
|
|
7
7
|
.minimum_zig_version = "0.16.0",
|
|
8
8
|
.paths = .{
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""withcache — operator-curated, URL-keyed artifact cache for a small lab.
|
|
2
|
+
|
|
3
|
+
- ``withcache-server`` (withcache.server:main): the cache-host.
|
|
4
|
+
- ``curlwithcache`` / ``wgetwithcache``: transparent curl/wget shims, shipped
|
|
5
|
+
as a native binary or a Python launcher (see hatch_build.py).
|
|
6
|
+
- ``withcache.client``: a tiny, stdlib-only library for other tools to consume
|
|
7
|
+
a cache-host (build serve URLs, probe what's cached) without re-implementing
|
|
8
|
+
the ``/b/`` URL scheme.
|
|
9
|
+
|
|
10
|
+
All modules are stdlib-only and self-contained.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from .client import blob_url, cache_base, is_cached, serve_url
|
|
14
|
+
|
|
15
|
+
__version__ = "0.4.0"
|
|
16
|
+
|
|
17
|
+
__all__ = ["__version__", "blob_url", "cache_base", "is_cached", "serve_url"]
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""A tiny client for consuming a withcache cache-host from other tools.
|
|
2
|
+
|
|
3
|
+
Lets a consumer (e.g. bty) point downloads at withcache without re-implementing
|
|
4
|
+
the ``/b/`` URL scheme. Stdlib only, so importing it pulls in no third-party
|
|
5
|
+
dependencies.
|
|
6
|
+
|
|
7
|
+
from withcache import client
|
|
8
|
+
|
|
9
|
+
# "use the cache when it's warm, the origin otherwise"
|
|
10
|
+
url = client.serve_url("http://cache:3000", origin) or origin
|
|
11
|
+
|
|
12
|
+
The ``/b/<urlsafe-b64(origin)>/<basename>`` encoding is shared with the shims
|
|
13
|
+
and the server (one definition in :mod:`withcache._shim`), so consumers stay in
|
|
14
|
+
lockstep with the cache-host automatically.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import urllib.error
|
|
20
|
+
import urllib.request
|
|
21
|
+
|
|
22
|
+
from . import _shim
|
|
23
|
+
|
|
24
|
+
__all__ = ["PROBE_TIMEOUT", "blob_url", "cache_base", "is_cached", "serve_url"]
|
|
25
|
+
|
|
26
|
+
PROBE_TIMEOUT = 3.0 # seconds; never block the caller on a slow/unreachable cache
|
|
27
|
+
|
|
28
|
+
#: Normalize a server value: accepts 'host', 'host:3000', or 'http://host:3000'.
|
|
29
|
+
cache_base = _shim.cache_base
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def blob_url(server: str, origin: str) -> str:
|
|
33
|
+
"""The cache-host serve URL for ``origin``:
|
|
34
|
+
``<server>/b/<urlsafe-b64(origin), unpadded>/<basename>``. The trailing
|
|
35
|
+
basename is cosmetic (so any downloader names the saved file after the
|
|
36
|
+
artifact); the cache keys on the decoded origin URL."""
|
|
37
|
+
return _shim.blob_url(_shim.cache_base(server), origin)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def is_cached(
|
|
41
|
+
server: str,
|
|
42
|
+
origin: str,
|
|
43
|
+
timeout: float = PROBE_TIMEOUT,
|
|
44
|
+
headers: dict[str, str] | None = None,
|
|
45
|
+
) -> bool:
|
|
46
|
+
"""True if the cache-host already holds ``origin`` (a ``HEAD`` on ``/b/``
|
|
47
|
+
returns 200). A miss (404), an unreachable host, a timeout, or any error
|
|
48
|
+
returns False, so a caller can safely fall back to the origin. The HEAD
|
|
49
|
+
also *warms* an auto-fetch cache-host: the miss is recorded and the
|
|
50
|
+
background fill enqueued, so a later probe flips to cached.
|
|
51
|
+
|
|
52
|
+
``headers`` (optional) attaches request headers to the HEAD. The
|
|
53
|
+
cache-host forwards a client-supplied ``Authorization`` into its
|
|
54
|
+
background-fetch worker, so a consumer that has just minted an OCI
|
|
55
|
+
bearer (the typical use case: bty resolving an ``oras://`` catalog
|
|
56
|
+
entry to a ``ghcr.io`` blob URL at import time) can warm the cache
|
|
57
|
+
against that token-gated origin in one probe. Other entries in
|
|
58
|
+
``headers`` round-trip the same way; only ``Authorization`` is
|
|
59
|
+
forwarded into the fetch on the server side.
|
|
60
|
+
"""
|
|
61
|
+
req = urllib.request.Request(blob_url(server, origin), method="HEAD")
|
|
62
|
+
if headers:
|
|
63
|
+
for k, v in headers.items():
|
|
64
|
+
req.add_header(k, v)
|
|
65
|
+
try:
|
|
66
|
+
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
|
67
|
+
return bool(resp.status == 200)
|
|
68
|
+
except urllib.error.HTTPError:
|
|
69
|
+
return False # 404 miss (now recorded + enqueued by the cache-host)
|
|
70
|
+
except (urllib.error.URLError, OSError):
|
|
71
|
+
return False # unreachable / timeout -> caller serves the origin itself
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def serve_url(
|
|
75
|
+
server: str,
|
|
76
|
+
origin: str,
|
|
77
|
+
timeout: float = PROBE_TIMEOUT,
|
|
78
|
+
headers: dict[str, str] | None = None,
|
|
79
|
+
) -> str | None:
|
|
80
|
+
"""The cache-host serve URL for ``origin`` if the cache holds it, else
|
|
81
|
+
``None`` -- the convenience form of "use the cache when warm":
|
|
82
|
+
|
|
83
|
+
url = client.serve_url(cache, origin) or origin
|
|
84
|
+
|
|
85
|
+
``headers`` is passed through to :func:`is_cached` for the HEAD probe;
|
|
86
|
+
the returned serve URL never carries auth (cached bytes are served
|
|
87
|
+
without revisiting the origin).
|
|
88
|
+
"""
|
|
89
|
+
return blob_url(server, origin) if is_cached(server, origin, timeout, headers=headers) else None
|
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
"""withcache cache-host —
|
|
2
|
+
"""withcache cache-host — a URL-keyed artifact cache.
|
|
3
3
|
|
|
4
4
|
Stdlib only (http.server + sqlite3 + urllib). Serves cached blobs keyed by
|
|
5
|
-
their origin URL.
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
their origin URL. By default a cache miss is auto-fetched: it is recorded in the
|
|
6
|
+
miss table and pulled from origin in the background, so the next request hits
|
|
7
|
+
(the client falls through to origin on the first miss). Run with `--curate` to
|
|
8
|
+
require an operator to approve each pull via a small web UI instead; either way
|
|
9
|
+
you can pre-seed an artifact with the "Add from URI" form.
|
|
9
10
|
|
|
10
11
|
This is the only component that needs internet egress (and any vendor creds).
|
|
11
12
|
Clients never write to it.
|
|
12
13
|
|
|
13
|
-
Auth (
|
|
14
|
+
Auth (single-tenant: env password + signed cookie): the read path
|
|
14
15
|
(`/blob`, `/healthz`) is open so clients never log in; the operator surface
|
|
15
16
|
(`/` and `/admin/*`) is gated behind a server-signed session cookie. Login at
|
|
16
17
|
`POST /ui/login` checks the password in $WITHCACHE_ADMIN_PASSWORD and flips the
|
|
@@ -60,6 +61,28 @@ def human_size(n: int) -> str:
|
|
|
60
61
|
return f"{n} B"
|
|
61
62
|
|
|
62
63
|
|
|
64
|
+
def parse_size(s: str) -> int:
|
|
65
|
+
"""Parse '0', '1024', '50M', '20G', '1.5T' into bytes (suffixes are 1024-based)."""
|
|
66
|
+
s = str(s).strip()
|
|
67
|
+
if not s:
|
|
68
|
+
return 0
|
|
69
|
+
units = {"K": 1024, "M": 1024**2, "G": 1024**3, "T": 1024**4}
|
|
70
|
+
if s[-1].upper() in units:
|
|
71
|
+
return int(float(s[:-1]) * units[s[-1].upper()])
|
|
72
|
+
return int(s)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def parse_headers(raw: str) -> dict | None:
|
|
76
|
+
"""Parse 'Name: Value' lines (e.g. a registry Authorization header that bty
|
|
77
|
+
pre-resolves for an oras blob) into a dict for the origin fetch; None if empty."""
|
|
78
|
+
out = {}
|
|
79
|
+
for line in (raw or "").splitlines():
|
|
80
|
+
name, sep, value = line.partition(":")
|
|
81
|
+
if sep and name.strip():
|
|
82
|
+
out[name.strip()] = value.strip()
|
|
83
|
+
return out or None
|
|
84
|
+
|
|
85
|
+
|
|
63
86
|
# --------------------------------------------------------------------------
|
|
64
87
|
# Auth — server-signed session cookie (bty-style, env-password instead of PAM)
|
|
65
88
|
# --------------------------------------------------------------------------
|
|
@@ -135,12 +158,13 @@ class Auth:
|
|
|
135
158
|
class Store:
|
|
136
159
|
"""Blobs on disk keyed by hash(normalized url); metadata in SQLite."""
|
|
137
160
|
|
|
138
|
-
def __init__(self, data_dir: str, keep_query: bool):
|
|
161
|
+
def __init__(self, data_dir: str, keep_query: bool, max_bytes: int = 0):
|
|
139
162
|
self.data_dir = os.path.abspath(data_dir)
|
|
140
163
|
self.blob_dir = os.path.join(self.data_dir, "blobs")
|
|
141
164
|
self.tmp_dir = os.path.join(self.data_dir, "tmp")
|
|
142
165
|
self.db_path = os.path.join(self.data_dir, "cache.db")
|
|
143
166
|
self.keep_query = keep_query
|
|
167
|
+
self.max_bytes = max_bytes # cap on total cached bytes; 0 = unlimited
|
|
144
168
|
os.makedirs(self.blob_dir, exist_ok=True)
|
|
145
169
|
os.makedirs(self.tmp_dir, exist_ok=True)
|
|
146
170
|
self._init_db()
|
|
@@ -217,6 +241,15 @@ class Store:
|
|
|
217
241
|
m = c.execute("SELECT COUNT(*) FROM misses").fetchone()[0]
|
|
218
242
|
return b, m
|
|
219
243
|
|
|
244
|
+
def total_size(self) -> int:
|
|
245
|
+
with self.conn() as c:
|
|
246
|
+
return c.execute("SELECT COALESCE(SUM(size), 0) FROM blobs").fetchone()[0]
|
|
247
|
+
|
|
248
|
+
def has_capacity(self) -> bool:
|
|
249
|
+
"""False once stored bytes reach --max-bytes (0 = unlimited). The guard
|
|
250
|
+
refuses *new* fills when full; it never evicts (delete is manual)."""
|
|
251
|
+
return self.max_bytes <= 0 or self.total_size() < self.max_bytes
|
|
252
|
+
|
|
220
253
|
# -- writes ------------------------------------------------------------
|
|
221
254
|
def record_miss(self, url: str):
|
|
222
255
|
key = self.key_of(self.normalize(url))
|
|
@@ -243,17 +276,34 @@ class Store:
|
|
|
243
276
|
with _DB_WRITE_LOCK, self.conn() as c:
|
|
244
277
|
c.execute("DELETE FROM misses WHERE key=?", (key,))
|
|
245
278
|
|
|
246
|
-
def
|
|
279
|
+
def delete_blob(self, key: str):
|
|
280
|
+
"""Drop a cached artifact (row + bytes). The manual half of eviction."""
|
|
281
|
+
with _DB_WRITE_LOCK, self.conn() as c:
|
|
282
|
+
c.execute("DELETE FROM blobs WHERE key=?", (key,))
|
|
283
|
+
try:
|
|
284
|
+
os.remove(self.blob_path(key))
|
|
285
|
+
except FileNotFoundError:
|
|
286
|
+
pass
|
|
287
|
+
|
|
288
|
+
def store_from_origin(self, url: str, progress=None, cancel=None, headers=None) -> sqlite3.Row:
|
|
247
289
|
"""Operator-triggered: pull the artifact from origin and store it.
|
|
248
290
|
|
|
249
291
|
``progress(done, total)`` is called as bytes arrive (total may be None);
|
|
250
292
|
``cancel()`` is polled between chunks and, if truthy, aborts the pull
|
|
251
293
|
with :class:`DownloadCancelled` and leaves no partial file behind.
|
|
294
|
+
``headers`` adds request headers to the origin fetch (e.g. a registry
|
|
295
|
+
bearer token bty pre-resolved for an oras blob). Raises :class:`CacheFull`
|
|
296
|
+
if the cache is already at --max-bytes.
|
|
252
297
|
"""
|
|
298
|
+
if not self.has_capacity():
|
|
299
|
+
raise CacheFull(f"cache full (>= {self.max_bytes} bytes); refusing to fetch {url}")
|
|
253
300
|
normalized = self.normalize(url)
|
|
254
301
|
key = self.key_of(normalized)
|
|
255
302
|
tmp = os.path.join(self.tmp_dir, key + ".part")
|
|
256
|
-
|
|
303
|
+
req_headers = {"User-Agent": USER_AGENT}
|
|
304
|
+
if headers:
|
|
305
|
+
req_headers.update(headers)
|
|
306
|
+
req = urllib.request.Request(url, headers=req_headers)
|
|
257
307
|
sha = hashlib.sha256()
|
|
258
308
|
size = 0
|
|
259
309
|
try:
|
|
@@ -315,6 +365,10 @@ class DownloadCancelled(Exception):
|
|
|
315
365
|
"""Raised inside a worker when its job's cancel flag is set."""
|
|
316
366
|
|
|
317
367
|
|
|
368
|
+
class CacheFull(Exception):
|
|
369
|
+
"""Raised when --max-bytes is reached; the fill is refused, not evicted."""
|
|
370
|
+
|
|
371
|
+
|
|
318
372
|
@dataclass
|
|
319
373
|
class Job:
|
|
320
374
|
id: int
|
|
@@ -326,6 +380,7 @@ class Job:
|
|
|
326
380
|
finished_at: float | None = None
|
|
327
381
|
error: str | None = None
|
|
328
382
|
sha256: str | None = None
|
|
383
|
+
headers: dict | None = field(default=None, repr=False) # e.g. registry auth; never logged
|
|
329
384
|
_cancel: threading.Event = field(default_factory=threading.Event, repr=False)
|
|
330
385
|
|
|
331
386
|
|
|
@@ -345,12 +400,12 @@ class DownloadManager:
|
|
|
345
400
|
for _ in range(max(1, workers)):
|
|
346
401
|
threading.Thread(target=self._worker, daemon=True).start()
|
|
347
402
|
|
|
348
|
-
def enqueue(self, url: str) -> Job:
|
|
403
|
+
def enqueue(self, url: str, headers: dict | None = None) -> Job:
|
|
349
404
|
with self._lock:
|
|
350
405
|
jid = self._active.get(url)
|
|
351
406
|
if jid is not None and self._jobs[jid].status in PENDING_STATES:
|
|
352
407
|
return self._jobs[jid] # dedup an already-pending pull
|
|
353
|
-
job = Job(id=next(self._ids), url=url)
|
|
408
|
+
job = Job(id=next(self._ids), url=url, headers=headers)
|
|
354
409
|
self._jobs[job.id] = job
|
|
355
410
|
self._active[url] = job.id
|
|
356
411
|
self._q.put(job.id)
|
|
@@ -392,6 +447,7 @@ class DownloadManager:
|
|
|
392
447
|
job.url,
|
|
393
448
|
progress=lambda done, total, j=job: _set_progress(j, done, total),
|
|
394
449
|
cancel=job._cancel.is_set,
|
|
450
|
+
headers=job.headers,
|
|
395
451
|
)
|
|
396
452
|
with self._lock:
|
|
397
453
|
job.status = "completed"
|
|
@@ -474,7 +530,13 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
474
530
|
else:
|
|
475
531
|
self.send_text(404, "")
|
|
476
532
|
|
|
477
|
-
ADMIN_POST = (
|
|
533
|
+
ADMIN_POST = (
|
|
534
|
+
"/admin/fetch",
|
|
535
|
+
"/admin/dismiss",
|
|
536
|
+
"/admin/delete",
|
|
537
|
+
"/admin/cancel",
|
|
538
|
+
"/admin/clear",
|
|
539
|
+
)
|
|
478
540
|
|
|
479
541
|
def do_POST(self):
|
|
480
542
|
parsed = urllib.parse.urlsplit(self.path)
|
|
@@ -490,9 +552,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
490
552
|
if parsed.path == "/admin/fetch":
|
|
491
553
|
url = form.get("url", "").strip()
|
|
492
554
|
if url:
|
|
493
|
-
self.mgr.enqueue(url)
|
|
555
|
+
self.mgr.enqueue(url, headers=parse_headers(form.get("header", "")))
|
|
494
556
|
elif parsed.path == "/admin/dismiss":
|
|
495
557
|
self.store.dismiss(form.get("key", "").strip())
|
|
558
|
+
elif parsed.path == "/admin/delete":
|
|
559
|
+
self.store.delete_blob(form.get("key", "").strip())
|
|
496
560
|
elif parsed.path == "/admin/cancel":
|
|
497
561
|
jid = form.get("id", "")
|
|
498
562
|
if jid.isdigit():
|
|
@@ -594,11 +658,27 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
594
658
|
row = self.store.get_blob(url)
|
|
595
659
|
if row is None:
|
|
596
660
|
self.store.record_miss(url)
|
|
597
|
-
if self.auto_fetch:
|
|
661
|
+
if self.auto_fetch and self.store.has_capacity():
|
|
598
662
|
# Pull it in the background so the next request hits; the client
|
|
599
|
-
# gets this one from origin (the shim
|
|
600
|
-
# In --curate mode an operator triggers
|
|
601
|
-
|
|
663
|
+
# gets this one from origin (the shim, or bty's fallback chain,
|
|
664
|
+
# falls through on a miss). In --curate mode an operator triggers
|
|
665
|
+
# the pull instead; when the cache is full we record the miss but
|
|
666
|
+
# schedule nothing (delete something first).
|
|
667
|
+
#
|
|
668
|
+
# Forward the client's ``Authorization`` header into the worker
|
|
669
|
+
# job so a token-gated origin (typical use case: a fresh OCI
|
|
670
|
+
# bearer on a ghcr.io blob URL minted by bty-web at catalog
|
|
671
|
+
# import time) can be fetched. Without this the worker runs
|
|
672
|
+
# anonymous and 401s; the URL stays uncached forever. Keep the
|
|
673
|
+
# allowlist narrow on purpose: ``Authorization`` is the only
|
|
674
|
+
# request header we proxy onto the worker. The ``/admin/fetch``
|
|
675
|
+
# operator endpoint still carries its own ``headers=`` payload
|
|
676
|
+
# for the curated path.
|
|
677
|
+
fwd_headers = None
|
|
678
|
+
auth = self.headers.get("Authorization")
|
|
679
|
+
if auth:
|
|
680
|
+
fwd_headers = {"Authorization": auth}
|
|
681
|
+
self.mgr.enqueue(url, headers=fwd_headers)
|
|
602
682
|
self.send_text(404, "cache miss (recorded)\n")
|
|
603
683
|
return
|
|
604
684
|
path = self.store.blob_path(row["key"])
|
|
@@ -736,6 +816,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
736
816
|
jobs = self.mgr.list()
|
|
737
817
|
misses = self.store.list_misses()
|
|
738
818
|
blobs = self.store.list_blobs()
|
|
819
|
+
used = human_size(self.store.total_size())
|
|
820
|
+
if self.store.max_bytes:
|
|
821
|
+
used += f" / {human_size(self.store.max_bytes)}"
|
|
822
|
+
full = "" if self.store.has_capacity() else " · <strong>cache full</strong>"
|
|
739
823
|
|
|
740
824
|
job_rows = (
|
|
741
825
|
"".join(self._job_row(j) for j in jobs)
|
|
@@ -774,14 +858,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
774
858
|
<td class="num">{b["misses"]}</td>
|
|
775
859
|
<td class="mono">{html.escape(b["sha256"][:12])}…</td>
|
|
776
860
|
<td><small>{html.escape(b["fetched_at"])}</small></td>
|
|
861
|
+
<td>
|
|
862
|
+
<form hx-post="/admin/delete" hx-target="#dash" hx-swap="innerHTML"
|
|
863
|
+
hx-confirm="Delete this cached artifact?">
|
|
864
|
+
<input type="hidden" name="key" value="{html.escape(b["key"], quote=True)}">
|
|
865
|
+
<button type="submit" class="secondary outline">Delete</button>
|
|
866
|
+
</form>
|
|
867
|
+
</td>
|
|
777
868
|
</tr>"""
|
|
778
869
|
for b in blobs
|
|
779
870
|
)
|
|
780
|
-
or '<tr><td colspan="6"><em>Cache is empty.</em></td></tr>'
|
|
871
|
+
or '<tr><td colspan="7"><em>Cache is empty.</em></td></tr>'
|
|
781
872
|
)
|
|
782
873
|
|
|
783
874
|
return f"""
|
|
784
|
-
<p><small>{nblobs} cached · {nmisses} pending miss(es)</small></p>
|
|
875
|
+
<p><small>{nblobs} cached ({used}){full} · {nmisses} pending miss(es)</small></p>
|
|
785
876
|
|
|
786
877
|
<div class="row">
|
|
787
878
|
<h4>Downloads</h4>
|
|
@@ -805,7 +896,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
805
896
|
<figure><table class="striped">
|
|
806
897
|
<thead><tr>
|
|
807
898
|
<th>URL</th><th>Size</th><th class="num">Hits</th><th class="num">Misses</th>
|
|
808
|
-
<th>SHA-256</th><th>Fetched</th>
|
|
899
|
+
<th>SHA-256</th><th>Fetched</th><th>Action</th>
|
|
809
900
|
</tr></thead>
|
|
810
901
|
<tbody>{blob_rows}</tbody>
|
|
811
902
|
</table></figure>"""
|
|
@@ -869,9 +960,15 @@ def main():
|
|
|
869
960
|
help="require an operator to approve each pull (default: auto-fetch a "
|
|
870
961
|
"missed artifact in the background so the next request hits)",
|
|
871
962
|
)
|
|
963
|
+
ap.add_argument(
|
|
964
|
+
"--max-bytes",
|
|
965
|
+
default="0",
|
|
966
|
+
help="cap total cached bytes and refuse new fills when full (0 = "
|
|
967
|
+
"unlimited; accepts 1024-based suffixes, e.g. 50G). Eviction is manual.",
|
|
968
|
+
)
|
|
872
969
|
args = ap.parse_args()
|
|
873
970
|
|
|
874
|
-
store = Store(args.data_dir, keep_query=args.keep_query)
|
|
971
|
+
store = Store(args.data_dir, keep_query=args.keep_query, max_bytes=parse_size(args.max_bytes))
|
|
875
972
|
auth = Auth(resolve_secret(store.data_dir), os.environ.get("WITHCACHE_ADMIN_PASSWORD"))
|
|
876
973
|
mgr = DownloadManager(store, workers=args.workers)
|
|
877
974
|
|
|
@@ -883,7 +980,8 @@ def main():
|
|
|
883
980
|
print(
|
|
884
981
|
f"withcache cache-host on http://{args.host}:{args.port} "
|
|
885
982
|
f"(data={store.data_dir}, keep_query={args.keep_query}, workers={args.workers}, "
|
|
886
|
-
f"mode={'curate' if args.curate else 'auto-fetch'})",
|
|
983
|
+
f"mode={'curate' if args.curate else 'auto-fetch'}, "
|
|
984
|
+
f"max_bytes={'unlimited' if not store.max_bytes else human_size(store.max_bytes)})",
|
|
887
985
|
flush=True,
|
|
888
986
|
)
|
|
889
987
|
if not auth.enabled:
|
|
@@ -11,6 +11,7 @@ import socketserver
|
|
|
11
11
|
import sys
|
|
12
12
|
import tempfile
|
|
13
13
|
import threading
|
|
14
|
+
import time
|
|
14
15
|
import unittest
|
|
15
16
|
|
|
16
17
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
|
|
@@ -19,7 +20,7 @@ import base64 # noqa: E402
|
|
|
19
20
|
import urllib.error # noqa: E402
|
|
20
21
|
import urllib.request # noqa: E402
|
|
21
22
|
|
|
22
|
-
from withcache import _shim, curlwithcache, server, wgetwithcache # noqa: E402
|
|
23
|
+
from withcache import _shim, client, curlwithcache, server, wgetwithcache # noqa: E402
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
# --------------------------------------------------------------------------
|
|
@@ -148,6 +149,23 @@ class TestStoreFromOrigin(unittest.TestCase):
|
|
|
148
149
|
got = self.store.get_blob(url)
|
|
149
150
|
self.assertEqual((got["hits"], got["misses"]), (2, 2))
|
|
150
151
|
|
|
152
|
+
def test_delete_blob_removes_row_and_file(self):
|
|
153
|
+
url = f"http://127.0.0.1:{self.port}/artifact.bin"
|
|
154
|
+
row = self.store.store_from_origin(url)
|
|
155
|
+
path = self.store.blob_path(row["key"])
|
|
156
|
+
self.assertTrue(os.path.exists(path))
|
|
157
|
+
self.store.delete_blob(row["key"])
|
|
158
|
+
self.assertIsNone(self.store.get_blob(url))
|
|
159
|
+
self.assertFalse(os.path.exists(path))
|
|
160
|
+
|
|
161
|
+
def test_capacity_guard_refuses_new_fills_when_full(self):
|
|
162
|
+
store = server.Store(tempfile.mkdtemp(), keep_query=False, max_bytes=1)
|
|
163
|
+
self.assertTrue(store.has_capacity()) # empty: room for the first
|
|
164
|
+
store.store_from_origin(f"http://127.0.0.1:{self.port}/a.bin")
|
|
165
|
+
self.assertFalse(store.has_capacity()) # now over the 1-byte cap
|
|
166
|
+
with self.assertRaises(server.CacheFull):
|
|
167
|
+
store.store_from_origin(f"http://127.0.0.1:{self.port}/b.bin")
|
|
168
|
+
|
|
151
169
|
|
|
152
170
|
# --------------------------------------------------------------------------
|
|
153
171
|
# _shim: URL detection, rewrite, real-tool resolution, env, path-encoding
|
|
@@ -422,5 +440,215 @@ class TestAutoFetchOnMiss(unittest.TestCase):
|
|
|
422
440
|
httpd.server_close()
|
|
423
441
|
|
|
424
442
|
|
|
443
|
+
# --------------------------------------------------------------------------
|
|
444
|
+
# Fetch-with-headers: a registry blob behind bearer auth (the oras case). bty
|
|
445
|
+
# pre-resolves the token and hands it to withcache for the fill.
|
|
446
|
+
# --------------------------------------------------------------------------
|
|
447
|
+
class _AuthOrigin(http.server.BaseHTTPRequestHandler):
    """Origin stub that serves PAYLOAD only to requests carrying TOKEN.

    A GET whose ``Authorization`` header is not exactly ``TOKEN`` gets an
    empty 401; a matching one gets a 200 with PAYLOAD as the body.
    """

    TOKEN = "Bearer s3cret"

    def do_GET(self):
        """Answer 200 + PAYLOAD for the expected token, otherwise an empty 401."""
        authorized = self.headers.get("Authorization") == self.TOKEN
        body = PAYLOAD if authorized else b""
        self.send_response(200 if authorized else 401)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if authorized:
            self.wfile.write(body)

    def log_message(self, format, *args):
        """Silence the per-request log line the base handler would emit."""
        return None
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
class TestFetchWithHeaders(unittest.TestCase):
    """Store.store_from_origin() against an origin that demands a bearer token."""

    def setUp(self):
        """Start the auth-gated origin and create a store in a temp directory.

        Each resource registers its own cleanup as soon as it is acquired.
        unittest skips tearDown() when setUp() raises, but still runs
        addCleanup() callbacks, so a failure part-way through no longer leaks
        the listening socket, the serving thread or the temp directory.
        """
        import shutil  # local import: only needed for the temp-dir cleanup

        self.httpd = socketserver.TCPServer(("127.0.0.1", 0), _AuthOrigin)
        self.addCleanup(self.httpd.server_close)
        threading.Thread(target=self.httpd.serve_forever, daemon=True).start()
        # Cleanups run last-in-first-out, so shutdown() stops serve_forever
        # before server_close() releases the socket. It is registered only
        # after the thread is started because shutdown() waits for the loop.
        self.addCleanup(self.httpd.shutdown)
        self.url = f"http://127.0.0.1:{self.httpd.server_address[1]}/blob.bin"

        root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, root, ignore_errors=True)
        self.store = server.Store(root, keep_query=False)

    def test_fetch_without_header_is_rejected(self):
        # No headers: the origin's 401 surfaces as an HTTPError.
        with self.assertRaises(urllib.error.HTTPError) as cm:
            self.store.store_from_origin(self.url)
        self.assertEqual(cm.exception.code, 401)

    def test_fetch_with_bearer_header_succeeds(self):
        # With the expected Authorization header the fill succeeds and the
        # returned row reports the payload size.
        row = self.store.store_from_origin(
            self.url, headers={"Authorization": _AuthOrigin.TOKEN}
        )
        self.assertEqual(row["size"], len(PAYLOAD))
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
# --------------------------------------------------------------------------
|
|
487
|
+
# HEAD with an Authorization header should propagate that header into the
|
|
488
|
+
# auto-fetch worker so a 401-gated origin (e.g. a ghcr.io blob URL behind a
|
|
489
|
+
# bty-minted OCI bearer) actually fills. Without this propagation the worker
|
|
490
|
+
# pulls anonymous, the origin 401s, and the URL stays uncached forever.
|
|
491
|
+
# --------------------------------------------------------------------------
|
|
492
|
+
class TestHeadForwardsAuthorizationToAutoFetch(unittest.TestCase):
    """A HEAD on a blob URL hands its Authorization header to the auto-fetch fill."""

    def setUp(self):
        """Start an auth-gated origin and an auto-fetching withcache server.

        Cleanups are registered with addCleanup() as each server comes up:
        tearDown() is skipped when setUp() raises, so with a tearDown-based
        teardown a failure in _start_withcache() would leak the origin's
        socket and thread.
        """
        self.origin = socketserver.TCPServer(("127.0.0.1", 0), _AuthOrigin)
        self.addCleanup(self.origin.server_close)
        threading.Thread(target=self.origin.serve_forever, daemon=True).start()
        # LIFO order: shutdown() runs before server_close().
        self.addCleanup(self.origin.shutdown)
        self.origin_url = f"http://127.0.0.1:{self.origin.server_address[1]}/blob.bin"

        # NOTE(review): assumes _start_withcache() returns a server that is
        # already being served in a background thread (the previous tearDown
        # called shutdown() on it unconditionally) -- confirm against helper.
        self.httpd, self.store = _start_withcache(auto_fetch=True)
        self.addCleanup(self.httpd.server_close)
        self.addCleanup(self.httpd.shutdown)
        self.base = f"http://127.0.0.1:{self.httpd.server_address[1]}"

    def _wait_for_fill(self, timeout_s=2.0):
        """Poll the store until the origin URL is cached.

        Returns True as soon as get_blob() yields a row, False if *timeout_s*
        seconds pass without one.
        """
        deadline = time.monotonic() + timeout_s
        while time.monotonic() < deadline:
            if self.store.get_blob(self.origin_url) is not None:
                return True
            time.sleep(0.02)
        return False

    def _assert_head_is_miss(self, headers=None):
        """HEAD the blob URL for the origin and assert the server answers 404.

        The explicit timeout turns a wedged server into a test failure
        instead of a hung test run.
        """
        req = urllib.request.Request(
            _shim.blob_url(self.base, self.origin_url),
            headers=headers or {},
            method="HEAD",
        )
        with self.assertRaises(urllib.error.HTTPError) as cm:
            urllib.request.urlopen(req, timeout=5)
        self.assertEqual(cm.exception.code, 404)  # miss; recorded + enqueued

    def test_head_with_authorization_triggers_authed_fetch(self):
        self._assert_head_is_miss({"Authorization": _AuthOrigin.TOKEN})
        # The worker should have fetched in the background using the header.
        self.assertTrue(
            self._wait_for_fill(),
            "expected blob to be cached after auth-bearing HEAD",
        )

    def test_head_without_authorization_leaves_origin_401_and_cache_empty(self):
        # Negative: no Authorization on the HEAD means the worker is enqueued
        # anonymous, the origin 401s, nothing lands. Verifies the new code
        # path is genuinely opt-in (HEAD without auth keeps the old behaviour).
        self._assert_head_is_miss()
        self.assertFalse(
            self._wait_for_fill(timeout_s=0.5),
            "expected no blob without forwarded auth",
        )
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
# --------------------------------------------------------------------------
|
|
541
|
+
# Pure helpers
|
|
542
|
+
# --------------------------------------------------------------------------
|
|
543
|
+
class TestParsers(unittest.TestCase):
    """Checks for the pure helpers server.parse_size and server.parse_headers."""

    def test_parse_size(self):
        # Empty and "0" mean zero; a bare number is bytes; M and G suffixes
        # are binary multiples, and fractional values are truncated to int.
        inputs = ["", "0", "1024", "50M", "1.5G"]
        expected = [0, 0, 1024, 50 * 1024**2, int(1.5 * 1024**3)]
        self.assertEqual([server.parse_size(text) for text in inputs], expected)

    def test_parse_headers(self):
        # Empty input yields None; otherwise one "Name: value" per line.
        self.assertIsNone(server.parse_headers(""))

        single = server.parse_headers("Authorization: Bearer x")
        self.assertEqual(single, {"Authorization": "Bearer x"})

        pair = server.parse_headers("A: 1\nB: 2")
        self.assertEqual(pair, {"A": "1", "B": "2"})
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
# --------------------------------------------------------------------------
|
|
560
|
+
# Client library: what a consumer (e.g. bty) imports instead of reimplementing
|
|
561
|
+
# the /b/ protocol.
|
|
562
|
+
# --------------------------------------------------------------------------
|
|
563
|
+
class TestClientLibrary(unittest.TestCase):
    """Exercise client.blob_url / is_cached / serve_url against a live cache server."""

    def setUp(self):
        """Start a plain origin and a withcache server.

        Teardown is registered with addCleanup() as each server is created.
        unittest skips tearDown() when setUp() raises, so with a
        tearDown-based teardown a failure in _start_withcache() would leak
        the origin's socket and serving thread.
        """
        self.origin = socketserver.TCPServer(("127.0.0.1", 0), _Origin)
        self.addCleanup(self.origin.server_close)
        threading.Thread(target=self.origin.serve_forever, daemon=True).start()
        # LIFO order: shutdown() runs before server_close().
        self.addCleanup(self.origin.shutdown)
        self.origin_url = f"http://127.0.0.1:{self.origin.server_address[1]}/art.bin"

        # NOTE(review): assumes _start_withcache() returns a server that is
        # already being served in a background thread (the previous tearDown
        # called shutdown() on it unconditionally) -- confirm against helper.
        self.httpd, self.store = _start_withcache()
        self.addCleanup(self.httpd.server_close)
        self.addCleanup(self.httpd.shutdown)
        self.base = f"http://127.0.0.1:{self.httpd.server_address[1]}"

    def test_blob_url_matches_shim_and_normalizes_server(self):
        # accepts a host/host:port/http URL and emits the same /b/ URL as the shim
        self.assertEqual(
            client.blob_url(self.base, self.origin_url),
            _shim.blob_url(_shim.cache_base(self.base), self.origin_url),
        )

    def test_is_cached_and_serve_url_track_the_cache(self):
        # Before the fill: not cached, and serve_url has nothing to offer.
        self.assertFalse(client.is_cached(self.base, self.origin_url))
        self.assertIsNone(client.serve_url(self.base, self.origin_url))

        self.store.store_from_origin(self.origin_url)  # warm it

        # After the fill: cached, and serve_url is the blob URL.
        self.assertTrue(client.is_cached(self.base, self.origin_url))
        self.assertEqual(
            client.serve_url(self.base, self.origin_url),
            client.blob_url(self.base, self.origin_url),
        )

    def test_is_cached_unreachable_is_false(self):
        # A server address nothing is expected to answer on is reported as
        # "not cached" rather than raising.
        self.assertFalse(client.is_cached("http://127.0.0.1:9", self.origin_url, timeout=0.5))
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
# --------------------------------------------------------------------------
|
|
598
|
+
# Client + server end-to-end: a HEAD with ``headers={"Authorization": ...}``
|
|
599
|
+
# warms the cache against a 401-gated origin. Mirrors the bty oras case
|
|
600
|
+
# (resolved ghcr.io blob URL + freshly-minted OCI bearer).
|
|
601
|
+
# --------------------------------------------------------------------------
|
|
602
|
+
class TestClientLibraryAuthForwarding(unittest.TestCase):
    """client.is_cached(headers=...) warms the cache against a 401-gated origin."""

    def setUp(self):
        """Start an auth-gated origin and an auto-fetching withcache server.

        Teardown is registered with addCleanup() as each server is created.
        unittest skips tearDown() when setUp() raises, so with a
        tearDown-based teardown a failure in _start_withcache() would leak
        the origin's socket and serving thread.
        """
        self.origin = socketserver.TCPServer(("127.0.0.1", 0), _AuthOrigin)
        self.addCleanup(self.origin.server_close)
        threading.Thread(target=self.origin.serve_forever, daemon=True).start()
        # LIFO order: shutdown() runs before server_close().
        self.addCleanup(self.origin.shutdown)
        self.origin_url = f"http://127.0.0.1:{self.origin.server_address[1]}/blob.bin"

        # NOTE(review): assumes _start_withcache() returns a server that is
        # already being served in a background thread (the previous tearDown
        # called shutdown() on it unconditionally) -- confirm against helper.
        self.httpd, self.store = _start_withcache(auto_fetch=True)
        self.addCleanup(self.httpd.server_close)
        self.addCleanup(self.httpd.shutdown)
        self.base = f"http://127.0.0.1:{self.httpd.server_address[1]}"

    def _poll_for_blob(self, timeout_s):
        """Poll the store for the origin URL until a row appears or time runs out.

        Returns the row from Store.get_blob(), or None if nothing landed
        within *timeout_s* seconds. Replaces two copy-pasted polling loops.
        """
        deadline = time.monotonic() + timeout_s
        while True:
            row = self.store.get_blob(self.origin_url)
            if row is not None or time.monotonic() >= deadline:
                return row
            time.sleep(0.02)

    def test_is_cached_with_authorization_warms_auth_gated_origin(self):
        # Cold: no auth -> background fetch goes anonymous, 401s, cache empty.
        self.assertFalse(client.is_cached(self.base, self.origin_url))
        self.assertIsNone(self._poll_for_blob(0.5))

        # Warm-with-token: HEAD carries Authorization; server forwards it
        # into the fetch worker; the auth-gated origin returns the bytes.
        self.assertFalse(
            client.is_cached(
                self.base,
                self.origin_url,
                headers={"Authorization": _AuthOrigin.TOKEN},
            )
        )
        self.assertIsNotNone(
            self._poll_for_blob(2.0),
            "expected auth-bearing HEAD to fill the cache via forwarded Authorization",
        )

        # And once cached, the auth header is no longer needed: a plain HEAD
        # hits 200, serve_url returns the blob URL without auth.
        self.assertTrue(client.is_cached(self.base, self.origin_url))
        self.assertEqual(
            client.serve_url(self.base, self.origin_url),
            client.blob_url(self.base, self.origin_url),
        )
|
|
651
|
+
|
|
652
|
+
|
|
425
653
|
if __name__ == "__main__":
    # Running the file directly executes the whole suite with per-test output.
    unittest.main(verbosity=2)
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
"""withcache — operator-curated, URL-keyed artifact cache for a small lab.
|
|
2
|
-
|
|
3
|
-
Two console entry points (see pyproject.toml):
|
|
4
|
-
withcache -> withcache.client:main (the cache-aware downloader)
|
|
5
|
-
withcache-server -> withcache.server:main (the cache-host)
|
|
6
|
-
|
|
7
|
-
Both modules are stdlib-only and self-contained, so either file can also be
|
|
8
|
-
copied and run on its own with a plain ``python3``.
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
__version__ = "0.2.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|