withcache 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
withcache/server.py ADDED
@@ -0,0 +1,901 @@
1
+ #!/usr/bin/env python3
2
+ """withcache cache-host — an operator-curated artifact cache.
3
+
4
+ Stdlib only (http.server + sqlite3 + urllib). Serves cached blobs keyed by
5
+ their origin URL. A cache miss is never fetched inline: it is recorded in a
6
+ miss table and, by default, pulled from origin in the background so the next
7
+ request hits. With --curate, an operator reviews misses and presses "Download"
8
+ instead. An "add from URI" form pre-seeds an artifact before anyone misses it.
9
+
10
+ This is the only component that needs internet egress (and any vendor creds).
11
+ Clients never write to it.
12
+
13
+ Auth (modelled on bty's single-tenant approach, minus PAM): the read path
14
+ (`/blob`, `/healthz`) is open so clients never log in; the operator surface
15
+ (`/` and `/admin/*`) is gated behind a server-signed session cookie. Login at
16
+ `POST /ui/login` checks the password in $WITHCACHE_ADMIN_PASSWORD and flips the
17
+ cookie to authenticated; the cookie is HMAC-signed with a secret read from
18
+ $WITHCACHE_SESSION_SECRET or persisted to ``<data-dir>/session-secret``. If no
19
+ admin password is set, the operator UI is left open (with a startup warning).
20
+ """
21
+
22
+ import argparse
23
+ import base64
24
+ import hashlib
25
+ import hmac
26
+ import html
27
+ import http.cookies
28
+ import http.server
29
+ import itertools
30
+ import json
31
+ import os
32
+ import queue
33
+ import secrets
34
+ import socketserver
35
+ import sqlite3
36
+ import threading
37
+ import time
38
+ import urllib.parse
39
+ import urllib.request
40
+ from dataclasses import dataclass, field
41
+ from datetime import datetime, timezone
42
+
43
# Read/write granularity, in bytes, used when streaming blobs from origin to
# disk and from disk to clients.
CHUNK = 64 * 1024
# User-Agent header sent with origin pulls.
USER_AGENT = "withcache-cache/0.1"
# Directory next to this module that /static/ assets are served from.
STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
# Content-Type per static file extension; other extensions are served as
# application/octet-stream.
MIME_TYPES = {".css": "text/css; charset=utf-8", ".js": "application/javascript; charset=utf-8"}
# Process-wide lock held around every SQLite write issued by Store.
_DB_WRITE_LOCK = threading.Lock()
48
+
49
+
50
def now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    stamp = datetime.now(tz=timezone.utc)
    return format(stamp, "%Y-%m-%dT%H:%M:%SZ")
52
+
53
+
54
def human_size(n: int) -> str:
    """Format a byte count using binary units (B, KiB, MiB, GiB, TiB).

    Plain bytes are shown without decimals; larger units get one decimal.
    Values beyond the TiB range stay expressed in TiB.
    """
    units = ("B", "KiB", "MiB", "GiB", "TiB")
    value = float(n)
    idx = 0
    # Step up one unit at a time until the value fits or the units run out.
    while value >= 1024 and idx < len(units) - 1:
        value /= 1024
        idx += 1
    if idx == 0:
        return f"{value:.0f} B"
    return f"{value:.1f} {units[idx]}"
61
+
62
+
63
+ # --------------------------------------------------------------------------
64
+ # Auth — server-signed session cookie (bty-style, env-password instead of PAM)
65
+ # --------------------------------------------------------------------------
66
def _b64e(raw: bytes) -> str:
    """Return URL-safe base64 of *raw* as ASCII text with the ``=`` padding removed."""
    encoded = base64.urlsafe_b64encode(raw)
    return encoded.rstrip(b"=").decode("ascii")
68
+
69
+
70
def _b64d(s: str) -> bytes:
    """Decode unpadded URL-safe base64 text, restoring the ``=`` padding first."""
    missing = -len(s) % 4
    return base64.urlsafe_b64decode(s + "=" * missing)
72
+
73
+
74
def resolve_secret(data_dir: str) -> bytes:
    """Return the key used to sign session cookies.

    Resolution order:

    1. ``$WITHCACHE_SESSION_SECRET`` when it is set to something other than
       whitespace (a blank value must not silently weaken signing).
    2. The non-empty contents of ``<data_dir>/session-secret``.
    3. A freshly generated random key, written to that file (created with
       mode 0600) so cookies survive restarts.
    """
    from_env = (os.environ.get("WITHCACHE_SESSION_SECRET") or "").strip()
    if from_env:
        return from_env.encode("utf-8")

    secret_file = os.path.join(data_dir, "session-secret")
    if os.path.exists(secret_file):
        with open(secret_file, "rb") as fh:
            stored = fh.read().strip()
        if stored:
            return stored

    fresh = secrets.token_hex(32).encode("ascii")
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    with os.fdopen(os.open(secret_file, flags, 0o600), "wb") as fh:
        fh.write(fresh)
    return fresh
92
+
93
+
94
+ class Auth:
95
+ COOKIE = "withcache-token"
96
+ MAX_AGE = 7 * 24 * 3600 # cookie lifetime, seconds
97
+
98
+ def __init__(self, secret: bytes, password: str | None):
99
+ self.secret = secret
100
+ self.password = password or None
101
+
102
+ @property
103
+ def enabled(self) -> bool:
104
+ return self.password is not None
105
+
106
+ def _sign(self, payload_b64: str) -> str:
107
+ mac = hmac.new(self.secret, payload_b64.encode("ascii"), hashlib.sha256)
108
+ return _b64e(mac.digest())
109
+
110
+ def make_token(self) -> str:
111
+ payload = _b64e(json.dumps({"a": 1, "iat": int(time.time())}).encode())
112
+ return f"{payload}.{self._sign(payload)}"
113
+
114
+ def valid(self, token: str) -> bool:
115
+ try:
116
+ payload, sig = token.split(".", 1)
117
+ if not hmac.compare_digest(sig, self._sign(payload)):
118
+ return False
119
+ data = json.loads(_b64d(payload))
120
+ if int(time.time()) - int(data.get("iat", 0)) > self.MAX_AGE:
121
+ return False
122
+ return bool(data.get("a"))
123
+ except Exception:
124
+ return False
125
+
126
+ def check_password(self, pw: str) -> bool:
127
+ if not self.password:
128
+ return False
129
+ return hmac.compare_digest(pw.encode("utf-8"), self.password.encode("utf-8"))
130
+
131
+
132
+ # --------------------------------------------------------------------------
133
+ # Storage
134
+ # --------------------------------------------------------------------------
135
class Store:
    """Blobs on disk keyed by hash(normalized url); metadata in SQLite.

    Layout under ``data_dir``: ``blobs/<key>`` holds each artifact, ``tmp/``
    holds in-flight downloads and ``cache.db`` holds the ``blobs`` and
    ``misses`` tables. Every database operation opens its own short-lived
    connection and closes it when done; writes additionally serialize on the
    module-level ``_DB_WRITE_LOCK``.
    """

    # Only these origin schemes may be pulled. urllib would otherwise also open
    # file:// and ftp:// URLs, so a requested "origin" could read local files.
    ALLOWED_SCHEMES = ("http", "https")

    def __init__(self, data_dir: str, keep_query: bool):
        """Create the on-disk layout (if missing) and initialise the database.

        ``keep_query`` decides whether the URL query string is part of the
        cache key (see :meth:`normalize`).
        """
        self.data_dir = os.path.abspath(data_dir)
        self.blob_dir = os.path.join(self.data_dir, "blobs")
        self.tmp_dir = os.path.join(self.data_dir, "tmp")
        self.db_path = os.path.join(self.data_dir, "cache.db")
        self.keep_query = keep_query
        os.makedirs(self.blob_dir, exist_ok=True)
        os.makedirs(self.tmp_dir, exist_ok=True)
        self._init_db()

    def conn(self) -> sqlite3.Connection:
        """Open a new connection whose rows support access by column name.

        The caller owns the connection and must close it.
        """
        c = sqlite3.connect(self.db_path, timeout=30)
        c.row_factory = sqlite3.Row
        return c

    def _txn(self) -> "_Txn":
        """Return a context manager for one connection: commit/rollback, then close."""
        return _Txn(self.conn())

    def _init_db(self):
        """Create the tables and add the per-blob counters to older databases."""
        with self._txn() as c:
            c.executescript(
                """
                CREATE TABLE IF NOT EXISTS blobs (
                    key TEXT PRIMARY KEY,
                    url TEXT NOT NULL,
                    size INTEGER NOT NULL,
                    sha256 TEXT NOT NULL,
                    content_type TEXT,
                    fetched_at TEXT NOT NULL,
                    hits INTEGER NOT NULL DEFAULT 0,
                    misses INTEGER NOT NULL DEFAULT 0
                );
                CREATE TABLE IF NOT EXISTS misses (
                    key TEXT PRIMARY KEY,
                    url TEXT NOT NULL,
                    count INTEGER NOT NULL,
                    first_seen TEXT NOT NULL,
                    last_seen TEXT NOT NULL
                );
                """
            )
            # Migrate DBs created before the per-blob request counters existed.
            cols = {r["name"] for r in c.execute("PRAGMA table_info(blobs)")}
            for col in ("hits", "misses"):
                if col not in cols:
                    c.execute(f"ALTER TABLE blobs ADD COLUMN {col} INTEGER NOT NULL DEFAULT 0")

    # -- key handling ------------------------------------------------------
    def normalize(self, url: str) -> str:
        """Reduce *url* to the form the cache key is computed from.

        Scheme and host are lower-cased and the fragment is dropped; the query
        string is kept only when ``keep_query`` is set.
        """
        p = urllib.parse.urlsplit(url)
        base = f"{p.scheme.lower()}://{p.netloc.lower()}{p.path}"
        if self.keep_query and p.query:
            return f"{base}?{p.query}"
        return base

    @staticmethod
    def key_of(normalized: str) -> str:
        """Return the hex SHA-256 of a normalized URL (the blob's key and file name)."""
        return hashlib.sha256(normalized.encode("utf-8")).hexdigest()

    def blob_path(self, key: str) -> str:
        """Return the on-disk path of the blob stored under *key*."""
        return os.path.join(self.blob_dir, key)

    # -- reads -------------------------------------------------------------
    def get_blob(self, url: str):
        """Return the metadata row for *url*, or None on a miss.

        A row whose file is missing from disk also counts as a miss.
        """
        key = self.key_of(self.normalize(url))
        with self._txn() as c:
            row = c.execute("SELECT * FROM blobs WHERE key=?", (key,)).fetchone()
        if row and os.path.exists(self.blob_path(key)):
            return row
        return None

    def list_blobs(self):
        """Return all blob rows, most recently fetched first."""
        with self._txn() as c:
            return c.execute("SELECT * FROM blobs ORDER BY fetched_at DESC").fetchall()

    def list_misses(self):
        """Return all recorded misses, most recently seen first."""
        with self._txn() as c:
            return c.execute("SELECT * FROM misses ORDER BY last_seen DESC").fetchall()

    def counts(self):
        """Return ``(number of blobs, number of distinct missed keys)``."""
        with self._txn() as c:
            b = c.execute("SELECT COUNT(*) FROM blobs").fetchone()[0]
            m = c.execute("SELECT COUNT(*) FROM misses").fetchone()[0]
        return b, m

    # -- writes ------------------------------------------------------------
    def record_miss(self, url: str):
        """Insert a miss for *url*, or bump its count and last-seen time."""
        key = self.key_of(self.normalize(url))
        ts = now_iso()
        with _DB_WRITE_LOCK, self._txn() as c:
            c.execute(
                """
                INSERT INTO misses (key, url, count, first_seen, last_seen)
                VALUES (?, ?, 1, ?, ?)
                ON CONFLICT(key) DO UPDATE SET
                    count = count + 1,
                    last_seen = excluded.last_seen,
                    url = excluded.url
                """,
                (key, url, ts, ts),
            )

    def record_hit(self, key: str):
        """Count one cache-served download (the GET, not the shim's HEAD probe)."""
        with _DB_WRITE_LOCK, self._txn() as c:
            c.execute("UPDATE blobs SET hits = hits + 1 WHERE key=?", (key,))

    def dismiss(self, key: str):
        """Delete the miss recorded under *key*, if any."""
        with _DB_WRITE_LOCK, self._txn() as c:
            c.execute("DELETE FROM misses WHERE key=?", (key,))

    def store_from_origin(self, url: str, progress=None, cancel=None) -> sqlite3.Row:
        """Operator-triggered: pull the artifact from origin and store it.

        ``progress(done, total)`` is called as bytes arrive (total may be None);
        ``cancel()`` is polled between chunks and, if truthy, aborts the pull
        with :class:`DownloadCancelled` and leaves no partial file behind.

        Returns the stored blob's metadata row.

        Raises:
            ValueError: *url* is not an http(s) URL.
            OSError: origin delivered fewer or more bytes than its
                Content-Length announced, or the transfer itself failed
                (urllib errors are OSError subclasses).
            DownloadCancelled: ``cancel()`` returned true mid-transfer.
        """
        scheme = urllib.parse.urlsplit(url).scheme.lower()
        if scheme not in self.ALLOWED_SCHEMES:
            raise ValueError(f"unsupported origin URL scheme: {scheme or '(none)'}")
        normalized = self.normalize(url)
        key = self.key_of(normalized)
        # A unique temp name per pull: two URLs can normalize to the same key
        # (e.g. differing only in query string) and be downloaded concurrently,
        # and they must not interleave writes into one shared .part file.
        tmp = os.path.join(self.tmp_dir, f"{key}.{secrets.token_hex(8)}.part")
        req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
        sha = hashlib.sha256()
        size = 0
        try:
            with urllib.request.urlopen(req, timeout=120) as resp:
                content_type = resp.headers.get_content_type()
                cl = resp.headers.get("Content-Length")
                total = int(cl) if cl and cl.isdigit() else None
                if progress:
                    progress(0, total)
                with open(tmp, "wb") as f:
                    while True:
                        if cancel and cancel():
                            raise DownloadCancelled()
                        chunk = resp.read(CHUNK)
                        if not chunk:
                            break
                        f.write(chunk)
                        sha.update(chunk)
                        size += len(chunk)
                        if progress:
                            progress(size, total)
            # A connection dropped mid-body can look like a clean end of
            # stream; never publish a blob that is shorter than announced.
            if total is not None and size != total:
                raise OSError(f"truncated download: got {size} of {total} bytes")
            os.replace(tmp, self.blob_path(key))
        except BaseException:
            if os.path.exists(tmp):
                os.remove(tmp)  # no half-written blob on cancel/error
            raise
        ts = now_iso()
        with _DB_WRITE_LOCK, self._txn() as c:
            # Carry the miss count accumulated while uncached onto the blob, so a
            # URL's total request history (misses-before-cached + hits-since)
            # survives the miss->cached transition. hits are preserved on a
            # re-download (not in the UPDATE set).
            row = c.execute("SELECT count FROM misses WHERE key=?", (key,)).fetchone()
            prior_misses = row["count"] if row else 0
            c.execute(
                """
                INSERT INTO blobs (key, url, size, sha256, content_type, fetched_at, hits, misses)
                VALUES (?, ?, ?, ?, ?, ?, 0, ?)
                ON CONFLICT(key) DO UPDATE SET
                    url = excluded.url, size = excluded.size,
                    sha256 = excluded.sha256, content_type = excluded.content_type,
                    fetched_at = excluded.fetched_at,
                    misses = blobs.misses + excluded.misses
                """,
                (key, url, size, sha.hexdigest(), content_type, ts, prior_misses),
            )
            c.execute("DELETE FROM misses WHERE key=?", (key,))
            return c.execute("SELECT * FROM blobs WHERE key=?", (key,)).fetchone()


class _Txn:
    """Context manager around one short-lived SQLite connection.

    ``with connection:`` on a bare sqlite3 connection only commits or rolls
    back; it never closes. This wrapper performs the same commit/rollback and
    then closes the connection, so nothing is left open for the garbage
    collector to clean up.
    """

    def __init__(self, connection: sqlite3.Connection):
        self._connection = connection

    def __enter__(self) -> sqlite3.Connection:
        return self._connection

    def __exit__(self, exc_type, exc, tb) -> bool:
        try:
            if exc_type is None:
                self._connection.commit()
            else:
                self._connection.rollback()
        finally:
            self._connection.close()
        return False  # never swallow the caller's exception
305
+
306
+
307
+ # --------------------------------------------------------------------------
308
+ # Background download manager (thread pool; modelled on bty's job managers)
309
+ # --------------------------------------------------------------------------
310
# Every status a Job can hold.
JOB_STATES = ("queued", "running", "completed", "cancelled", "failed")
# Statuses that mean a job is not finished yet: such jobs can be cancelled,
# are deduplicated on enqueue and survive "clear finished".
PENDING_STATES = frozenset(("queued", "running"))
312
+
313
+
314
class DownloadCancelled(Exception):
    """Raised inside a worker when its job's cancel flag is set.

    ``Store.store_from_origin`` raises it when the ``cancel`` callback it polls
    between chunks returns true.
    """
316
+
317
+
318
@dataclass
class Job:
    """In-memory record of one background download."""

    # Identifier handed out by DownloadManager (counts up from 1).
    id: int
    # Origin URL being pulled.
    url: str
    # One of JOB_STATES.
    status: str = "queued"
    # Bytes received so far, updated through the progress callback.
    bytes_done: int = 0
    # Expected size, when origin announced one; None otherwise.
    bytes_total: int | None = None
    # time.time() at which a worker picked the job up.
    started_at: float | None = None
    # time.time() at which the job reached a final status.
    finished_at: float | None = None
    # str() of the exception that ended the job, if any.
    error: str | None = None
    # Digest of the stored blob, filled in on completion.
    sha256: str | None = None
    # Set by DownloadManager.cancel(); the download polls it between chunks.
    _cancel: threading.Event = field(default_factory=threading.Event, repr=False)
330
+
331
+
332
class DownloadManager:
    """Runs origin pulls on background worker threads.

    ``enqueue()`` returns at once; daemon workers take job ids off a queue,
    report progress onto the Job and honor its per-job cancel flag. Jobs exist
    only in memory (finished artifacts persist as blobs), so a restart drops
    whatever was queued or running.
    """

    def __init__(self, store: Store, workers: int = 2):
        self.store = store
        self._jobs: dict[int, Job] = {}
        self._active: dict[str, int] = {}  # url -> job id, while queued/running
        self._lock = threading.Lock()
        self._q: queue.Queue[int] = queue.Queue()
        self._ids = itertools.count(1)
        pool_size = workers if workers > 1 else 1  # always at least one worker
        for _ in range(pool_size):
            threading.Thread(target=self._worker, daemon=True).start()

    def enqueue(self, url: str) -> Job:
        """Queue a pull of *url*, or return the job already pending for it."""
        with self._lock:
            existing_id = self._active.get(url)
            if existing_id is not None:
                existing = self._jobs[existing_id]
                if existing.status in PENDING_STATES:
                    return existing  # dedup an already-pending pull
            created = Job(id=next(self._ids), url=url)
            self._jobs[created.id] = created
            self._active[url] = created.id
            self._q.put(created.id)
            return created

    def cancel(self, jid: int) -> Job | None:
        """Request cancellation of job *jid*; return the job, or None if unknown.

        A queued job is finalized immediately; a running one is flagged and
        stops at its next cancel poll. Finished jobs are returned untouched.
        """
        with self._lock:
            target = self._jobs.get(jid)
            if target is None or target.status not in PENDING_STATES:
                return target
            target._cancel.set()
            if target.status == "queued":  # never started: terminate now
                target.status = "cancelled"
                target.finished_at = time.time()
                self._active.pop(target.url, None)
            return target

    def list(self) -> list[Job]:
        """Return all known jobs, newest (highest id) first."""
        with self._lock:
            # Jobs are stored under their own id, so ordering the keys orders the jobs.
            return [self._jobs[jid] for jid in sorted(self._jobs, reverse=True)]

    def clear_finished(self):
        """Forget every job that is no longer queued or running."""
        with self._lock:
            finished = [jid for jid, job in self._jobs.items() if job.status not in PENDING_STATES]
            for jid in finished:
                del self._jobs[jid]

    def _worker(self):
        """Worker loop: take ids off the queue forever and run the claimable ones."""
        while True:
            job = self._claim(self._q.get())
            if job is not None:
                self._run(job)

    def _claim(self, jid):
        """Mark job *jid* as running and return it; None if it must be skipped."""
        with self._lock:
            job = self._jobs.get(jid)
            if job is None or job.status != "queued":
                return None  # cancelled while queued, or gone
            job.status = "running"
            job.started_at = time.time()
            return job

    def _run(self, job):
        """Perform the download for *job* and record how it ended."""

        def report(done, total):
            _set_progress(job, done, total)

        try:
            row = self.store.store_from_origin(
                job.url,
                progress=report,
                cancel=job._cancel.is_set,
            )
            with self._lock:
                job.status = "completed"
                job.sha256 = row["sha256"]
                job.bytes_done = job.bytes_total = row["size"]
        except DownloadCancelled:
            with self._lock:
                job.status = "cancelled"
        except Exception as e:
            with self._lock:
                # An error that surfaces after a cancel request is still a cancel.
                job.status = "cancelled" if job._cancel.is_set() else "failed"
                job.error = str(e)
        finally:
            with self._lock:
                job.finished_at = time.time()
                if self._active.get(job.url) == job.id:
                    self._active.pop(job.url, None)
412
+
413
+
414
def _set_progress(job: Job, done: int, total: int | None):
    """Record download progress on *job*.

    ``bytes_done`` is always overwritten; ``bytes_total`` only when a total is
    known, so an earlier value is never reset to None.
    """
    job.bytes_done = done
    if total is None:
        return
    job.bytes_total = total
418
+
419
+
420
+ # --------------------------------------------------------------------------
421
+ # HTTP handler
422
+ # --------------------------------------------------------------------------
423
+ class Handler(http.server.BaseHTTPRequestHandler):
424
+ server_version = "withcache/0.1"
425
+ protocol_version = "HTTP/1.1"
426
+
427
+ @property
428
+ def store(self) -> Store:
429
+ return self.server.store # type: ignore[attr-defined]
430
+
431
+ @property
432
+ def auth(self) -> Auth:
433
+ return self.server.auth # type: ignore[attr-defined]
434
+
435
+ @property
436
+ def mgr(self) -> DownloadManager:
437
+ return self.server.mgr # type: ignore[attr-defined]
438
+
439
+ @property
440
+ def auto_fetch(self) -> bool:
441
+ return self.server.auto_fetch # type: ignore[attr-defined]
442
+
443
+ def log_message(self, format, *args): # quieter, single-line
444
+ print(f"{self.address_string()} - {format % args}", flush=True)
445
+
446
+ # -- routing -----------------------------------------------------------
447
+ def do_GET(self):
448
+ parsed = urllib.parse.urlsplit(self.path)
449
+ if parsed.path == "/blob" or parsed.path.startswith("/b/"):
450
+ self.handle_blob(parsed, head_only=False)
451
+ elif parsed.path == "/healthz":
452
+ self.send_text(200, "ok\n")
453
+ elif parsed.path.startswith("/static/"):
454
+ self.serve_static(parsed)
455
+ elif parsed.path == "/ui/login":
456
+ self.handle_login_form()
457
+ elif parsed.path == "/admin/dash":
458
+ if not self.is_authed():
459
+ self.send_text(401, "login required\n")
460
+ else:
461
+ self.send_html(200, self.render_dash())
462
+ elif parsed.path == "/":
463
+ if not self.is_authed():
464
+ self.redirect("/ui/login")
465
+ else:
466
+ self.send_html(200, self.render_page())
467
+ else:
468
+ self.send_text(404, "not found\n")
469
+
470
+ def do_HEAD(self):
471
+ parsed = urllib.parse.urlsplit(self.path)
472
+ if parsed.path == "/blob" or parsed.path.startswith("/b/"):
473
+ self.handle_blob(parsed, head_only=True)
474
+ else:
475
+ self.send_text(404, "")
476
+
477
+ ADMIN_POST = ("/admin/fetch", "/admin/dismiss", "/admin/cancel", "/admin/clear")
478
+
479
+ def do_POST(self):
480
+ parsed = urllib.parse.urlsplit(self.path)
481
+ form = self.read_form()
482
+ if parsed.path == "/ui/login":
483
+ self.handle_login_submit(form)
484
+ elif parsed.path == "/ui/logout":
485
+ self.handle_logout()
486
+ elif parsed.path in self.ADMIN_POST:
487
+ if not self.is_authed():
488
+ self.send_text(401, "login required\n")
489
+ return
490
+ if parsed.path == "/admin/fetch":
491
+ url = form.get("url", "").strip()
492
+ if url:
493
+ self.mgr.enqueue(url)
494
+ elif parsed.path == "/admin/dismiss":
495
+ self.store.dismiss(form.get("key", "").strip())
496
+ elif parsed.path == "/admin/cancel":
497
+ jid = form.get("id", "")
498
+ if jid.isdigit():
499
+ self.mgr.cancel(int(jid))
500
+ elif parsed.path == "/admin/clear":
501
+ self.mgr.clear_finished()
502
+ self.respond_admin()
503
+ else:
504
+ self.send_text(404, "not found\n")
505
+
506
+ # -- auth helpers ------------------------------------------------------
507
+ def is_authed(self) -> bool:
508
+ if not self.auth.enabled:
509
+ return True # no password configured -> open operator UI
510
+ token = self.cookie(Auth.COOKIE)
511
+ return bool(token and self.auth.valid(token))
512
+
513
+ def cookie(self, name: str):
514
+ raw = self.headers.get("Cookie")
515
+ if not raw:
516
+ return None
517
+ jar = http.cookies.SimpleCookie()
518
+ try:
519
+ jar.load(raw)
520
+ except http.cookies.CookieError:
521
+ return None
522
+ m = jar.get(name)
523
+ return m.value if m else None
524
+
525
+ def is_htmx(self) -> bool:
526
+ return self.headers.get("HX-Request") == "true"
527
+
528
+ def serve_static(self, parsed):
529
+ name = os.path.basename(parsed.path) # basename blocks path traversal
530
+ path = os.path.join(STATIC_DIR, name)
531
+ if not name or not os.path.isfile(path):
532
+ self.send_text(404, "not found\n")
533
+ return
534
+ with open(path, "rb") as f:
535
+ data = f.read()
536
+ ext = os.path.splitext(name)[1]
537
+ self.send_response(200)
538
+ self.send_header("Content-Type", MIME_TYPES.get(ext, "application/octet-stream"))
539
+ self.send_header("Content-Length", str(len(data)))
540
+ self.send_header("Cache-Control", "public, max-age=86400")
541
+ self.end_headers()
542
+ if self.command != "HEAD":
543
+ self.wfile.write(data)
544
+
545
+ def respond_admin(self):
546
+ """HTMX actions get the refreshed dashboard fragment; plain form posts
547
+ (no JS) fall back to a full-page redirect."""
548
+ if self.is_htmx():
549
+ self.send_html(200, self.render_dash())
550
+ else:
551
+ self.redirect("/")
552
+
553
+ def handle_login_form(self):
554
+ if self.is_authed():
555
+ self.redirect("/")
556
+ return
557
+ self.send_html(200, self.render_login())
558
+
559
+ def handle_login_submit(self, form):
560
+ if not self.auth.enabled:
561
+ self.redirect("/")
562
+ return
563
+ if self.auth.check_password(form.get("password", "")):
564
+ cookie = (
565
+ f"{Auth.COOKIE}={self.auth.make_token()}; HttpOnly; "
566
+ f"SameSite=Lax; Path=/; Max-Age={Auth.MAX_AGE}"
567
+ )
568
+ self.redirect("/", set_cookie=cookie)
569
+ print(f"{self.address_string()} - login succeeded", flush=True)
570
+ else:
571
+ print(f"{self.address_string()} - login failed", flush=True)
572
+ self.send_html(401, self.render_login(error="Invalid password."))
573
+
574
+ def handle_logout(self):
575
+ expired = f"{Auth.COOKIE}=; HttpOnly; SameSite=Lax; Path=/; Max-Age=0"
576
+ self.redirect("/ui/login", set_cookie=expired)
577
+
578
+ # -- blob serving ------------------------------------------------------
579
+ def _blob_origin(self, parsed) -> str:
580
+ """Origin URL from either /blob?url=<origin> or /b/<base64>/<name>."""
581
+ if parsed.path.startswith("/b/"):
582
+ token = parsed.path[len("/b/") :].split("/", 1)[0]
583
+ try:
584
+ return base64.urlsafe_b64decode(token + "=" * (-len(token) % 4)).decode("utf-8")
585
+ except (ValueError, UnicodeDecodeError):
586
+ return ""
587
+ return (urllib.parse.parse_qs(parsed.query).get("url") or [""])[0]
588
+
589
+ def handle_blob(self, parsed, head_only: bool):
590
+ url = self._blob_origin(parsed)
591
+ if not url:
592
+ self.send_text(400, "missing url\n")
593
+ return
594
+ row = self.store.get_blob(url)
595
+ if row is None:
596
+ self.store.record_miss(url)
597
+ if self.auto_fetch:
598
+ # Pull it in the background so the next request hits; the client
599
+ # gets this one from origin (the shim falls through on a miss).
600
+ # In --curate mode an operator triggers the pull instead.
601
+ self.mgr.enqueue(url)
602
+ self.send_text(404, "cache miss (recorded)\n")
603
+ return
604
+ path = self.store.blob_path(row["key"])
605
+ self.send_response(200)
606
+ self.send_header("Content-Type", row["content_type"] or "application/octet-stream")
607
+ self.send_header("Content-Length", str(row["size"]))
608
+ self.send_header("X-Withcache-Sha256", row["sha256"])
609
+ self.end_headers()
610
+ if head_only:
611
+ return # the shim's HEAD probe — not a served download, so don't count it
612
+ self.store.record_hit(row["key"])
613
+ try:
614
+ with open(path, "rb") as f:
615
+ while True:
616
+ chunk = f.read(CHUNK)
617
+ if not chunk:
618
+ break
619
+ self.wfile.write(chunk)
620
+ except (BrokenPipeError, ConnectionResetError):
621
+ pass # client went away mid-stream
622
+
623
+ # -- helpers -----------------------------------------------------------
624
+ def read_form(self) -> dict:
625
+ length = int(self.headers.get("Content-Length", 0) or 0)
626
+ body = self.rfile.read(length).decode("utf-8") if length else ""
627
+ return {k: v[0] for k, v in urllib.parse.parse_qs(body).items()}
628
+
629
+ def send_text(self, code: int, text: str):
630
+ data = text.encode("utf-8")
631
+ self.send_response(code)
632
+ self.send_header("Content-Type", "text/plain; charset=utf-8")
633
+ self.send_header("Content-Length", str(len(data)))
634
+ self.end_headers()
635
+ if self.command != "HEAD":
636
+ self.wfile.write(data)
637
+
638
+ def send_html(self, code: int, body: str):
639
+ data = body.encode("utf-8")
640
+ self.send_response(code)
641
+ self.send_header("Content-Type", "text/html; charset=utf-8")
642
+ self.send_header("Content-Length", str(len(data)))
643
+ self.end_headers()
644
+ self.wfile.write(data)
645
+
646
+ def redirect(self, location: str, set_cookie: str | None = None):
647
+ self.send_response(303)
648
+ self.send_header("Location", location)
649
+ if set_cookie:
650
+ self.send_header("Set-Cookie", set_cookie)
651
+ self.send_header("Content-Length", "0")
652
+ self.end_headers()
653
+
654
+ # -- HTML --------------------------------------------------------------
655
+ STATUS_COLORS = {
656
+ "queued": "#888",
657
+ "running": "var(--pico-primary, #0172ad)",
658
+ "completed": "#2e7d32",
659
+ "failed": "#c0392b",
660
+ "cancelled": "#888",
661
+ }
662
+
663
+ def _head(self, title: str) -> str:
664
+ return f"""<!doctype html>
665
+ <html lang="en"><head>
666
+ <meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1">
667
+ <title>{title}</title>
668
+ <link rel="stylesheet" href="/static/pico.min.css">
669
+ <script src="/static/htmx.min.js"></script>
670
+ <style>
671
+ main.container {{ max-width: 1100px; padding-top: 1rem; }}
672
+ h4 {{ margin-bottom: .4rem; }}
673
+ table {{ font-size: .9rem; margin-bottom: 0; }}
674
+ .url {{ word-break: break-all; }}
675
+ .num {{ text-align: right; }}
676
+ .mono {{ font-family: var(--pico-font-family-monospace); font-size: .85em; }}
677
+ td form {{ display: inline; margin: 0; }}
678
+ td button {{ width: auto; display: inline-block; margin: 0 .3rem 0 0;
679
+ padding: .15rem .6rem; font-size: .8rem; }}
680
+ td progress {{ margin: 0 0 .15rem; }}
681
+ #spin {{ width: 7rem; height: .5rem; margin: 0; }}
682
+ .row {{ display: flex; align-items: center; justify-content: space-between; }}
683
+ .err {{ background: var(--pico-del-color, #c0392b); color: #fff;
684
+ padding: .7rem 1rem; border-radius: var(--pico-border-radius); margin-bottom: 1rem; }}
685
+ </style>
686
+ </head>"""
687
+
688
+ def render_login(self, error: str = "") -> str:
689
+ err = f'<div class="err">{html.escape(error)}</div>' if error else ""
690
+ return f"""{self._head("withcache — login")}
691
+ <body><main class="container">
692
+ <article style="max-width: 24rem; margin: 4rem auto;">
693
+ <hgroup><h2>withcache</h2><p>operator login</p></hgroup>
694
+ {err}
695
+ <form method="post" action="/ui/login">
696
+ <input type="password" name="password" placeholder="Admin password" autofocus required>
697
+ <button type="submit">Log in</button>
698
+ </form>
699
+ </article>
700
+ </main></body></html>"""
701
+
702
+ def render_page(self) -> str:
703
+ logout = (
704
+ '<li><form method="post" action="/ui/logout" style="margin:0">'
705
+ '<button type="submit" class="secondary outline" '
706
+ 'style="width:auto;padding:.3rem .8rem">Log out</button></form></li>'
707
+ if self.auth.enabled
708
+ else ""
709
+ )
710
+ return f"""{self._head("withcache cache-host")}
711
+ <body><main class="container">
712
+ <nav>
713
+ <ul><li><strong>withcache</strong> &nbsp;<small>cache-host</small></li></ul>
714
+ <ul>
715
+ <li><progress id="spin" class="htmx-indicator"></progress></li>
716
+ {logout}
717
+ </ul>
718
+ </nav>
719
+
720
+ <h4>Add from URI</h4>
721
+ <form hx-post="/admin/fetch" hx-target="#dash" hx-swap="innerHTML"
722
+ hx-indicator="#spin" hx-on::after-request="this.reset()">
723
+ <fieldset role="group">
724
+ <input type="url" name="url" placeholder="https://origin/path/artifact.tar.gz" required>
725
+ <button type="submit">Fetch &amp; store</button>
726
+ </fieldset>
727
+ </form>
728
+
729
+ <div id="dash" hx-get="/admin/dash" hx-trigger="load, every 1s" hx-swap="innerHTML">
730
+ {self.render_dash()}
731
+ </div>
732
+ </main></body></html>"""
733
+
734
+ def render_dash(self) -> str:
735
+ nblobs, nmisses = self.store.counts()
736
+ jobs = self.mgr.list()
737
+ misses = self.store.list_misses()
738
+ blobs = self.store.list_blobs()
739
+
740
+ job_rows = (
741
+ "".join(self._job_row(j) for j in jobs)
742
+ or '<tr><td colspan="4"><em>No downloads yet.</em></td></tr>'
743
+ )
744
+
745
+ miss_rows = (
746
+ "".join(
747
+ f"""<tr>
748
+ <td class="url">{html.escape(m["url"])}</td>
749
+ <td class="num">{m["count"]}</td>
750
+ <td><small>{html.escape(m["last_seen"])}</small></td>
751
+ <td>
752
+ <form hx-post="/admin/fetch" hx-target="#dash"
753
+ hx-swap="innerHTML" hx-indicator="#spin">
754
+ <input type="hidden" name="url" value="{html.escape(m["url"], quote=True)}">
755
+ <button type="submit">Download</button>
756
+ </form>
757
+ <form hx-post="/admin/dismiss" hx-target="#dash" hx-swap="innerHTML">
758
+ <input type="hidden" name="key" value="{html.escape(m["key"], quote=True)}">
759
+ <button type="submit" class="secondary outline">Dismiss</button>
760
+ </form>
761
+ </td>
762
+ </tr>"""
763
+ for m in misses
764
+ )
765
+ or '<tr><td colspan="4"><em>No misses recorded.</em></td></tr>'
766
+ )
767
+
768
+ blob_rows = (
769
+ "".join(
770
+ f"""<tr>
771
+ <td class="url">{html.escape(b["url"])}</td>
772
+ <td>{human_size(b["size"])}</td>
773
+ <td class="num">{b["hits"]}</td>
774
+ <td class="num">{b["misses"]}</td>
775
+ <td class="mono">{html.escape(b["sha256"][:12])}…</td>
776
+ <td><small>{html.escape(b["fetched_at"])}</small></td>
777
+ </tr>"""
778
+ for b in blobs
779
+ )
780
+ or '<tr><td colspan="6"><em>Cache is empty.</em></td></tr>'
781
+ )
782
+
783
+ return f"""
784
+ <p><small>{nblobs} cached &middot; {nmisses} pending miss(es)</small></p>
785
+
786
+ <div class="row">
787
+ <h4>Downloads</h4>
788
+ <form hx-post="/admin/clear" hx-target="#dash" hx-swap="innerHTML" style="margin:0">
789
+ <button type="submit" class="secondary outline" style="width:auto;padding:.2rem .7rem">
790
+ Clear finished</button>
791
+ </form>
792
+ </div>
793
+ <figure><table class="striped">
794
+ <thead><tr><th>Artifact</th><th>Progress</th><th>Status</th><th></th></tr></thead>
795
+ <tbody>{job_rows}</tbody>
796
+ </table></figure>
797
+
798
+ <h4>Misses</h4>
799
+ <figure><table class="striped">
800
+ <thead><tr><th>URL</th><th class="num">Misses</th><th>Last seen</th><th>Action</th></tr></thead>
801
+ <tbody>{miss_rows}</tbody>
802
+ </table></figure>
803
+
804
+ <h4>Cached artifacts</h4>
805
+ <figure><table class="striped">
806
+ <thead><tr>
807
+ <th>URL</th><th>Size</th><th class="num">Hits</th><th class="num">Misses</th>
808
+ <th>SHA-256</th><th>Fetched</th>
809
+ </tr></thead>
810
+ <tbody>{blob_rows}</tbody>
811
+ </table></figure>"""
812
+
813
+ def _job_row(self, j: Job) -> str:
814
+ name = os.path.basename(urllib.parse.urlsplit(j.url).path) or j.url
815
+ if j.status == "running":
816
+ if j.bytes_total:
817
+ pct = int(j.bytes_done * 100 / j.bytes_total)
818
+ prog = (
819
+ f'<progress value="{j.bytes_done}" max="{j.bytes_total}"></progress>'
820
+ f"<small>{human_size(j.bytes_done)} / {human_size(j.bytes_total)} "
821
+ f"({pct}%)</small>"
822
+ )
823
+ else:
824
+ prog = f"<progress></progress><small>{human_size(j.bytes_done)}</small>"
825
+ elif j.status == "completed":
826
+ prog = f"<small>{human_size(j.bytes_done)}</small>"
827
+ elif j.status == "failed":
828
+ prog = f"<small>{html.escape(j.error or 'error')}</small>"
829
+ else: # queued / cancelled
830
+ prog = "<small>—</small>"
831
+ cancel = ""
832
+ if j.status in PENDING_STATES:
833
+ cancel = (
834
+ '<form hx-post="/admin/cancel" hx-target="#dash" hx-swap="innerHTML">'
835
+ f'<input type="hidden" name="id" value="{j.id}">'
836
+ '<button type="submit" class="secondary outline">Cancel</button></form>'
837
+ )
838
+ color = self.STATUS_COLORS.get(j.status, "#888")
839
+ return f"""<tr>
840
+ <td class="url" title="{html.escape(j.url, quote=True)}">{html.escape(name)}</td>
841
+ <td>{prog}</td>
842
+ <td><small style="color:{color};font-weight:600">{j.status}</small></td>
843
+ <td>{cancel}</td>
844
+ </tr>"""
845
+
846
+
847
class ThreadingHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
    """HTTPServer that handles each request on its own thread."""

    # Request threads are daemon threads, so they never hold up interpreter exit.
    daemon_threads = True
    # Allow rebinding the listening address right after a restart.
    allow_reuse_address = True
850
+
851
+
852
def main():
    """Command-line entry point: parse options, wire the components, serve forever.

    The admin password comes from ``$WITHCACHE_ADMIN_PASSWORD``; when it is not
    set the operator UI is left open and a warning is printed at startup.
    Ctrl-C ends the serve loop.
    """
    parser = argparse.ArgumentParser(description="withcache cache-host")
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=3000)
    parser.add_argument("--data-dir", default="./data")
    parser.add_argument(
        "--keep-query",
        action="store_true",
        help="include the URL query string in the cache key "
        "(default: drop it, so signed/tokened URLs still match by path)",
    )
    parser.add_argument(
        "--workers", type=int, default=2, help="concurrent background download workers (default: 2)"
    )
    parser.add_argument(
        "--curate",
        action="store_true",
        help="require an operator to approve each pull (default: auto-fetch a "
        "missed artifact in the background so the next request hits)",
    )
    opts = parser.parse_args()

    store = Store(opts.data_dir, keep_query=opts.keep_query)
    auth = Auth(resolve_secret(store.data_dir), os.environ.get("WITHCACHE_ADMIN_PASSWORD"))
    mgr = DownloadManager(store, workers=opts.workers)

    # The handler reads its collaborators off the server object.
    httpd = ThreadingHTTPServer((opts.host, opts.port), Handler)
    httpd.store = store  # type: ignore[attr-defined]
    httpd.auth = auth  # type: ignore[attr-defined]
    httpd.mgr = mgr  # type: ignore[attr-defined]
    httpd.auto_fetch = not opts.curate  # type: ignore[attr-defined]

    mode = "curate" if opts.curate else "auto-fetch"
    banner = (
        f"withcache cache-host on http://{opts.host}:{opts.port} "
        f"(data={store.data_dir}, keep_query={opts.keep_query}, workers={opts.workers}, "
        f"mode={mode})"
    )
    print(banner, flush=True)
    if not auth.enabled:
        print(
            "WARNING: WITHCACHE_ADMIN_PASSWORD not set — operator UI is UNAUTHENTICATED.",
            flush=True,
        )
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nbye", flush=True)
898
+
899
+
900
# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    main()