withcache 0.4.3__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: withcache
3
- Version: 0.4.3
3
+ Version: 0.5.0
4
4
  Summary: Operator-curated, URL-keyed artifact cache for a small lab (CUDA/ROCm/DOCA/firmware)
5
5
  Project-URL: Homepage, https://github.com/safl/withcache
6
6
  Author-email: "Simon A. F. Lund" <safl@safl.dk>
@@ -2,7 +2,7 @@
2
2
  .name = .withcache_shim,
3
3
  // Zig requires a literal here; keep it in lockstep with the project's
4
4
  // single source (src/withcache/__init__.py) via `make bump` / `make version-check`.
5
- .version = "0.4.3",
5
+ .version = "0.5.0",
6
6
  .fingerprint = 0xd7d96c5ed212ccaa,
7
7
  .minimum_zig_version = "0.16.0",
8
8
  .paths = .{
@@ -12,6 +12,6 @@ All modules are stdlib-only and self-contained.
12
12
 
13
13
  from .client import blob_url, cache_base, is_cached, serve_url
14
14
 
15
- __version__ = "0.4.3"
15
+ __version__ = "0.5.0"
16
16
 
17
17
  __all__ = ["__version__", "blob_url", "cache_base", "is_cached", "serve_url"]
@@ -64,6 +64,18 @@ def now_iso() -> str:
64
64
  return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
65
65
 
66
66
 
67
+ def _age_human(started_at: float, *, now: float | None = None) -> str:
68
+ """Render seconds-since as a compact ``Ns`` / ``Nm`` / ``Nh`` string for
69
+ the streams table. ``now`` is injectable so tests don't need
70
+ monkeypatching ``time.time`` to assert formatting."""
71
+ elapsed = int(max(0.0, (now if now is not None else time.time()) - started_at))
72
+ if elapsed < 60:
73
+ return f"{elapsed}s"
74
+ if elapsed < 3600:
75
+ return f"{elapsed // 60}m{elapsed % 60:02d}s"
76
+ return f"{elapsed // 3600}h{(elapsed % 3600) // 60:02d}m"
77
+
78
+
67
79
  def human_size(n: int) -> str:
68
80
  f = float(n)
69
81
  for unit in ("B", "KiB", "MiB", "GiB", "TiB"):
@@ -460,6 +472,65 @@ class TruncatedDownload(Exception):
460
472
  """
461
473
 
462
474
 
475
+ @dataclass
476
+ class Stream:
477
+ """One in-flight blob serve. Lives in memory only for the duration of
478
+ the response: registered before the first byte goes out, deregistered
479
+ in a finally block. Operator visibility into "what is the cache
480
+ currently uploading, and to whom" without touching the kernel's
481
+ /proc/net/tcp or the access log.
482
+ """
483
+
484
+ id: int
485
+ url: str
486
+ client: str # ``ip:port`` of the consumer
487
+ started_at: float
488
+ bytes_sent: int = 0
489
+ total: int | None = None # known up front from the blob row
490
+
491
+
492
+ class StreamRegistry:
493
+ """Thread-safe registry of in-flight blob serves. Reads (snapshot for
494
+ the operator dash) and writes (start / progress / finish from
495
+ request handler threads) all serialised on a single lock; the
496
+ contention window is the few microseconds of a dict mutation, and
497
+ progress updates are batched at one per chunk (see PROGRESS_STRIDE)
498
+ so a 4 GiB stream is ~64k updates, not millions.
499
+ """
500
+
501
+ PROGRESS_STRIDE = 16 # update bytes_sent every N chunks (~1 MiB at CHUNK=64K)
502
+
503
+ def __init__(self) -> None:
504
+ self._ids = itertools.count(1)
505
+ self._lock = threading.Lock()
506
+ self._active: dict[int, Stream] = {}
507
+
508
+ def start(self, url: str, client: str, total: int | None) -> Stream:
509
+ with self._lock:
510
+ s = Stream(
511
+ id=next(self._ids), url=url, client=client, started_at=time.time(), total=total
512
+ )
513
+ self._active[s.id] = s
514
+ return s
515
+
516
+ def bump(self, stream_id: int, bytes_sent: int) -> None:
517
+ # Caller already gates by PROGRESS_STRIDE so this is cheap; the
518
+ # write itself only takes the lock long enough to mutate the int.
519
+ with self._lock:
520
+ s = self._active.get(stream_id)
521
+ if s is not None:
522
+ s.bytes_sent = bytes_sent
523
+
524
+ def finish(self, stream_id: int) -> None:
525
+ with self._lock:
526
+ self._active.pop(stream_id, None)
527
+
528
+ def snapshot(self) -> list[Stream]:
529
+ with self._lock:
530
+ # Stable order: oldest first (matches the queue mental model).
531
+ return sorted(self._active.values(), key=lambda s: s.started_at)
532
+
533
+
463
534
  @dataclass
464
535
  class Job:
465
536
  id: int
@@ -587,6 +658,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
587
658
  def auto_fetch(self) -> bool:
588
659
  return self.server.auto_fetch # type: ignore[attr-defined]
589
660
 
661
+ @property
662
+ def streams(self) -> StreamRegistry:
663
+ return self.server.streams # type: ignore[attr-defined]
664
+
590
665
  def log_message(self, format, *args): # quieter, single-line
591
666
  print(f"{self.address_string()} - {format % args}", flush=True)
592
667
 
@@ -779,17 +854,39 @@ class Handler(http.server.BaseHTTPRequestHandler):
779
854
  self.send_header("X-Withcache-Sha256", row["sha256"])
780
855
  self.end_headers()
781
856
  if head_only:
782
- return # the shim's HEAD probe not a served download, so don't count it
857
+ return # the shim's HEAD probe (not a served download, so don't count it)
783
858
  self.store.record_hit(row["key"])
859
+ # Register the stream BEFORE we open the file so an operator
860
+ # watching the dash sees the serve immediately (even if the
861
+ # disk read stalls). The handler runs on a worker thread per
862
+ # the ThreadingHTTPServer mixin, so the registry sees
863
+ # concurrent calls; StreamRegistry serialises on its own lock.
864
+ client = f"{self.client_address[0]}:{self.client_address[1]}"
865
+ stream = self.streams.start(url=url, client=client, total=row["size"])
784
866
  try:
785
867
  with open(path, "rb") as f:
868
+ sent = 0
869
+ ticks = 0
786
870
  while True:
787
871
  chunk = f.read(CHUNK)
788
872
  if not chunk:
789
873
  break
790
874
  self.wfile.write(chunk)
875
+ sent += len(chunk)
876
+ ticks += 1
877
+ # Batched progress update: every 16 chunks (~1 MiB
878
+ # at CHUNK=64K) is plenty for a 1 Hz dashboard and
879
+ # keeps lock-contention sane on a busy box.
880
+ if ticks % StreamRegistry.PROGRESS_STRIDE == 0:
881
+ self.streams.bump(stream.id, sent)
882
+ # Final position so the dash's last frame shows the
883
+ # serve completing at the declared total, not at
884
+ # whatever the last batched update happened to be.
885
+ self.streams.bump(stream.id, sent)
791
886
  except (BrokenPipeError, ConnectionResetError):
792
887
  pass # client went away mid-stream
888
+ finally:
889
+ self.streams.finish(stream.id)
793
890
 
794
891
  # -- helpers -----------------------------------------------------------
795
892
  def read_form(self) -> dict:
@@ -903,7 +1000,15 @@ class Handler(http.server.BaseHTTPRequestHandler):
903
1000
  </fieldset>
904
1001
  </form>
905
1002
 
906
- <div id="dash" hx-get="/admin/dash" hx-trigger="load, every 1s" hx-swap="innerHTML">
1003
+ <!-- The hx-trigger gates polling on the user NOT having an active
1004
+ text selection, so highlight-and-copy a URL out of a table cell
1005
+ isn't wiped by the 1 Hz refresh. ``isCollapsed`` is true when
1006
+ there's no selection or the caret is a zero-width point; once
1007
+ the operator releases / clears the selection polling resumes
1008
+ on the next 1 s tick. -->
1009
+ <div id="dash" hx-get="/admin/dash"
1010
+ hx-trigger="load, every 1s [document.getSelection().isCollapsed]"
1011
+ hx-swap="innerHTML">
907
1012
  {self.render_dash()}
908
1013
  </div>
909
1014
  </main></body></html>"""
@@ -913,11 +1018,62 @@ class Handler(http.server.BaseHTTPRequestHandler):
913
1018
  jobs = self.mgr.list()
914
1019
  misses = self.store.list_misses()
915
1020
  blobs = self.store.list_blobs()
1021
+ streams = self.streams.snapshot()
916
1022
  used = human_size(self.store.total_size())
917
1023
  if self.store.max_bytes:
918
1024
  used += f" / {human_size(self.store.max_bytes)}"
919
1025
  full = "" if self.store.has_capacity() else " &middot; <strong>cache full</strong>"
920
1026
 
1027
+ # Tabs are pure-CSS via :target. The URL hash names the active
1028
+ # section; htmx innerHTML-replacement of #dash leaves the hash
1029
+ # alone, so the operator's tab choice survives every refresh.
1030
+ # ``body:not(:has(section:target))`` selects the default tab
1031
+ # when no hash is present; :has() lands cleanly on Chrome 105+,
1032
+ # Firefox 121+, Safari 15.4+, which is the whole modern web by
1033
+ # the time this ships in 2026.
1034
+ tab_style = """
1035
+ <style>
1036
+ nav.tabs { margin: 1rem 0 .25rem; border-bottom: 1px solid var(--pico-muted-border-color); }
1037
+ nav.tabs ul { display: flex; gap: 0; padding: 0; margin: 0; list-style: none; }
1038
+ nav.tabs li { margin: 0; }
1039
+ nav.tabs a {
1040
+ display: inline-block; padding: .45rem .9rem; text-decoration: none;
1041
+ color: var(--pico-muted-color); border-bottom: 2px solid transparent;
1042
+ margin-bottom: -1px; font-size: .9rem;
1043
+ }
1044
+ nav.tabs a:hover { color: var(--pico-color); }
1045
+ section.tab { display: none; padding-top: .75rem; }
1046
+ section.tab:target { display: block; }
1047
+ body:not(:has(section.tab:target)) section.tab#tab-streams { display: block; }
1048
+ body:has(#tab-streams:target) nav.tabs a[href="#tab-streams"],
1049
+ body:has(#tab-downloads:target) nav.tabs a[href="#tab-downloads"],
1050
+ body:has(#tab-misses:target) nav.tabs a[href="#tab-misses"],
1051
+ body:has(#tab-cached:target) nav.tabs a[href="#tab-cached"] {
1052
+ color: var(--pico-color);
1053
+ border-bottom-color: var(--pico-primary, #0172ad);
1054
+ font-weight: 600;
1055
+ }
1056
+ body:not(:has(section.tab:target)) nav.tabs a[href="#tab-streams"] {
1057
+ color: var(--pico-color);
1058
+ border-bottom-color: var(--pico-primary, #0172ad);
1059
+ font-weight: 600;
1060
+ }
1061
+ </style>
1062
+ """
1063
+
1064
+ stream_rows = (
1065
+ "".join(
1066
+ f"""<tr>
1067
+ <td class="url">{html.escape(s.url)}</td>
1068
+ <td class="mono"><small>{html.escape(s.client)}</small></td>
1069
+ <td>{self._stream_progress_cell(s)}</td>
1070
+ <td><small>{_age_human(s.started_at)}</small></td>
1071
+ </tr>"""
1072
+ for s in streams
1073
+ )
1074
+ or '<tr><td colspan="4"><em>No active streams.</em></td></tr>'
1075
+ )
1076
+
921
1077
  job_rows = (
922
1078
  "".join(self._job_row(j) for j in jobs)
923
1079
  or '<tr><td colspan="4"><em>No downloads yet.</em></td></tr>'
@@ -968,35 +1124,76 @@ class Handler(http.server.BaseHTTPRequestHandler):
968
1124
  or '<tr><td colspan="7"><em>Cache is empty.</em></td></tr>'
969
1125
  )
970
1126
 
1127
+ # Per-tab counts let the operator see at a glance whether each
1128
+ # section is empty without flipping to it.
1129
+ nstreams = len(streams)
1130
+ njobs = len(jobs)
1131
+
971
1132
  return f"""
972
1133
  <p><small>{nblobs} cached ({used}){full} &middot; {nmisses} pending miss(es)</small></p>
973
-
974
- <div class="row">
975
- <h4>Downloads</h4>
976
- <form hx-post="/admin/clear" hx-target="#dash" hx-swap="innerHTML" style="margin:0">
977
- <button type="submit" class="secondary outline" style="width:auto;padding:.2rem .7rem">
978
- Clear finished</button>
979
- </form>
980
- </div>
981
- <figure><table class="striped">
982
- <thead><tr><th>Artifact</th><th>Progress</th><th>Status</th><th></th></tr></thead>
983
- <tbody>{job_rows}</tbody>
984
- </table></figure>
985
-
986
- <h4>Misses</h4>
987
- <figure><table class="striped">
988
- <thead><tr><th>URL</th><th class="num">Misses</th><th>Last seen</th><th>Action</th></tr></thead>
989
- <tbody>{miss_rows}</tbody>
990
- </table></figure>
991
-
992
- <h4>Cached artifacts</h4>
993
- <figure><table class="striped">
994
- <thead><tr>
995
- <th>URL</th><th>Size</th><th class="num">Hits</th><th class="num">Misses</th>
996
- <th>SHA-256</th><th>Fetched</th><th>Action</th>
997
- </tr></thead>
998
- <tbody>{blob_rows}</tbody>
999
- </table></figure>"""
1134
+ {tab_style}
1135
+ <nav class="tabs"><ul>
1136
+ <li><a href="#tab-streams">Streams ({nstreams})</a></li>
1137
+ <li><a href="#tab-downloads">Downloads ({njobs})</a></li>
1138
+ <li><a href="#tab-misses">Misses ({nmisses})</a></li>
1139
+ <li><a href="#tab-cached">Cached ({nblobs})</a></li>
1140
+ </ul></nav>
1141
+
1142
+ <section id="tab-streams" class="tab">
1143
+ <figure><table class="striped">
1144
+ <thead><tr>
1145
+ <th>URL</th><th>Client</th><th>Progress</th><th>Age</th>
1146
+ </tr></thead>
1147
+ <tbody>{stream_rows}</tbody>
1148
+ </table></figure>
1149
+ </section>
1150
+
1151
+ <section id="tab-downloads" class="tab">
1152
+ <div class="row">
1153
+ <small>Auto-fetch workers feeding the cache.</small>
1154
+ <form hx-post="/admin/clear" hx-target="#dash" hx-swap="innerHTML" style="margin:0">
1155
+ <button type="submit" class="secondary outline" style="width:auto;padding:.2rem .7rem">
1156
+ Clear finished</button>
1157
+ </form>
1158
+ </div>
1159
+ <figure><table class="striped">
1160
+ <thead><tr><th>Artifact</th><th>Progress</th><th>Status</th><th></th></tr></thead>
1161
+ <tbody>{job_rows}</tbody>
1162
+ </table></figure>
1163
+ </section>
1164
+
1165
+ <section id="tab-misses" class="tab">
1166
+ <figure><table class="striped">
1167
+ <thead><tr>
1168
+ <th>URL</th><th class="num">Misses</th><th>Last seen</th><th>Action</th>
1169
+ </tr></thead>
1170
+ <tbody>{miss_rows}</tbody>
1171
+ </table></figure>
1172
+ </section>
1173
+
1174
+ <section id="tab-cached" class="tab">
1175
+ <figure><table class="striped">
1176
+ <thead><tr>
1177
+ <th>URL</th><th>Size</th><th class="num">Hits</th><th class="num">Misses</th>
1178
+ <th>SHA-256</th><th>Fetched</th><th>Action</th>
1179
+ </tr></thead>
1180
+ <tbody>{blob_rows}</tbody>
1181
+ </table></figure>
1182
+ </section>"""
1183
+
1184
def _stream_progress_cell(self, s: Stream) -> str:
    """Build the HTML for the Progress column of one active stream.

    With a positive ``total`` the cell is a ``<progress>`` bar followed by
    a small ``sent / total (pct%)`` caption, the percentage capped at 100.
    When ``total`` is ``None`` or not positive, the cell degrades to the
    bytes-sent figure alone.
    """
    sent_label = human_size(s.bytes_sent)
    if s.total is None or s.total <= 0:
        return f'<small class="mono">{sent_label}</small>'
    percent = min(100, int(s.bytes_sent * 100 / s.total))
    bar = f'<progress value="{s.bytes_sent}" max="{s.total}"></progress>'
    caption = (
        f'<small class="mono">{sent_label} / '
        f"{human_size(s.total)} ({percent}%)</small>"
    )
    return f"{bar}<br>{caption}"
1000
1197
 
1001
1198
  def _job_row(self, j: Job) -> str:
1002
1199
  name = os.path.basename(urllib.parse.urlsplit(j.url).path) or j.url
@@ -1074,6 +1271,7 @@ def main():
1074
1271
  httpd.auth = auth # type: ignore[attr-defined]
1075
1272
  httpd.mgr = mgr # type: ignore[attr-defined]
1076
1273
  httpd.auto_fetch = not args.curate # type: ignore[attr-defined]
1274
+ httpd.streams = StreamRegistry() # type: ignore[attr-defined]
1077
1275
  print(
1078
1276
  f"withcache cache-host on http://{args.host}:{args.port} "
1079
1277
  f"(data={store.data_dir}, keep_query={args.keep_query}, workers={args.workers}, "
@@ -370,6 +370,113 @@ class TestRangeResumeOnTruncation(unittest.TestCase):
370
370
  self.assertTrue(any(d > half for d, _ in observed))
371
371
 
372
372
 
373
+ # --------------------------------------------------------------------------
374
+ # StreamRegistry: in-flight blob-serve registry powering the operator dash's
375
+ # "Streams" tab. Validates thread-safety + snapshot ordering + lifecycle.
376
+ # --------------------------------------------------------------------------
377
class TestStreamRegistry(unittest.TestCase):
    """Lifecycle, snapshot ordering and lock behaviour of ``StreamRegistry``."""

    def setUp(self):
        # Every test starts from its own empty registry.
        self.registry = server.StreamRegistry()

    def test_start_assigns_unique_ids_and_records_metadata(self):
        first = self.registry.start(url="http://o/x", client="10.0.0.1:5000", total=1024)
        second = self.registry.start(url="http://o/y", client="10.0.0.2:5000", total=None)
        self.assertNotEqual(first.id, second.id)
        self.assertEqual(first.url, "http://o/x")
        self.assertEqual(first.client, "10.0.0.1:5000")
        self.assertEqual(first.total, 1024)
        self.assertIsNone(second.total)
        # both visible in snapshot, oldest-first
        listed_ids = [entry.id for entry in self.registry.snapshot()]
        self.assertEqual(listed_ids, [first.id, second.id])

    def test_bump_updates_bytes_sent_for_known_id(self):
        stream = self.registry.start(url="http://o/x", client="c", total=100)
        # The registry does not enforce monotonicity (the handler is the
        # only caller and it is monotonic); each bump simply overwrites.
        for reported in (42, 99):
            self.registry.bump(stream.id, reported)
            self.assertEqual(self.registry.snapshot()[0].bytes_sent, reported)

    def test_bump_unknown_id_is_a_silent_noop(self):
        """A finish() that races against a final bump() must not crash:
        the bump arrives, finds the id gone, returns silently. The
        handler relies on this so its tight write loop doesn't have to
        special-case the race."""
        self.registry.bump(99999, 7)
        self.assertEqual(self.registry.snapshot(), [])

    def test_finish_removes_from_snapshot(self):
        stream = self.registry.start(url="http://o/x", client="c", total=10)
        self.assertEqual(len(self.registry.snapshot()), 1)
        # The second pass checks that a repeated finish is a no-op.
        for _ in range(2):
            self.registry.finish(stream.id)
            self.assertEqual(self.registry.snapshot(), [])

    def test_snapshot_returns_a_copy_not_the_live_dict(self):
        """Operator code iterating a snapshot must not see torn state when
        a worker thread starts/finishes a stream mid-iteration."""
        original = self.registry.start(url="http://o/x", client="c", total=10)
        frozen = self.registry.snapshot()
        self.registry.finish(original.id)
        self.registry.start(url="http://o/y", client="c", total=10)
        # snapshot taken before the mutations stays put
        self.assertEqual(len(frozen), 1)
        self.assertEqual(frozen[0].url, "http://o/x")

    def test_concurrent_start_finish_under_load(self):
        """Hammer the lock with 500 starts + 500 finishes from 10 threads;
        the registry must end empty with no exception leaks."""
        registry = self.registry
        failures: list[BaseException] = []

        def churn():
            try:
                for _ in range(50):
                    stream = registry.start(url="http://o/x", client="c", total=10)
                    registry.bump(stream.id, 5)
                    registry.finish(stream.id)
            except BaseException as exc:
                failures.append(exc)

        workers = [threading.Thread(target=churn) for _ in range(10)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
        self.assertEqual(failures, [])
        self.assertEqual(registry.snapshot(), [])
454
+
455
+
456
class TestAgeHuman(unittest.TestCase):
    """``_age_human`` renders elapsed seconds into the compact form the
    Streams table cell shows. ``now`` is injected so no test has to
    monkeypatch ``time.time``."""

    def _check(self, cases):
        # Each case is (started_at, now, expected rendering).
        for started_at, now, expected in cases:
            self.assertEqual(server._age_human(started_at, now=now), expected)

    def test_seconds_only(self):
        self._check([(100.0, 100.0, "0s"), (100.0, 159.0, "59s")])

    def test_minutes_pad_seconds(self):
        self._check([(100.0, 160.0, "1m00s"), (100.0, 222.0, "2m02s")])

    def test_hours_pad_minutes(self):
        self._check(
            [
                (0.0, 3600.0, "1h00m"),
                (0.0, 3661.0, "1h01m"),
                (0.0, 7320.0, "2h02m"),
            ]
        )

    def test_negative_clamps_to_zero(self):
        # Started-at in the future (clock skew, replayed snapshot) renders
        # as 0s rather than a confusing negative.
        self._check([(200.0, 100.0, "0s")])
478
+
479
+
373
480
  # --------------------------------------------------------------------------
374
481
  # _shim: URL detection, rewrite, real-tool resolution, env, path-encoding
375
482
  # --------------------------------------------------------------------------
@@ -536,6 +643,7 @@ def _start_withcache(auto_fetch=False):
536
643
  httpd.auth = server.Auth(b"k", None) # auth disabled -> read path open
537
644
  httpd.mgr = server.DownloadManager(store, workers=1)
538
645
  httpd.auto_fetch = auto_fetch
646
+ httpd.streams = server.StreamRegistry()
539
647
  threading.Thread(target=httpd.serve_forever, daemon=True).start()
540
648
  return httpd, store
541
649
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes