withcache 0.4.2__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {withcache-0.4.2 → withcache-0.5.0}/PKG-INFO +1 -1
- {withcache-0.4.2 → withcache-0.5.0}/shim/build.zig.zon +1 -1
- {withcache-0.4.2 → withcache-0.5.0}/src/withcache/__init__.py +1 -1
- {withcache-0.4.2 → withcache-0.5.0}/src/withcache/server.py +239 -33
- {withcache-0.4.2 → withcache-0.5.0}/tests/test_withcache.py +108 -0
- {withcache-0.4.2 → withcache-0.5.0}/.gitignore +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/LICENSE +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/README.md +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/deploy/Containerfile +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/deploy/compose.yml +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/hatch_build.py +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/pyproject.toml +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/shim/build.zig +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/shim/shim.zig +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/src/withcache/_shim.py +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/src/withcache/client.py +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/src/withcache/curlwithcache.py +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/src/withcache/static/htmx.min.js +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/src/withcache/static/pico.min.css +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/src/withcache/wgetwithcache.py +0 -0
- {withcache-0.4.2 → withcache-0.5.0}/tests/test_differential.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: withcache
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Operator-curated, URL-keyed artifact cache for a small lab (CUDA/ROCm/DOCA/firmware)
|
|
5
5
|
Project-URL: Homepage, https://github.com/safl/withcache
|
|
6
6
|
Author-email: "Simon A. F. Lund" <safl@safl.dk>
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
.name = .withcache_shim,
|
|
3
3
|
// Zig requires a literal here; keep it in lockstep with the project's
|
|
4
4
|
// single source (src/withcache/__init__.py) via `make bump` / `make version-check`.
|
|
5
|
-
.version = "0.4.2",
|
|
5
|
+
.version = "0.5.0",
|
|
6
6
|
.fingerprint = 0xd7d96c5ed212ccaa,
|
|
7
7
|
.minimum_zig_version = "0.16.0",
|
|
8
8
|
.paths = .{
|
|
@@ -41,8 +41,10 @@ import urllib.request
|
|
|
41
41
|
from dataclasses import dataclass, field
|
|
42
42
|
from datetime import datetime, timezone
|
|
43
43
|
|
|
44
|
+
from . import __version__
|
|
45
|
+
|
|
44
46
|
CHUNK = 64 * 1024
|
|
45
|
-
USER_AGENT = "withcache-cache/
|
|
47
|
+
USER_AGENT = f"withcache-cache/{__version__}"
|
|
46
48
|
# Resume budget for a single store_from_origin call. A truncated
|
|
47
49
|
# upstream stream re-fetches with ``Range: bytes=<got>-`` so the
|
|
48
50
|
# next attempt picks up where the cut happened. Five tries cover
|
|
@@ -62,6 +64,18 @@ def now_iso() -> str:
|
|
|
62
64
|
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
63
65
|
|
|
64
66
|
|
|
67
|
+
def _age_human(started_at: float, *, now: float | None = None) -> str:
|
|
68
|
+
"""Render seconds-since as a compact ``Ns`` / ``Nm`` / ``Nh`` string for
|
|
69
|
+
the streams table. ``now`` is injectable so tests don't need to
|
|
70
|
+
monkeypatch ``time.time`` to assert formatting."""
|
|
71
|
+
elapsed = int(max(0.0, (now if now is not None else time.time()) - started_at))
|
|
72
|
+
if elapsed < 60:
|
|
73
|
+
return f"{elapsed}s"
|
|
74
|
+
if elapsed < 3600:
|
|
75
|
+
return f"{elapsed // 60}m{elapsed % 60:02d}s"
|
|
76
|
+
return f"{elapsed // 3600}h{(elapsed % 3600) // 60:02d}m"
|
|
77
|
+
|
|
78
|
+
|
|
65
79
|
def human_size(n: int) -> str:
|
|
66
80
|
f = float(n)
|
|
67
81
|
for unit in ("B", "KiB", "MiB", "GiB", "TiB"):
|
|
@@ -458,6 +472,65 @@ class TruncatedDownload(Exception):
|
|
|
458
472
|
"""
|
|
459
473
|
|
|
460
474
|
|
|
475
|
+
@dataclass
|
|
476
|
+
class Stream:
|
|
477
|
+
"""One in-flight blob serve. Lives in memory only for the duration of
|
|
478
|
+
the response: registered before the first byte goes out, deregistered
|
|
479
|
+
in a finally block. Operator visibility into "what is the cache
|
|
480
|
+
currently uploading, and to whom" without touching the kernel's
|
|
481
|
+
/proc/net/tcp or the access log.
|
|
482
|
+
"""
|
|
483
|
+
|
|
484
|
+
id: int
|
|
485
|
+
url: str
|
|
486
|
+
client: str # ``ip:port`` of the consumer
|
|
487
|
+
started_at: float
|
|
488
|
+
bytes_sent: int = 0
|
|
489
|
+
total: int | None = None # known up front from the blob row
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
class StreamRegistry:
|
|
493
|
+
"""Thread-safe registry of in-flight blob serves. Reads (snapshot for
|
|
494
|
+
the operator dash) and writes (start / progress / finish from
|
|
495
|
+
request handler threads) all serialised on a single lock; the
|
|
496
|
+
contention window is the few microseconds of a dict mutation, and
|
|
497
|
+
progress updates are batched at one per PROGRESS_STRIDE chunks (16 chunks,
|
|
498
|
+
about 1 MiB at CHUNK=64K), so a 4 GiB stream is ~4k updates, not ~64k.
|
|
499
|
+
"""
|
|
500
|
+
|
|
501
|
+
PROGRESS_STRIDE = 16 # update bytes_sent every N chunks (~1 MiB at CHUNK=64K)
|
|
502
|
+
|
|
503
|
+
def __init__(self) -> None:
|
|
504
|
+
self._ids = itertools.count(1)
|
|
505
|
+
self._lock = threading.Lock()
|
|
506
|
+
self._active: dict[int, Stream] = {}
|
|
507
|
+
|
|
508
|
+
def start(self, url: str, client: str, total: int | None) -> Stream:
|
|
509
|
+
with self._lock:
|
|
510
|
+
s = Stream(
|
|
511
|
+
id=next(self._ids), url=url, client=client, started_at=time.time(), total=total
|
|
512
|
+
)
|
|
513
|
+
self._active[s.id] = s
|
|
514
|
+
return s
|
|
515
|
+
|
|
516
|
+
def bump(self, stream_id: int, bytes_sent: int) -> None:
|
|
517
|
+
# Caller already gates by PROGRESS_STRIDE so this is cheap; the
|
|
518
|
+
# write itself only takes the lock long enough to mutate the int.
|
|
519
|
+
with self._lock:
|
|
520
|
+
s = self._active.get(stream_id)
|
|
521
|
+
if s is not None:
|
|
522
|
+
s.bytes_sent = bytes_sent
|
|
523
|
+
|
|
524
|
+
def finish(self, stream_id: int) -> None:
|
|
525
|
+
with self._lock:
|
|
526
|
+
self._active.pop(stream_id, None)
|
|
527
|
+
|
|
528
|
+
def snapshot(self) -> list[Stream]:
|
|
529
|
+
with self._lock:
|
|
530
|
+
# Stable order: oldest first (matches the queue mental model).
|
|
531
|
+
return sorted(self._active.values(), key=lambda s: s.started_at)
|
|
532
|
+
|
|
533
|
+
|
|
461
534
|
@dataclass
|
|
462
535
|
class Job:
|
|
463
536
|
id: int
|
|
@@ -566,7 +639,7 @@ def _set_progress(job: Job, done: int, total: int | None):
|
|
|
566
639
|
# HTTP handler
|
|
567
640
|
# --------------------------------------------------------------------------
|
|
568
641
|
class Handler(http.server.BaseHTTPRequestHandler):
|
|
569
|
-
server_version = "withcache/
|
|
642
|
+
server_version = f"withcache/{__version__}"
|
|
570
643
|
protocol_version = "HTTP/1.1"
|
|
571
644
|
|
|
572
645
|
@property
|
|
@@ -585,6 +658,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
585
658
|
def auto_fetch(self) -> bool:
|
|
586
659
|
return self.server.auto_fetch # type: ignore[attr-defined]
|
|
587
660
|
|
|
661
|
+
@property
|
|
662
|
+
def streams(self) -> StreamRegistry:
|
|
663
|
+
return self.server.streams # type: ignore[attr-defined]
|
|
664
|
+
|
|
588
665
|
def log_message(self, format, *args): # quieter, single-line
|
|
589
666
|
print(f"{self.address_string()} - {format % args}", flush=True)
|
|
590
667
|
|
|
@@ -777,17 +854,39 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
777
854
|
self.send_header("X-Withcache-Sha256", row["sha256"])
|
|
778
855
|
self.end_headers()
|
|
779
856
|
if head_only:
|
|
780
|
-
return # the shim's HEAD probe
|
|
857
|
+
return # the shim's HEAD probe (not a served download, so don't count it)
|
|
781
858
|
self.store.record_hit(row["key"])
|
|
859
|
+
# Register the stream BEFORE we open the file so an operator
|
|
860
|
+
# watching the dash sees the serve immediately (even if the
|
|
861
|
+
# disk read stalls). The handler runs on a worker thread per
|
|
862
|
+
# the ThreadingHTTPServer mixin, so the registry sees
|
|
863
|
+
# concurrent calls; StreamRegistry serialises on its own lock.
|
|
864
|
+
client = f"{self.client_address[0]}:{self.client_address[1]}"
|
|
865
|
+
stream = self.streams.start(url=url, client=client, total=row["size"])
|
|
782
866
|
try:
|
|
783
867
|
with open(path, "rb") as f:
|
|
868
|
+
sent = 0
|
|
869
|
+
ticks = 0
|
|
784
870
|
while True:
|
|
785
871
|
chunk = f.read(CHUNK)
|
|
786
872
|
if not chunk:
|
|
787
873
|
break
|
|
788
874
|
self.wfile.write(chunk)
|
|
875
|
+
sent += len(chunk)
|
|
876
|
+
ticks += 1
|
|
877
|
+
# Batched progress update: every 16 chunks (~1 MiB
|
|
878
|
+
# at CHUNK=64K) is plenty for a 1 Hz dashboard and
|
|
879
|
+
# keeps lock-contention sane on a busy box.
|
|
880
|
+
if ticks % StreamRegistry.PROGRESS_STRIDE == 0:
|
|
881
|
+
self.streams.bump(stream.id, sent)
|
|
882
|
+
# Final position so the dash's last frame shows the
|
|
883
|
+
# serve completing at the declared total, not at
|
|
884
|
+
# whatever the last batched update happened to be.
|
|
885
|
+
self.streams.bump(stream.id, sent)
|
|
789
886
|
except (BrokenPipeError, ConnectionResetError):
|
|
790
887
|
pass # client went away mid-stream
|
|
888
|
+
finally:
|
|
889
|
+
self.streams.finish(stream.id)
|
|
791
890
|
|
|
792
891
|
# -- helpers -----------------------------------------------------------
|
|
793
892
|
def read_form(self) -> dict:
|
|
@@ -859,7 +958,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
859
958
|
return f"""{self._head("withcache — login")}
|
|
860
959
|
<body><main class="container">
|
|
861
960
|
<article style="max-width: 24rem; margin: 4rem auto;">
|
|
862
|
-
<hgroup
|
|
961
|
+
<hgroup>
|
|
962
|
+
<h2>withcache <small class="mono">v{html.escape(__version__)}</small></h2>
|
|
963
|
+
<p>operator login</p>
|
|
964
|
+
</hgroup>
|
|
863
965
|
{err}
|
|
864
966
|
<form method="post" action="/ui/login">
|
|
865
967
|
<input type="password" name="password" placeholder="Admin password" autofocus required>
|
|
@@ -879,7 +981,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
879
981
|
return f"""{self._head("withcache cache-host")}
|
|
880
982
|
<body><main class="container">
|
|
881
983
|
<nav>
|
|
882
|
-
<ul><li
|
|
984
|
+
<ul><li>
|
|
985
|
+
<strong>withcache</strong> <small>cache-host</small>
|
|
986
|
+
<small class="mono">v{html.escape(__version__)}</small>
|
|
987
|
+
</li></ul>
|
|
883
988
|
<ul>
|
|
884
989
|
<li><progress id="spin" class="htmx-indicator"></progress></li>
|
|
885
990
|
{logout}
|
|
@@ -895,7 +1000,15 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
895
1000
|
</fieldset>
|
|
896
1001
|
</form>
|
|
897
1002
|
|
|
898
|
-
|
|
1003
|
+
<!-- The hx-trigger gates polling on the user NOT having an active
|
|
1004
|
+
text selection, so highlight-and-copy a URL out of a table cell
|
|
1005
|
+
isn't wiped by the 1 Hz refresh. ``isCollapsed`` is true when
|
|
1006
|
+
there's no selection or the caret is a zero-width point; once
|
|
1007
|
+
the operator releases / clears the selection polling resumes
|
|
1008
|
+
on the next 1 s tick. -->
|
|
1009
|
+
<div id="dash" hx-get="/admin/dash"
|
|
1010
|
+
hx-trigger="load, every 1s [document.getSelection().isCollapsed]"
|
|
1011
|
+
hx-swap="innerHTML">
|
|
899
1012
|
{self.render_dash()}
|
|
900
1013
|
</div>
|
|
901
1014
|
</main></body></html>"""
|
|
@@ -905,11 +1018,62 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
905
1018
|
jobs = self.mgr.list()
|
|
906
1019
|
misses = self.store.list_misses()
|
|
907
1020
|
blobs = self.store.list_blobs()
|
|
1021
|
+
streams = self.streams.snapshot()
|
|
908
1022
|
used = human_size(self.store.total_size())
|
|
909
1023
|
if self.store.max_bytes:
|
|
910
1024
|
used += f" / {human_size(self.store.max_bytes)}"
|
|
911
1025
|
full = "" if self.store.has_capacity() else " · <strong>cache full</strong>"
|
|
912
1026
|
|
|
1027
|
+
# Tabs are pure-CSS via :target. The URL hash names the active
|
|
1028
|
+
# section; htmx innerHTML-replacement of #dash leaves the hash
|
|
1029
|
+
# alone, so the operator's tab choice survives every refresh.
|
|
1030
|
+
# ``body:not(:has(section.tab:target))`` selects the default tab
|
|
1031
|
+
# when no hash is present; :has() lands cleanly on Chrome 105+,
|
|
1032
|
+
# Firefox 121+, Safari 15.4+, which is the whole modern web by
|
|
1033
|
+
# the time this ships in 2026.
|
|
1034
|
+
tab_style = """
|
|
1035
|
+
<style>
|
|
1036
|
+
nav.tabs { margin: 1rem 0 .25rem; border-bottom: 1px solid var(--pico-muted-border-color); }
|
|
1037
|
+
nav.tabs ul { display: flex; gap: 0; padding: 0; margin: 0; list-style: none; }
|
|
1038
|
+
nav.tabs li { margin: 0; }
|
|
1039
|
+
nav.tabs a {
|
|
1040
|
+
display: inline-block; padding: .45rem .9rem; text-decoration: none;
|
|
1041
|
+
color: var(--pico-muted-color); border-bottom: 2px solid transparent;
|
|
1042
|
+
margin-bottom: -1px; font-size: .9rem;
|
|
1043
|
+
}
|
|
1044
|
+
nav.tabs a:hover { color: var(--pico-color); }
|
|
1045
|
+
section.tab { display: none; padding-top: .75rem; }
|
|
1046
|
+
section.tab:target { display: block; }
|
|
1047
|
+
body:not(:has(section.tab:target)) section.tab#tab-streams { display: block; }
|
|
1048
|
+
body:has(#tab-streams:target) nav.tabs a[href="#tab-streams"],
|
|
1049
|
+
body:has(#tab-downloads:target) nav.tabs a[href="#tab-downloads"],
|
|
1050
|
+
body:has(#tab-misses:target) nav.tabs a[href="#tab-misses"],
|
|
1051
|
+
body:has(#tab-cached:target) nav.tabs a[href="#tab-cached"] {
|
|
1052
|
+
color: var(--pico-color);
|
|
1053
|
+
border-bottom-color: var(--pico-primary, #0172ad);
|
|
1054
|
+
font-weight: 600;
|
|
1055
|
+
}
|
|
1056
|
+
body:not(:has(section.tab:target)) nav.tabs a[href="#tab-streams"] {
|
|
1057
|
+
color: var(--pico-color);
|
|
1058
|
+
border-bottom-color: var(--pico-primary, #0172ad);
|
|
1059
|
+
font-weight: 600;
|
|
1060
|
+
}
|
|
1061
|
+
</style>
|
|
1062
|
+
"""
|
|
1063
|
+
|
|
1064
|
+
stream_rows = (
|
|
1065
|
+
"".join(
|
|
1066
|
+
f"""<tr>
|
|
1067
|
+
<td class="url">{html.escape(s.url)}</td>
|
|
1068
|
+
<td class="mono"><small>{html.escape(s.client)}</small></td>
|
|
1069
|
+
<td>{self._stream_progress_cell(s)}</td>
|
|
1070
|
+
<td><small>{_age_human(s.started_at)}</small></td>
|
|
1071
|
+
</tr>"""
|
|
1072
|
+
for s in streams
|
|
1073
|
+
)
|
|
1074
|
+
or '<tr><td colspan="4"><em>No active streams.</em></td></tr>'
|
|
1075
|
+
)
|
|
1076
|
+
|
|
913
1077
|
job_rows = (
|
|
914
1078
|
"".join(self._job_row(j) for j in jobs)
|
|
915
1079
|
or '<tr><td colspan="4"><em>No downloads yet.</em></td></tr>'
|
|
@@ -960,35 +1124,76 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|
|
960
1124
|
or '<tr><td colspan="7"><em>Cache is empty.</em></td></tr>'
|
|
961
1125
|
)
|
|
962
1126
|
|
|
1127
|
+
# Per-tab counts let the operator see at a glance whether each
|
|
1128
|
+
# section is empty without flipping to it.
|
|
1129
|
+
nstreams = len(streams)
|
|
1130
|
+
njobs = len(jobs)
|
|
1131
|
+
|
|
963
1132
|
return f"""
|
|
964
1133
|
<p><small>{nblobs} cached ({used}){full} · {nmisses} pending miss(es)</small></p>
|
|
965
|
-
|
|
966
|
-
<
|
|
967
|
-
<
|
|
968
|
-
<
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
<
|
|
974
|
-
<
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
</
|
|
990
|
-
<
|
|
991
|
-
|
|
1134
|
+
{tab_style}
|
|
1135
|
+
<nav class="tabs"><ul>
|
|
1136
|
+
<li><a href="#tab-streams">Streams ({nstreams})</a></li>
|
|
1137
|
+
<li><a href="#tab-downloads">Downloads ({njobs})</a></li>
|
|
1138
|
+
<li><a href="#tab-misses">Misses ({nmisses})</a></li>
|
|
1139
|
+
<li><a href="#tab-cached">Cached ({nblobs})</a></li>
|
|
1140
|
+
</ul></nav>
|
|
1141
|
+
|
|
1142
|
+
<section id="tab-streams" class="tab">
|
|
1143
|
+
<figure><table class="striped">
|
|
1144
|
+
<thead><tr>
|
|
1145
|
+
<th>URL</th><th>Client</th><th>Progress</th><th>Age</th>
|
|
1146
|
+
</tr></thead>
|
|
1147
|
+
<tbody>{stream_rows}</tbody>
|
|
1148
|
+
</table></figure>
|
|
1149
|
+
</section>
|
|
1150
|
+
|
|
1151
|
+
<section id="tab-downloads" class="tab">
|
|
1152
|
+
<div class="row">
|
|
1153
|
+
<small>Auto-fetch workers feeding the cache.</small>
|
|
1154
|
+
<form hx-post="/admin/clear" hx-target="#dash" hx-swap="innerHTML" style="margin:0">
|
|
1155
|
+
<button type="submit" class="secondary outline" style="width:auto;padding:.2rem .7rem">
|
|
1156
|
+
Clear finished</button>
|
|
1157
|
+
</form>
|
|
1158
|
+
</div>
|
|
1159
|
+
<figure><table class="striped">
|
|
1160
|
+
<thead><tr><th>Artifact</th><th>Progress</th><th>Status</th><th></th></tr></thead>
|
|
1161
|
+
<tbody>{job_rows}</tbody>
|
|
1162
|
+
</table></figure>
|
|
1163
|
+
</section>
|
|
1164
|
+
|
|
1165
|
+
<section id="tab-misses" class="tab">
|
|
1166
|
+
<figure><table class="striped">
|
|
1167
|
+
<thead><tr>
|
|
1168
|
+
<th>URL</th><th class="num">Misses</th><th>Last seen</th><th>Action</th>
|
|
1169
|
+
</tr></thead>
|
|
1170
|
+
<tbody>{miss_rows}</tbody>
|
|
1171
|
+
</table></figure>
|
|
1172
|
+
</section>
|
|
1173
|
+
|
|
1174
|
+
<section id="tab-cached" class="tab">
|
|
1175
|
+
<figure><table class="striped">
|
|
1176
|
+
<thead><tr>
|
|
1177
|
+
<th>URL</th><th>Size</th><th class="num">Hits</th><th class="num">Misses</th>
|
|
1178
|
+
<th>SHA-256</th><th>Fetched</th><th>Action</th>
|
|
1179
|
+
</tr></thead>
|
|
1180
|
+
<tbody>{blob_rows}</tbody>
|
|
1181
|
+
</table></figure>
|
|
1182
|
+
</section>"""
|
|
1183
|
+
|
|
1184
|
+
def _stream_progress_cell(self, s: Stream) -> str:
|
|
1185
|
+
"""One progress cell for an active stream: a <progress> bar when the
|
|
1186
|
+
total is known (always for a cached blob, since the size came off
|
|
1187
|
+
the row), with a small ``sent / total`` line under it. Falls back
|
|
1188
|
+
to bytes-only when total somehow went missing."""
|
|
1189
|
+
if s.total is None or s.total <= 0:
|
|
1190
|
+
return f'<small class="mono">{human_size(s.bytes_sent)}</small>'
|
|
1191
|
+
pct = min(100, int(s.bytes_sent * 100 / s.total))
|
|
1192
|
+
return (
|
|
1193
|
+
f'<progress value="{s.bytes_sent}" max="{s.total}"></progress>'
|
|
1194
|
+
f'<br><small class="mono">{human_size(s.bytes_sent)} / '
|
|
1195
|
+
f"{human_size(s.total)} ({pct}%)</small>"
|
|
1196
|
+
)
|
|
992
1197
|
|
|
993
1198
|
def _job_row(self, j: Job) -> str:
|
|
994
1199
|
name = os.path.basename(urllib.parse.urlsplit(j.url).path) or j.url
|
|
@@ -1066,6 +1271,7 @@ def main():
|
|
|
1066
1271
|
httpd.auth = auth # type: ignore[attr-defined]
|
|
1067
1272
|
httpd.mgr = mgr # type: ignore[attr-defined]
|
|
1068
1273
|
httpd.auto_fetch = not args.curate # type: ignore[attr-defined]
|
|
1274
|
+
httpd.streams = StreamRegistry() # type: ignore[attr-defined]
|
|
1069
1275
|
print(
|
|
1070
1276
|
f"withcache cache-host on http://{args.host}:{args.port} "
|
|
1071
1277
|
f"(data={store.data_dir}, keep_query={args.keep_query}, workers={args.workers}, "
|
|
@@ -370,6 +370,113 @@ class TestRangeResumeOnTruncation(unittest.TestCase):
|
|
|
370
370
|
self.assertTrue(any(d > half for d, _ in observed))
|
|
371
371
|
|
|
372
372
|
|
|
373
|
+
# --------------------------------------------------------------------------
|
|
374
|
+
# StreamRegistry: in-flight blob-serve registry powering the operator dash's
|
|
375
|
+
# "Streams" tab. Validates thread-safety + snapshot ordering + lifecycle.
|
|
376
|
+
# --------------------------------------------------------------------------
|
|
377
|
+
class TestStreamRegistry(unittest.TestCase):
|
|
378
|
+
def test_start_assigns_unique_ids_and_records_metadata(self):
|
|
379
|
+
reg = server.StreamRegistry()
|
|
380
|
+
a = reg.start(url="http://o/x", client="10.0.0.1:5000", total=1024)
|
|
381
|
+
b = reg.start(url="http://o/y", client="10.0.0.2:5000", total=None)
|
|
382
|
+
self.assertNotEqual(a.id, b.id)
|
|
383
|
+
self.assertEqual(a.url, "http://o/x")
|
|
384
|
+
self.assertEqual(a.client, "10.0.0.1:5000")
|
|
385
|
+
self.assertEqual(a.total, 1024)
|
|
386
|
+
self.assertIsNone(b.total)
|
|
387
|
+
# both visible in snapshot, oldest-first
|
|
388
|
+
snap = reg.snapshot()
|
|
389
|
+
self.assertEqual([s.id for s in snap], [a.id, b.id])
|
|
390
|
+
|
|
391
|
+
def test_bump_updates_bytes_sent_for_known_id(self):
|
|
392
|
+
reg = server.StreamRegistry()
|
|
393
|
+
s = reg.start(url="http://o/x", client="c", total=100)
|
|
394
|
+
reg.bump(s.id, 42)
|
|
395
|
+
self.assertEqual(reg.snapshot()[0].bytes_sent, 42)
|
|
396
|
+
# later bump moves forward; the registry doesn't enforce
|
|
397
|
+
# monotonicity (the handler is the only caller and it is monotonic)
|
|
398
|
+
reg.bump(s.id, 99)
|
|
399
|
+
self.assertEqual(reg.snapshot()[0].bytes_sent, 99)
|
|
400
|
+
|
|
401
|
+
def test_bump_unknown_id_is_a_silent_noop(self):
|
|
402
|
+
"""A finish() that races against a final bump() must not crash:
|
|
403
|
+
the bump arrives, finds the id gone, returns silently. The
|
|
404
|
+
handler relies on this so its tight write loop doesn't have to
|
|
405
|
+
special-case the race."""
|
|
406
|
+
reg = server.StreamRegistry()
|
|
407
|
+
reg.bump(99999, 7)
|
|
408
|
+
self.assertEqual(reg.snapshot(), [])
|
|
409
|
+
|
|
410
|
+
def test_finish_removes_from_snapshot(self):
|
|
411
|
+
reg = server.StreamRegistry()
|
|
412
|
+
s = reg.start(url="http://o/x", client="c", total=10)
|
|
413
|
+
self.assertEqual(len(reg.snapshot()), 1)
|
|
414
|
+
reg.finish(s.id)
|
|
415
|
+
self.assertEqual(reg.snapshot(), [])
|
|
416
|
+
# second finish is a no-op (handler's finally: block can fire twice)
|
|
417
|
+
reg.finish(s.id)
|
|
418
|
+
self.assertEqual(reg.snapshot(), [])
|
|
419
|
+
|
|
420
|
+
def test_snapshot_returns_a_copy_not_the_live_dict(self):
|
|
421
|
+
"""Operator code iterating a snapshot must not see torn state when
|
|
422
|
+
a worker thread starts/finishes a stream mid-iteration."""
|
|
423
|
+
reg = server.StreamRegistry()
|
|
424
|
+
s = reg.start(url="http://o/x", client="c", total=10)
|
|
425
|
+
snap = reg.snapshot()
|
|
426
|
+
reg.finish(s.id)
|
|
427
|
+
reg.start(url="http://o/y", client="c", total=10)
|
|
428
|
+
# snapshot taken before the mutations stays put
|
|
429
|
+
self.assertEqual(len(snap), 1)
|
|
430
|
+
self.assertEqual(snap[0].url, "http://o/x")
|
|
431
|
+
|
|
432
|
+
def test_concurrent_start_finish_under_load(self):
|
|
433
|
+
"""Hammer the lock with 500 starts + 500 finishes from 10 threads;
|
|
434
|
+
the registry must end empty with no exception leaks."""
|
|
435
|
+
reg = server.StreamRegistry()
|
|
436
|
+
errors: list[BaseException] = []
|
|
437
|
+
|
|
438
|
+
def churn():
|
|
439
|
+
try:
|
|
440
|
+
for _ in range(50):
|
|
441
|
+
s = reg.start(url="http://o/x", client="c", total=10)
|
|
442
|
+
reg.bump(s.id, 5)
|
|
443
|
+
reg.finish(s.id)
|
|
444
|
+
except BaseException as e:
|
|
445
|
+
errors.append(e)
|
|
446
|
+
|
|
447
|
+
threads = [threading.Thread(target=churn) for _ in range(10)]
|
|
448
|
+
for t in threads:
|
|
449
|
+
t.start()
|
|
450
|
+
for t in threads:
|
|
451
|
+
t.join()
|
|
452
|
+
self.assertEqual(errors, [])
|
|
453
|
+
self.assertEqual(reg.snapshot(), [])
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
class TestAgeHuman(unittest.TestCase):
|
|
457
|
+
"""``_age_human`` renders elapsed seconds into the compact form the
|
|
458
|
+
Streams table cell shows. Inject ``now`` so the test doesn't have
|
|
459
|
+
to monkeypatch ``time.time``."""
|
|
460
|
+
|
|
461
|
+
def test_seconds_only(self):
|
|
462
|
+
self.assertEqual(server._age_human(100.0, now=100.0), "0s")
|
|
463
|
+
self.assertEqual(server._age_human(100.0, now=159.0), "59s")
|
|
464
|
+
|
|
465
|
+
def test_minutes_pad_seconds(self):
|
|
466
|
+
self.assertEqual(server._age_human(100.0, now=160.0), "1m00s")
|
|
467
|
+
self.assertEqual(server._age_human(100.0, now=222.0), "2m02s")
|
|
468
|
+
|
|
469
|
+
def test_hours_pad_minutes(self):
|
|
470
|
+
self.assertEqual(server._age_human(0.0, now=3600.0), "1h00m")
|
|
471
|
+
self.assertEqual(server._age_human(0.0, now=3661.0), "1h01m")
|
|
472
|
+
self.assertEqual(server._age_human(0.0, now=7320.0), "2h02m")
|
|
473
|
+
|
|
474
|
+
def test_negative_clamps_to_zero(self):
|
|
475
|
+
# Started-at in the future (clock skew, replayed snapshot) renders
|
|
476
|
+
# as 0s rather than a confusing negative.
|
|
477
|
+
self.assertEqual(server._age_human(200.0, now=100.0), "0s")
|
|
478
|
+
|
|
479
|
+
|
|
373
480
|
# --------------------------------------------------------------------------
|
|
374
481
|
# _shim: URL detection, rewrite, real-tool resolution, env, path-encoding
|
|
375
482
|
# --------------------------------------------------------------------------
|
|
@@ -536,6 +643,7 @@ def _start_withcache(auto_fetch=False):
|
|
|
536
643
|
httpd.auth = server.Auth(b"k", None) # auth disabled -> read path open
|
|
537
644
|
httpd.mgr = server.DownloadManager(store, workers=1)
|
|
538
645
|
httpd.auto_fetch = auto_fetch
|
|
646
|
+
httpd.streams = server.StreamRegistry()
|
|
539
647
|
threading.Thread(target=httpd.serve_forever, daemon=True).start()
|
|
540
648
|
return httpd, store
|
|
541
649
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|