snailmail 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
snailmail/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ """snailmail — a local HTTP file server with injectable latency and bandwidth limits.
2
+
3
+ For benchmarking range / object-store / virtual-chunk reads under realistic network
4
+ conditions. See :class:`LatencyRangeServer`.
5
+ """
6
+
7
+ from importlib.metadata import PackageNotFoundError, version
8
+
9
+ from snailmail.bandwidth import AsyncSharedPipe
10
+ from snailmail.latency import Exponential, Fixed, LatencyDist, LogNormal, Normal
11
+ from snailmail.server import LatencyRangeServer
12
+
13
+ try:
14
+ __version__ = version("snailmail") # derived from the git tag at build time (hatch-vcs)
15
+ except PackageNotFoundError: # running from a source tree with no install
16
+ __version__ = "0+unknown"
17
+
18
+ __all__ = [
19
+ "LatencyRangeServer",
20
+ "AsyncSharedPipe",
21
+ "LatencyDist",
22
+ "LogNormal",
23
+ "Normal",
24
+ "Exponential",
25
+ "Fixed",
26
+ ]
snailmail/bandwidth.py ADDED
@@ -0,0 +1,35 @@
1
+ """Bandwidth limiting for responses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+
7
+
8
class AsyncSharedPipe:
    """FIFO rate limiter standing in for a single shared client downlink (async).

    All response bodies reserve their transfer time on one pipe of ``B`` bytes/s.
    Reservations queue behind each other, so the aggregate egress never exceeds
    ``B`` however many requests overlap, and reading more bytes than needed costs
    pipe time. Only the byte transfer serializes here; per-request latency is
    injected elsewhere and stays parallel. ``B is None`` turns the limiter off.
    """

    def __init__(self, bytes_per_s: float | None):
        # None, zero and negative rates all mean "no limit".
        limited = bool(bytes_per_s) and bytes_per_s > 0
        self.B = bytes_per_s if limited else None
        self._lock = asyncio.Lock()
        # Loop-clock timestamp at which the pipe next becomes free.
        self._free = 0.0

    async def transfer(self, nbytes: int) -> None:
        """Reserve pipe time for ``nbytes`` and sleep until that reservation ends."""
        if self.B is None or nbytes <= 0:
            return
        clock = asyncio.get_running_loop().time
        async with self._lock:
            # Queue behind whatever is already reserved, or start now if idle.
            begins_at = max(clock(), self._free)
            done_at = begins_at + nbytes / self.B
            self._free = done_at
        # Sleep outside the lock so later callers can reserve their slot meanwhile.
        remaining = done_at - clock()
        if remaining > 0:
            await asyncio.sleep(remaining)

    def reset(self) -> None:
        """Forget all outstanding reservations; the pipe is free immediately."""
        self._free = 0.0
snailmail/cli.py ADDED
@@ -0,0 +1,130 @@
1
+ """The ``snailmail`` command-line entry point."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import threading
8
+ from importlib.metadata import version
9
+
10
+ from snailmail.latency import Exponential, Fixed, LatencyDist, LogNormal, Normal
11
+ from snailmail.server import LatencyRangeServer
12
+
13
+ # CLI param -> which --dist owns it. A param set for the wrong dist is a user error,
14
+ # not silently ignored, so the realized latency always matches what was asked for.
15
+ _DIST_PARAMS = {
16
+ "mode_ms": "lognormal",
17
+ "sigma": "lognormal",
18
+ "mean_ms": ("normal", "exponential"),
19
+ "std_ms": "normal",
20
+ "value_ms": "fixed",
21
+ }
22
+
23
+
24
+ def _latency_from_args(args: argparse.Namespace, ap: argparse.ArgumentParser) -> LatencyDist | None:
25
+ if args.dist is None:
26
+ for name in _DIST_PARAMS:
27
+ if getattr(args, name) is not None:
28
+ ap.error(f"--{name.replace('_', '-')} requires --dist")
29
+ return None # no injected latency
30
+ for name, owner in _DIST_PARAMS.items():
31
+ owners = (owner,) if isinstance(owner, str) else owner
32
+ if getattr(args, name) is not None and args.dist not in owners:
33
+ ap.error(f"--{name.replace('_', '-')} is not valid with --dist {args.dist}")
34
+ if args.dist == "lognormal":
35
+ if args.mode_ms is None:
36
+ ap.error("--dist lognormal requires --mode-ms")
37
+ opt = {} if args.sigma is None else {"sigma": args.sigma} # else LogNormal's default
38
+ return LogNormal(args.mode_ms, seed=args.seed, **opt)
39
+ if args.dist == "normal":
40
+ if args.mean_ms is None:
41
+ ap.error("--dist normal requires --mean-ms")
42
+ opt = {} if args.std_ms is None else {"std_ms": args.std_ms} # else Normal's default
43
+ return Normal(args.mean_ms, seed=args.seed, **opt)
44
+ if args.dist == "exponential":
45
+ if args.mean_ms is None:
46
+ ap.error("--dist exponential requires --mean-ms")
47
+ return Exponential(args.mean_ms, seed=args.seed)
48
+ if args.value_ms is None: # fixed
49
+ ap.error("--dist fixed requires --value-ms")
50
+ return Fixed(args.value_ms)
51
+
52
+
53
+ def _parser() -> argparse.ArgumentParser:
54
+ ap = argparse.ArgumentParser(
55
+ prog="snailmail",
56
+ description="Serve a directory over HTTP with injected latency "
57
+ "+ bandwidth limits, for benchmarking. Binds 127.0.0.1 and serves until interrupted.",
58
+ epilog="examples:\n"
59
+ " snailmail ./store --dist lognormal --mode-ms 45 --sigma 0.5\n"
60
+ " snailmail ./store --dist normal --mean-ms 45 --std-ms 10\n"
61
+ " snailmail ./store --dist exponential --mean-ms 45\n"
62
+ " snailmail ./store --dist fixed --value-ms 20\n"
63
+ " snailmail ./store --bandwidth-mbs 100 --port 8080 --json # no latency; JSON address line",
64
+ formatter_class=argparse.RawDescriptionHelpFormatter,
65
+ )
66
+ ap.add_argument("--version", action="version", version=f"%(prog)s {version('snailmail')}")
67
+ ap.add_argument("root", help="directory to serve")
68
+ ap.add_argument(
69
+ "--dist",
70
+ choices=["lognormal", "normal", "exponential", "fixed"],
71
+ help="latency distribution; omit for no injected latency",
72
+ )
73
+ ap.add_argument("--mode-ms", type=float, help="[lognormal] PDF mode (peak), ms")
74
+ ap.add_argument("--sigma", type=float, help="[lognormal] log-scale shape (default 0.5)")
75
+ ap.add_argument("--mean-ms", type=float, help="[normal, exponential] mean latency, ms")
76
+ ap.add_argument("--std-ms", type=float, help="[normal] standard deviation, ms")
77
+ ap.add_argument("--value-ms", type=float, help="[fixed] deterministic latency, ms")
78
+ ap.add_argument(
79
+ "--seed", type=int, default=None, help="RNG seed for the latency pool (reproducible draws)"
80
+ )
81
+ ap.add_argument(
82
+ "--bandwidth-mbs", type=float, default=None, help="shared-pipe MB/s; omit = unlimited"
83
+ )
84
+ ap.add_argument("--port", type=int, default=0, help="TCP port (0 = ephemeral)")
85
+ ap.add_argument(
86
+ "--json",
87
+ action="store_true",
88
+ help="emit the server description as one line of JSON instead of the banner",
89
+ )
90
+ return ap
91
+
92
+
93
def main() -> None:
    """Entry point for the ``snailmail`` command.

    Parses the arguments, starts the server, announces where it is listening
    (a human-readable banner, or one line of JSON with ``--json``), then blocks
    until interrupted and prints the final request counters.
    """
    ap = _parser()
    args = ap.parse_args()
    latency = _latency_from_args(args, ap)

    try:
        server = LatencyRangeServer(
            args.root,
            latency=latency,
            bandwidth_mbs=args.bandwidth_mbs,
            port=args.port,
        )
        server = server.start()
    except NotADirectoryError as exc:
        ap.error(str(exc))

    # The final line of either output form is flushed explicitly: a consumer reading
    # through a pipe must see the bound address right away (block buffering would
    # hold it back), since an ephemeral port cannot be guessed.
    if not args.json:
        print(f"serving {server.root}/  ({len(server.files())} files)")
        print(f"base    : {server.base}")
        print(f"server  : {server.describe()}")
        print(f"realized: {server.realized_percentiles()}")
        print("Ctrl-C to stop.", flush=True)
    else:
        print(json.dumps(server.describe()), flush=True)

    never_set = threading.Event()
    try:
        never_set.wait()  # park the main thread until Ctrl-C
    except KeyboardInterrupt:
        pass
    finally:
        server.stop()
        summary = (
            f"\nstopped. served {server.n_gets} GETs, {server.total_bytes} bytes, "
            f"peak concurrency {server.max_in_flight}."
        )
        print(summary)
127
+
128
+
129
+ if __name__ == "__main__":
130
+ main()
snailmail/latency.py ADDED
@@ -0,0 +1,183 @@
1
+ """Per-request latency distributions.
2
+
3
+ Object-store GET RTT is well-modelled by a lognormal (a unimodal hump with a long
4
+ right tail), so :class:`LogNormal` is the recommended default; :class:`Normal`,
5
+ :class:`Exponential`, and :class:`Fixed` are there for comparison.
6
+
7
+ Every distribution pre-generates a sample pool once with vectorised numpy and serves
8
+ it round-robin, so :meth:`LatencyDist.draw_s` is O(1) with no per-request RNG and the
9
+ realised distribution is exactly reproducible for a given seed.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import math
15
+
16
+ import numpy as np
17
+
18
+
19
class LatencyDist:
    """Base class: a pre-generated sample pool served round-robin.

    Subclasses set either ``_pool`` (a numpy array of *seconds*) via :meth:`_fill` or
    ``_const_s`` (a scalar, for a degenerate zero-spread distribution), and implement
    :meth:`describe`.
    """

    def __init__(self):
        # A None pool means degenerate (zero-spread): draw a constant instead.
        self._pool: np.ndarray | None = None
        self._const_s = 0.0
        self._i = 0  # index of the next pool entry to serve

    def _fill(self, samples_ms: np.ndarray) -> None:
        """Install ``samples_ms`` (milliseconds) as the pool, stored in seconds.

        Raises:
            ValueError: if ``samples_ms`` is empty. An empty pool would otherwise be
                accepted here and then fail with ``IndexError`` on every
                :meth:`draw_s`, i.e. on every request instead of at construction.
        """
        pool_ms = np.asarray(samples_ms, dtype=float)
        if pool_ms.size == 0:
            raise ValueError("latency sample pool must not be empty (pool_size must be >= 1)")
        # Latency can't be negative; truncate at 0 (matters for Normal's left tail).
        self._pool = np.clip(pool_ms, 0.0, None) / 1e3

    def _set_const_ms(self, ms: float) -> None:
        """Set the constant latency (milliseconds, clamped at 0) used when there is no pool."""
        self._const_s = max(0.0, ms) / 1e3

    def draw_s(self) -> float:
        """Next latency (seconds). O(1); single-loop-thread, so the index is safe."""
        if self._pool is None:
            return self._const_s
        v = float(self._pool[self._i])
        # Wrap around so the pool is replayed in the same order indefinitely.
        self._i = (self._i + 1) % self._pool.size
        return v

    def percentiles(self) -> dict:
        """Realised p10/p50/p90/p99 (ms) of the pool actually served.

        For a degenerate distribution all four percentiles equal the constant and
        ``n`` is 1 for a non-zero constant, 0 for zero latency.
        """
        if self._pool is None:
            v = round(self._const_s * 1e3, 3)
            return {"p10_ms": v, "p50_ms": v, "p90_ms": v, "p99_ms": v, "n": 1 if v else 0}
        p = np.percentile(self._pool * 1e3, [10, 50, 90, 99])
        return {
            "p10_ms": round(float(p[0]), 3),
            "p50_ms": round(float(p[1]), 3),
            "p90_ms": round(float(p[2]), 3),
            "p99_ms": round(float(p[3]), 3),
            "n": int(self._pool.size),
        }

    def describe(self) -> dict:
        """Return a dict describing the distribution; subclasses must override."""
        raise NotImplementedError
64
+
65
+
66
class Fixed(LatencyDist):
    """Constant latency: each request is delayed by exactly ``value_ms``."""

    def __init__(self, value_ms: float = 0.0):
        super().__init__()
        ms = float(value_ms)
        self.value_ms = ms
        # No pool is generated; the base class serves this constant on every draw.
        self._set_const_ms(ms)

    def describe(self) -> dict:
        """Return the distribution name and its configured value."""
        return dict(dist="fixed", value_ms=round(self.value_ms, 4))
76
+
77
+
78
class LogNormal(LatencyDist):
    """Lognormal latency described by its PDF **mode** (peak) and shape ``sigma``::

        mu       = ln(mode_ms) + sigma**2     # places the PDF mode at mode_ms
        sleep_ms = LogNormal(mu, sigma)

    A unimodal hump with a long right tail, which suits object-store GET RTT.
    Derived values: ``median_ms = exp(mu)`` and ``mean_ms = exp(mu + sigma**2/2)``.
    ``mode_ms <= 0`` selects the degenerate zero-latency reference.
    """

    def __init__(
        self,
        mode_ms: float,
        *,
        sigma: float = 0.5,
        seed: int | None = None,
        pool_size: int = 1 << 16,
    ):
        super().__init__()
        self.mode_ms = float(mode_ms)
        self.sigma = float(sigma)

        if not self.mode_ms > 0.0:
            # Degenerate: no pool is built, so draws fall back to the zero constant.
            self.mu = float("nan")
            self.median_ms = self.mean_ms = 0.0
            return

        variance = self.sigma**2
        self.mu = math.log(self.mode_ms) + variance
        self.median_ms = math.exp(self.mu)
        self.mean_ms = math.exp(self.mu + variance / 2.0)
        samples_ms = np.random.default_rng(seed).lognormal(self.mu, self.sigma, size=pool_size)
        self._fill(samples_ms)

    def describe(self) -> dict:
        """Return the parameters, plus derived statistics when a pool exists."""
        pool = self._pool
        info = {
            "dist": "lognormal",
            "mode_ms": round(self.mode_ms, 4),
            "sigma": self.sigma,
            "degenerate": pool is None,
        }
        if pool is None:
            return info
        info["mu"] = round(self.mu, 6)
        info["median_ms"] = round(self.median_ms, 4)
        info["mean_ms"] = round(self.mean_ms, 4)
        info["pool_size"] = int(pool.size)
        return info
125
+
126
+
127
class Normal(LatencyDist):
    """Gaussian latency around ``mean_ms`` with spread ``std_ms``; draws below 0 become 0.

    ``std_ms <= 0`` is the degenerate deterministic case (sleeps ``mean_ms``).
    """

    def __init__(
        self,
        mean_ms: float,
        *,
        std_ms: float = 0.0,
        seed: int | None = None,
        pool_size: int = 1 << 16,
    ):
        super().__init__()
        self.mean_ms = float(mean_ms)
        self.std_ms = float(std_ms)

        if not self.std_ms > 0.0:
            # No spread: serve the mean as a constant instead of building a pool.
            self._set_const_ms(self.mean_ms)
            return

        samples_ms = np.random.default_rng(seed).normal(self.mean_ms, self.std_ms, size=pool_size)
        self._fill(samples_ms)

    def describe(self) -> dict:
        """Return the parameters, plus the pool size when a pool exists."""
        pool = self._pool
        info = {
            "dist": "normal",
            "mean_ms": round(self.mean_ms, 4),
            "std_ms": round(self.std_ms, 4),
            "degenerate": pool is None,
        }
        if pool is None:
            return info
        info["pool_size"] = int(pool.size)
        return info
160
+
161
+
162
class Exponential(LatencyDist):
    """Exponential latency with mean ``mean_ms`` (mode at 0; heavy single tail).

    ``mean_ms <= 0`` selects the degenerate zero-latency reference.
    """

    def __init__(self, mean_ms: float, *, seed: int | None = None, pool_size: int = 1 << 16):
        super().__init__()
        self.mean_ms = float(mean_ms)
        if not self.mean_ms > 0.0:
            # Degenerate: keep the base class's zero constant, build no pool.
            return
        samples_ms = np.random.default_rng(seed).exponential(self.mean_ms, size=pool_size)
        self._fill(samples_ms)

    def describe(self) -> dict:
        """Return the parameters, plus the pool size when a pool exists."""
        pool = self._pool
        info = {
            "dist": "exponential",
            "mean_ms": round(self.mean_ms, 4),
            "degenerate": pool is None,
        }
        if pool is None:
            return info
        info["pool_size"] = int(pool.size)
        return info
snailmail/server.py ADDED
@@ -0,0 +1,250 @@
1
+ """A local HTTP server that serves a directory over Range with injected latency.
2
+
3
+ HTTP/Range correctness (206, Content-Range, suffix ranges, 416, conditional
4
+ requests), directory serving, and path-traversal safety are handled by **aiohttp's
5
+ static handler** — snailmail reimplements none of it. It adds only the benchmark
6
+ instrumentation:
7
+
8
+ * per-request latency from a pluggable distribution (:mod:`snailmail.latency`).
9
+ * a shared-pipe BANDWIDTH limiter (:class:`~snailmail.bandwidth.AsyncSharedPipe`)
10
+ modelling one finite client downlink: response bytes are metered through a single
11
+ pipe so aggregate egress is capped and over-read costs real time.
12
+ * server-side counters: GET/request counts, bytes read, requested paths and
13
+ methods, 404 misses, and PEAK concurrency (max requests in flight), so wall-clock
14
+ can be read honestly (not a serial ``n*rtt`` assumption).
15
+
16
+ Serves every file under a root directory by relative path — one object per file, the
17
+ shape of an object-store / Icechunk virtual dataset. Files stream from disk and are
18
+ never loaded into RAM, so arbitrarily large files work. The injected latency is
19
+ *added* to the real (sub-ms, local-SSD) range-read time, so the modelled RTT stays
20
+ dominated by the knob.
21
+
22
+ Consumers must opt into plain HTTP: obstore ``client_options={"allow_http": True}``,
23
+ icechunk ``http_store({"allow_http": "true"})``.
24
+
25
+ LOCAL loopback only (binds 127.0.0.1). Use in-process via :class:`LatencyRangeServer`
26
+ (exposes counters + live :meth:`~LatencyRangeServer.set_latency` /
27
+ :meth:`~LatencyRangeServer.set_bandwidth_mbs`) or as the ``snailmail`` CLI.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import asyncio
33
+ import threading
34
+ from collections import Counter
35
+ from pathlib import Path
36
+
37
+ from aiohttp import web
38
+
39
+ from snailmail.bandwidth import AsyncSharedPipe
40
+ from snailmail.latency import Fixed, LatencyDist
41
+
42
+
43
class LatencyRangeServer:
    """Threaded localhost HTTP server: aiohttp Range serving of a directory + latency.

    The server runs an asyncio event loop on a daemon thread. :meth:`start` blocks
    until the socket is bound (or re-raises the startup failure); :meth:`stop` halts
    the loop and lets the serving thread release the listening socket and close the
    loop, so a fixed port can be reused by a later server.

    Parameters
    ----------
    root:          directory to serve. Every file beneath it is reachable at its path
                   relative to the root (range- and traversal-safe). To benchmark a
                   single file, put it in a directory and serve that.
    latency:       per-request latency distribution (a :class:`~snailmail.latency.LatencyDist`,
                   e.g. ``LogNormal(mode_ms=45)``); ``None`` injects no latency. Mutable
                   via :meth:`set_latency`.
    bandwidth_mbs: shared-pipe bandwidth, MB/s (1 MB = 1e6 bytes); None = unlimited.
    port:          TCP port to bind (0 = ephemeral; set a fixed port when a consumer
                   mishandles ephemeral ports).

    Raises
    ------
    NotADirectoryError: if ``root`` is not an existing directory.
    """

    # Upper bound (seconds) on how long stop() waits for the serving thread's teardown.
    # The thread is a daemon, so giving up on the wait never blocks interpreter exit.
    _STOP_JOIN_TIMEOUT_S = 5.0

    def __init__(
        self,
        root,
        *,
        latency: LatencyDist | None = None,
        bandwidth_mbs: float | None = None,
        port: int = 0,
    ):
        self.root = Path(root)
        if not self.root.is_dir():
            raise NotADirectoryError(f"root must be a directory: {self.root}")
        self._root_resolved = self.root.resolve()
        self.latency = latency if latency is not None else Fixed(0.0)
        self.set_bandwidth_mbs(bandwidth_mbs)
        self._req_port = port
        self.port: int | None = None  # actual bound port, known after start()
        self.total_bytes = 0
        self.n_requests = self.n_gets = self.n_misses = 0
        self.methods: Counter[str] = Counter()
        self.paths: Counter[str] = Counter()
        self._size_cache: dict[str, int] = {}  # loop-thread only, so no lock needed
        self._in_flight = self.max_in_flight = 0
        self._lock = threading.Lock()  # guards the counters and the _loop handle
        self._ready = threading.Event()  # set once startup has succeeded or failed
        self._loop: asyncio.AbstractEventLoop | None = None
        self._runner: web.AppRunner | None = None
        self._startup_exc: BaseException | None = None
        self._thread: threading.Thread | None = None

    # -- accounting (sizes/ranges parsed only for counters/bandwidth; serving is aiohttp's) --
    def _target_size(self, path: str) -> int | None:
        """Size of the file a request path maps to, or None if it resolves to no file.

        Done here rather than delegated to aiohttp on purpose: its static handler
        computes the same thing but never exposes it before serving, and FileResponse
        only decides a 404 at send time. We need the answer up front — the size to
        meter bandwidth before bytes flow, and the miss to count ``n_misses`` — so we
        do our own safe lookup with the stdlib (no aiohttp internals copied). The
        ``is_relative_to`` guard stops a ``..`` escape from being stat'd and
        mis-counted as a hit when aiohttp would serve it as a 404. Hits are cached by
        request path so repeats skip the filesystem.
        """
        if path in self._size_cache:
            return self._size_cache[path]
        try:
            target = (self.root / path.lstrip("/")).resolve()
            if not target.is_relative_to(self._root_resolved) or not target.is_file():
                return None  # traversal escape or missing file => a miss
            size = target.stat().st_size
        except OSError:
            return None
        self._size_cache[path] = size
        return size

    def _range_bytes(self, request: web.Request, size: int) -> int:
        """Bytes a GET will read against a known file size (pure; no side effects).

        Uses aiohttp's own ``request.http_range`` parser so this count matches what the
        static handler actually serves; a malformed Range raises ValueError there and
        aiohttp answers 416 (no body), so we count 0.
        """
        try:
            start, stop, _ = request.http_range.indices(size)
        except ValueError:
            return 0
        return max(0, stop - start)

    def _middleware(self):
        """Build the aiohttp middleware that counts, delays and meters each request."""

        @web.middleware
        async def mw(request: web.Request, handler):
            # FileResponse defers its 404 to send time, so detect misses ourselves up
            # front (it's also the size we need for byte accounting).
            is_read = request.method in ("GET", "HEAD")
            size = self._target_size(request.path) if is_read else None
            with self._lock:
                self.n_requests += 1
                self.methods[request.method] += 1
                self.paths[request.path] += 1
                if request.method == "GET":
                    self.n_gets += 1
                if is_read and size is None:  # a miss still cost a round trip — count it
                    self.n_misses += 1
                self._in_flight += 1
                self.max_in_flight = max(self.max_in_flight, self._in_flight)
            nbytes = 0
            try:
                await asyncio.sleep(self.latency.draw_s())  # the injected RTT
                if request.method == "GET" and size is not None:
                    nbytes = self._range_bytes(request, size)
                    await self.pipe.transfer(nbytes)  # shared-pipe bandwidth
                return await handler(request)
            finally:
                with self._lock:
                    self._in_flight -= 1
                    self.total_bytes += nbytes

        return mw

    async def _start(self):
        """Create the aiohttp app, bind 127.0.0.1 and record the bound port."""
        app = web.Application(middlewares=[self._middleware()])
        # follow_symlinks=False (the default) keeps serving inside the root, matching
        # the traversal check in _target_size.
        app.router.add_static("/", self.root, follow_symlinks=False)
        self._runner = web.AppRunner(app)
        await self._runner.setup()
        site = web.TCPSite(self._runner, "127.0.0.1", self._req_port)
        await site.start()
        # AppRunner.addresses is the public list of getsockname() results for the bound
        # sockets; for our single IPv4 site the first entry is (host, port).
        self.port = self._runner.addresses[0][1]

    def _serve(self):
        """Thread target: start the server, run the loop, then tear everything down.

        Teardown runs on this thread (the loop's owner) once the loop is stopped, so
        the listening socket is closed and the loop is closed rather than leaked.
        """
        loop = asyncio.new_event_loop()
        self._loop = loop
        asyncio.set_event_loop(loop)
        try:
            try:
                loop.run_until_complete(self._start())
            except BaseException as exc:  # surface startup failures instead of hanging start()
                self._startup_exc = exc
                self._loop = None  # nothing for stop() to halt; cleanup happens below
                return
            finally:
                self._ready.set()
            loop.run_forever()
        finally:
            runner, self._runner = self._runner, None
            try:
                if runner is not None:
                    # Closes the listening socket and shuts down open connections.
                    loop.run_until_complete(runner.cleanup())
            finally:
                loop.close()

    def start(self) -> "LatencyRangeServer":
        """Start serving on a daemon thread; return once the port is bound.

        Re-raises whatever exception made startup fail (e.g. the port is in use).
        """
        # Reset startup state so a restart waits for the *new* bind rather than
        # returning immediately on the previous run's already-set event.
        self._ready.clear()
        self._startup_exc = None
        self._thread = threading.Thread(target=self._serve, daemon=True)
        self._thread.start()
        self._ready.wait()
        if self._startup_exc is not None:
            raise self._startup_exc
        return self

    def stop(self) -> None:
        """Stop serving and release the listening socket. Safe to call repeatedly.

        Waits up to ``_STOP_JOIN_TIMEOUT_S`` seconds for the serving thread to finish
        its teardown; counters remain readable afterwards.
        """
        # Take the loop handle exactly once so a second stop() cannot interrupt the
        # serving thread while it is running its cleanup on the same loop.
        with self._lock:
            loop, self._loop = self._loop, None
        if loop is not None and not loop.is_closed():
            try:
                loop.call_soon_threadsafe(loop.stop)
            except RuntimeError:
                pass  # the serving thread closed the loop between the check and the call
        thread = self._thread
        if thread is not None and thread is not threading.current_thread():
            thread.join(self._STOP_JOIN_TIMEOUT_S)

    @property
    def base(self) -> str:
        """Root URL to point a reader (or object store) at; append a key to it."""
        return f"http://127.0.0.1:{self.port}/"

    def url(self, key: str) -> str:
        """URL for a single served key, e.g. ``server.url("chunks/0.0.0")``."""
        return f"{self.base}{key.lstrip('/')}"

    def files(self) -> list[str]:
        """The served keys: relative paths of every file under the root (sorted).

        Keys always use ``/`` as the separator so they match the URL paths.
        """
        return sorted(
            p.relative_to(self.root).as_posix() for p in self.root.rglob("*") if p.is_file()
        )

    def set_latency(self, latency: LatencyDist) -> None:
        """Replace the latency distribution used for subsequent requests."""
        self.latency = latency

    def set_bandwidth_mbs(self, bandwidth_mbs: float | None) -> None:
        """Replace the shared pipe with one of ``bandwidth_mbs`` MB/s (None = unlimited)."""
        # AsyncSharedPipe owns the "<= 0 means unlimited" rule; read it back for display.
        self.pipe = AsyncSharedPipe(bandwidth_mbs * 1e6 if bandwidth_mbs else None)
        self.bandwidth_mbs = self.pipe.B / 1e6 if self.pipe.B else None

    def reset_counts(self) -> None:
        """Zero the request counters and clear outstanding bandwidth reservations."""
        with self._lock:
            self.total_bytes = 0
            self.n_requests = self.n_gets = self.n_misses = 0
            self.methods = Counter()
            self.paths = Counter()
            self.max_in_flight = self._in_flight  # keep currently-active requests in the new window
        self.pipe.reset()

    def stats(self) -> dict:
        """Atomic snapshot of the request counters (persists until :meth:`reset_counts`)."""
        with self._lock:
            return {
                "n_gets": self.n_gets,
                "n_requests": self.n_requests,
                "n_misses": self.n_misses,
                "max_in_flight": self.max_in_flight,
                "total_bytes": self.total_bytes,
                "methods": dict(self.methods),
                "paths": dict(self.paths),
            }

    def describe(self) -> dict:
        """Static description of the server: root, address, file count and knobs."""
        return {
            "root": str(self.root),
            "base": self.base,
            "n_files": len(self.files()),
            "port": self.port,
            "latency": self.latency.describe(),
            "bandwidth_mbs": self.bandwidth_mbs,
        }

    def realized_percentiles(self) -> dict:
        """Percentiles of the latency pool actually being served."""
        return self.latency.percentiles()

    def __enter__(self) -> "LatencyRangeServer":
        return self.start()

    def __exit__(self, *exc) -> None:
        self.stop()
@@ -0,0 +1,151 @@
1
+ Metadata-Version: 2.4
2
+ Name: snailmail
3
+ Version: 0.1.0
4
+ Summary: A local HTTP server that serves a directory over Range with injectable latency and bandwidth limits, for benchmarking range / object-store / virtual-chunk reads under realistic network conditions.
5
+ Project-URL: Homepage, https://github.com/ianhi/snailmail
6
+ Project-URL: Repository, https://github.com/ianhi/snailmail
7
+ Project-URL: Issues, https://github.com/ianhi/snailmail/issues
8
+ Project-URL: Changelog, https://github.com/ianhi/snailmail/blob/main/CHANGELOG.md
9
+ Author-email: Ian Hunt-Isaak <ian@earthmover.io>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: bandwidth,benchmark,http,icechunk,latency,object-store,range-requests,zarr
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Operating System :: MacOS
16
+ Classifier: Operating System :: POSIX :: Linux
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
23
+ Classifier: Topic :: System :: Benchmark
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: aiohttp>=3.9
26
+ Requires-Dist: numpy>=1.24
27
+ Description-Content-Type: text/markdown
28
+
29
+ # snailmail
30
+
31
+ A local HTTP server that serves a directory over HTTP Range, injecting per-request
32
+ latency and a bandwidth cap, and counts GETs and peak concurrency.
33
+
34
+ Use it to benchmark range-based readers — object stores, Zarr/Icechunk virtual
35
+ chunks, tiled image formats — under realistic network conditions, on your laptop,
36
+ with no cloud and no root.
37
+
38
+ ## Why you'd want it
39
+
40
+ Local disk hides the cost that dominates remote reads: network round-trips.
41
+ A read pattern that finishes instantly against a warm page cache can take
42
+ minutes of serial round-trips against object storage. snailmail adds a
43
+ per-request latency draw and a shared bandwidth pipe so you can measure how a
44
+ reader behaves over the wire. `max_in_flight` tells you peak concurrency, which
45
+ wall-clock time alone cannot.
46
+
47
+ ## Install
48
+
49
+ ```bash
50
+ uv add snailmail # or: pip install snailmail
51
+ ```
52
+
53
+ ## Use it in a benchmark
54
+
55
+ snailmail serves a directory. Every file under the root is reachable at its path
56
+ relative to the root, which matches the shape of an object store or Icechunk virtual
57
+ dataset (one object per file). Point your reader at `server.base` and have it fetch
58
+ keys like `chunks/0.0.0`.
59
+
60
+ ```python
61
+ from snailmail import LatencyRangeServer, LogNormal
62
+
63
+ with LatencyRangeServer("my_zarr_store/", latency=LogNormal(mode_ms=40), bandwidth_mbs=100) as server:
64
+ server.reset_counts()
65
+ open_and_read(server.base) # your reader: obstore, icechunk, zarr, ...
66
+ print(server.stats())
67
+ # {'n_gets': 312, 'n_requests': 312, 'n_misses': 0, 'max_in_flight': 16,
68
+ # 'total_bytes': .., 'methods': {'GET': 312}, 'paths': {..}}
69
+ ```
70
+
71
+ `open_and_read` stands in for the reader you're benchmarking. It makes HTTP GETs
72
+ (with `Range` headers) against `server.base`; snailmail injects the latency, meters
73
+ the bytes through the bandwidth pipe, and streams the file from disk in response. A
74
+ direct request looks like this:
75
+
76
+ ```python
77
+ import urllib.request
78
+
79
+ with LatencyRangeServer("my_zarr_store/") as server:
80
+ req = urllib.request.Request(server.url("chunks/0.0.0"), headers={"Range": "bytes=0-1023"})
81
+ first_kib = urllib.request.urlopen(req).read()
82
+ ```
83
+
84
+ `server.url(key)` builds the URL for a key; `server.files()` lists the served keys.
85
+ `stats()` is a snapshot of request counters since the last `reset_counts()`:
86
+ `n_requests` counts every request, `n_gets` only the GETs, and `n_misses` the
87
+ requests for keys that don't exist (404, like an object store's NoSuchKey). Tune
88
+ between measurements with `set_latency(dist)`, `set_bandwidth_mbs(x)`, and
89
+ `reset_counts()`.
90
+
91
+ Latency is a pluggable distribution passed as `latency=`:
92
+
93
+ ```python
94
+ from snailmail import LogNormal, Normal, Exponential, Fixed
95
+
96
+ LogNormal(mode_ms=45, sigma=0.5) # unimodal hump with long right tail; fits object-store GET RTT
97
+ Normal(mean_ms=45, std_ms=10) # symmetric, truncated at 0
98
+ Exponential(mean_ms=45) # peak at 0; a poor model for GET RTT
99
+ Fixed(20) # deterministic
100
+ ```
101
+
102
+ `latency=None` (the default) injects no latency.
103
+
104
+ ## From the CLI
105
+
106
+ ```bash
107
+ snailmail ./store --dist lognormal --mode-ms 45 --sigma 0.5
108
+ snailmail ./store --dist normal --mean-ms 45 --std-ms 10
109
+ snailmail ./store --dist exponential --mean-ms 45
110
+ snailmail ./store --dist fixed --value-ms 20
111
+ snailmail ./store --bandwidth-mbs 100 --port 8080 --json # no latency; JSON address line
112
+ ```
113
+
114
+ The argument is the directory to serve.
115
+
116
+ `--json` prints a single machine-readable line and flushes it before serving,
117
+ so a script can spawn snailmail, read the bound address from stdout, and proceed.
118
+
119
+ The CLI rejects a flag that doesn't belong to the chosen `--dist`. Omit `--dist`
120
+ for no injected latency.
121
+
122
+ ## What it models
123
+
124
+ **Latency** is a per-request draw from the chosen distribution. `lognormal` is
125
+ the recommended default: parameterise it by the PDF mode (`--mode-ms`) and shape
126
+ (`--sigma`). `normal`, `exponential`, and `fixed` are available for comparison.
127
+
128
+ **Bandwidth** is a single shared FIFO pipe (`--bandwidth-mbs`, MB/s = 1e6 bytes/s).
129
+ Per-request round-trips run in parallel, but response bytes serialize through the
130
+ pipe, so aggregate egress is capped and over-read costs real transfer time. Omit
131
+ for unlimited bandwidth.
132
+
133
+ HTTP correctness (206, `Content-Range`, suffix ranges, 416, conditional requests)
134
+ and on-disk streaming come from aiohttp's `web.FileResponse`. Files are never
135
+ loaded into RAM, so multi-gigabyte files work.
136
+
137
+ Missing keys return 404 and are counted in `n_misses`, matching object-store
138
+ NoSuchKey behavior.
139
+
140
+ ## Notes
141
+
142
+ - Loopback only (binds `127.0.0.1`); nothing leaves the machine.
143
+ - Consumers must opt into plain HTTP: obstore `client_options={"allow_http": True}`,
144
+ icechunk `http_store({"allow_http": "true"})`.
145
+ - The injected latency is added to the real (sub-millisecond, local-SSD)
146
+ range-read time, so the modelled RTT is dominated by the configured value.
147
+ - For transport-accurate shaping on real packets, use `tc netem` (Linux) or
148
+ `dnctl`/`pfctl` (macOS) in front of any file server. snailmail trades that
149
+ for zero-setup, in-process instrumentation.
150
+
151
+ Contributing? See [AGENTS.md](AGENTS.md). MIT licensed.
@@ -0,0 +1,10 @@
1
+ snailmail/__init__.py,sha256=jScOk6l_ClRE70I9WLYv2g_-KMhq_Nsr2P-yhb6TWO4,812
2
+ snailmail/bandwidth.py,sha256=BjDpyCGSWIBtf-zovDNI9X5vyHWttugnHKnQ6QJ6X3M,1229
3
+ snailmail/cli.py,sha256=mH_EWFAiuONJSBNLoY2M569WSFFcgdm13-h6ZMbySRQ,5336
4
+ snailmail/latency.py,sha256=RJm-MZ7YJ9kyO_3_i0GdMkB6ngPS7om8NIfnalb3bVI,6138
5
+ snailmail/server.py,sha256=xfk4hWHbNyI0bxkI4-B9HKDagyQaKKGUx9w-_gSRoac,10794
6
+ snailmail-0.1.0.dist-info/METADATA,sha256=zsTHz1tS-F2g4QlCJKvIRgRQr7YCGZh4lwmiup84odQ,6465
7
+ snailmail-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
8
+ snailmail-0.1.0.dist-info/entry_points.txt,sha256=ZXLI1qwesFgqKqObLJxlTLoGtNCfhyZnAdjGfEUq4dc,49
9
+ snailmail-0.1.0.dist-info/licenses/LICENSE,sha256=AX-IUE0hUhrzJtj1-dyW2QB-e0k6a6kdSEK3swHO8bw,1071
10
+ snailmail-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ snailmail = snailmail.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ian Hunt-Isaak
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.