PyPI - snailmail - Versions diffs - 0.1.0__py3-none-any.whl - Mend

snailmail 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

snailmail/__init__.py +26 -0
snailmail/bandwidth.py +35 -0
snailmail/cli.py +130 -0
snailmail/latency.py +183 -0
snailmail/server.py +250 -0
snailmail-0.1.0.dist-info/METADATA +151 -0
snailmail-0.1.0.dist-info/RECORD +10 -0
snailmail-0.1.0.dist-info/WHEEL +4 -0
snailmail-0.1.0.dist-info/entry_points.txt +2 -0
snailmail-0.1.0.dist-info/licenses/LICENSE +21 -0

snailmail/__init__.py ADDED Viewed

@@ -0,0 +1,26 @@
+"""snailmail — a local HTTP file server with injectable latency and bandwidth limits.
+For benchmarking range / object-store / virtual-chunk reads under realistic network
+conditions. See :class:`LatencyRangeServer`.
+"""
+from importlib.metadata import PackageNotFoundError, version
+from snailmail.bandwidth import AsyncSharedPipe
+from snailmail.latency import Exponential, Fixed, LatencyDist, LogNormal, Normal
+from snailmail.server import LatencyRangeServer
+# Resolve the installed distribution's version string; fall back to a placeholder when
+# no package metadata can be found.
+try:
+    __version__ = version("snailmail")  # derived from the git tag at build time (hatch-vcs)
+except PackageNotFoundError:  # running from a source tree with no install
+    __version__ = "0+unknown"
+# Names exported by ``from snailmail import *``: the server, the bandwidth pipe, and the
+# latency distributions imported above.
+__all__ = [
+    "LatencyRangeServer",
+    "AsyncSharedPipe",
+    "LatencyDist",
+    "LogNormal",
+    "Normal",
+    "Exponential",
+    "Fixed",
+]

snailmail/bandwidth.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""Bandwidth limiting for responses."""
+from __future__ import annotations
+import asyncio
+class AsyncSharedPipe:
+    """FIFO rate limiter that models a single shared client downlink (async).
+    All transfers reserve time on one pipe of ``B`` bytes/s, so the aggregate rate
+    never exceeds ``B`` however many transfers overlap. Each reservation starts when
+    the previous one ends, and the caller sleeps until its own reservation is over.
+    ``B is None`` means the limiter is disabled.
+    """
+    def __init__(self, bytes_per_s: float | None):
+        # None, 0 and negative rates all mean "no limit".
+        limited = bool(bytes_per_s) and bytes_per_s > 0
+        self.B = bytes_per_s if limited else None
+        self._lock = asyncio.Lock()
+        self._free = 0.0  # loop-clock timestamp at which the pipe is next idle
+    async def transfer(self, nbytes: int) -> None:
+        """Reserve pipe time for ``nbytes`` and sleep until that reservation ends."""
+        if self.B is None or nbytes <= 0:
+            return
+        now = asyncio.get_running_loop().time
+        async with self._lock:
+            # Queue behind whatever is already booked, or start right away if idle.
+            begins = max(now(), self._free)
+            ends = begins + nbytes / self.B
+            self._free = ends
+        remaining = ends - now()
+        if remaining > 0:
+            await asyncio.sleep(remaining)
+    def reset(self) -> None:
+        """Forget any booked pipe time so the next transfer starts immediately."""
+        self._free = 0.0

snailmail/cli.py ADDED Viewed

@@ -0,0 +1,130 @@
+"""The ``snailmail`` command-line entry point."""
+from __future__ import annotations
+import argparse
+import json
+import threading
+from importlib.metadata import version
+from snailmail.latency import Exponential, Fixed, LatencyDist, LogNormal, Normal
+from snailmail.server import LatencyRangeServer
+# CLI param -> which --dist owns it. A param set for the wrong dist is a user error,
+# not silently ignored, so the realized latency always matches what was asked for.
+# Keys are argparse attribute names (underscored). A value is either one distribution
+# name or a tuple of names when several distributions share the parameter.
+_DIST_PARAMS = {
+    "mode_ms": "lognormal",
+    "sigma": "lognormal",
+    "mean_ms": ("normal", "exponential"),
+    "std_ms": "normal",
+    "value_ms": "fixed",
+}
+def _latency_from_args(args: argparse.Namespace, ap: argparse.ArgumentParser) -> LatencyDist | None:
+    """Turn the parsed latency flags into a distribution object, or ``None``.
+    Returns ``None`` when ``--dist`` was omitted (no injected latency). A flag that does
+    not belong to the selected ``--dist``, or a missing required flag, is reported
+    through ``ap.error``.
+    """
+    def flag(name: str) -> str:
+        # argparse attribute name -> the flag as the user typed it
+        return f"--{name.replace('_', '-')}"
+    given = [name for name in _DIST_PARAMS if getattr(args, name) is not None]
+    dist = args.dist
+    if dist is None:
+        for name in given:
+            ap.error(f"{flag(name)} requires --dist")
+        return None  # no injected latency
+    for name in given:
+        owner = _DIST_PARAMS[name]
+        allowed = owner if not isinstance(owner, str) else (owner,)
+        if dist not in allowed:
+            ap.error(f"{flag(name)} is not valid with --dist {dist}")
+    match dist:
+        case "lognormal":
+            if args.mode_ms is None:
+                ap.error("--dist lognormal requires --mode-ms")
+            # Pass sigma only when given so LogNormal's own default applies otherwise.
+            shape = {"sigma": args.sigma} if args.sigma is not None else {}
+            return LogNormal(args.mode_ms, seed=args.seed, **shape)
+        case "normal":
+            if args.mean_ms is None:
+                ap.error("--dist normal requires --mean-ms")
+            # Same for std_ms and Normal's default.
+            spread = {"std_ms": args.std_ms} if args.std_ms is not None else {}
+            return Normal(args.mean_ms, seed=args.seed, **spread)
+        case "exponential":
+            if args.mean_ms is None:
+                ap.error("--dist exponential requires --mean-ms")
+            return Exponential(args.mean_ms, seed=args.seed)
+    # Anything else falls through to the fixed distribution.
+    if args.value_ms is None:
+        ap.error("--dist fixed requires --value-ms")
+    return Fixed(args.value_ms)
+def _parser() -> argparse.ArgumentParser:
+    """Build the ``snailmail`` argument parser.
+    The ``--version`` string comes from the installed package metadata. When that
+    metadata is missing (running from a source tree with no install) the placeholder
+    ``0+unknown`` is used, so building the parser never fails on a version lookup.
+    """
+    # Imported here so the fallback does not depend on the module-level import list.
+    from importlib.metadata import PackageNotFoundError
+    try:
+        pkg_version = version("snailmail")
+    except PackageNotFoundError:
+        pkg_version = "0+unknown"
+    ap = argparse.ArgumentParser(
+        prog="snailmail",
+        description="Serve a directory over HTTP with injected latency "
+        "+ bandwidth limits, for benchmarking. Binds 127.0.0.1 and serves until interrupted.",
+        epilog="examples:\n"
+        "  snailmail ./store --dist lognormal --mode-ms 45 --sigma 0.5\n"
+        "  snailmail ./store --dist normal --mean-ms 45 --std-ms 10\n"
+        "  snailmail ./store --dist exponential --mean-ms 45\n"
+        "  snailmail ./store --dist fixed --value-ms 20\n"
+        "  snailmail ./store --bandwidth-mbs 100 --port 8080 --json   # no latency; JSON address line",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    ap.add_argument("--version", action="version", version=f"%(prog)s {pkg_version}")
+    ap.add_argument("root", help="directory to serve")
+    ap.add_argument(
+        "--dist",
+        choices=["lognormal", "normal", "exponential", "fixed"],
+        help="latency distribution; omit for no injected latency",
+    )
+    # The distribution parameters have no default, so "not given" stays distinguishable
+    # from any explicit value.
+    ap.add_argument("--mode-ms", type=float, help="[lognormal] PDF mode (peak), ms")
+    ap.add_argument("--sigma", type=float, help="[lognormal] log-scale shape (default 0.5)")
+    ap.add_argument("--mean-ms", type=float, help="[normal, exponential] mean latency, ms")
+    ap.add_argument("--std-ms", type=float, help="[normal] standard deviation, ms")
+    ap.add_argument("--value-ms", type=float, help="[fixed] deterministic latency, ms")
+    ap.add_argument(
+        "--seed", type=int, default=None, help="RNG seed for the latency pool (reproducible draws)"
+    )
+    ap.add_argument(
+        "--bandwidth-mbs", type=float, default=None, help="shared-pipe MB/s; omit = unlimited"
+    )
+    ap.add_argument("--port", type=int, default=0, help="TCP port (0 = ephemeral)")
+    ap.add_argument(
+        "--json",
+        action="store_true",
+        help="emit the server description as one line of JSON instead of the banner",
+    )
+    return ap
+def main() -> None:
+    """Run the CLI: parse arguments, start the server, announce it, block until Ctrl-C.
+    Startup failures that are ``OSError`` (the root is not a directory, the requested
+    port cannot be bound, ...) are reported through ``ap.error`` rather than as a
+    traceback. On exit the server is stopped and a one-line summary is printed.
+    """
+    ap = _parser()
+    args = ap.parse_args()
+    latency = _latency_from_args(args, ap)
+    try:
+        server = LatencyRangeServer(
+            args.root,
+            latency=latency,
+            bandwidth_mbs=args.bandwidth_mbs,
+            port=args.port,
+        ).start()
+    except OSError as exc:
+        # NotADirectoryError is an OSError; so is a bind failure such as a port in use.
+        ap.error(str(exc))
+    # Flush so a consumer reading a pipe gets the bound address immediately, not after
+    # block-buffering — it can't predict an ephemeral port otherwise.
+    if args.json:
+        print(json.dumps(server.describe()), flush=True)
+    else:
+        print(f"serving {server.root}/ ({len(server.files())} files)")
+        print(f"base    : {server.base}")
+        print(f"server  : {server.describe()}")
+        print(f"realized: {server.realized_percentiles()}")
+        print("Ctrl-C to stop.", flush=True)
+    try:
+        threading.Event().wait()  # block until Ctrl-C
+    except KeyboardInterrupt:
+        pass
+    finally:
+        server.stop()
+        print(
+            f"\nstopped. served {server.n_gets} GETs, {server.total_bytes} bytes, "
+            f"peak concurrency {server.max_in_flight}."
+        )
+if __name__ == "__main__":
+    main()

snailmail/latency.py ADDED Viewed

@@ -0,0 +1,183 @@
+"""Per-request latency distributions.
+Object-store GET RTT is well-modelled by a lognormal (a unimodal hump with a long
+right tail), so :class:`LogNormal` is the recommended default; :class:`Normal`,
+:class:`Exponential`, and :class:`Fixed` are there for comparison.
+Every distribution pre-generates a sample pool once with vectorised numpy and serves
+it round-robin, so :meth:`LatencyDist.draw_s` is O(1) with no per-request RNG and the
+realised distribution is exactly reproducible for a given seed.
+"""
+from __future__ import annotations
+import math
+import numpy as np
+class LatencyDist:
+    """Base class: a pre-generated sample pool served round-robin.
+    Subclasses set either ``_pool`` (a numpy array of *seconds*) via :meth:`_fill` or
+    ``_const_s`` (a scalar, for a degenerate zero-spread distribution), and implement
+    :meth:`describe`.
+    """
+    def __init__(self):
+        # A None pool means degenerate (zero-spread): draw a constant instead.
+        self._pool: np.ndarray | None = None
+        self._const_s = 0.0
+        self._i = 0
+    def _fill(self, samples_ms: np.ndarray) -> None:
+        """Install ``samples_ms`` (milliseconds) as the pool, stored in seconds.
+        Raises ``ValueError`` for an empty pool: there would be nothing to draw, and
+        :meth:`draw_s` would otherwise only fail later, on the first request.
+        """
+        # Latency can't be negative; truncate at 0 (matters for Normal's left tail).
+        pool = np.clip(np.asarray(samples_ms, dtype=float), 0.0, None) / 1e3
+        if pool.size == 0:
+            raise ValueError("latency sample pool must contain at least one sample")
+        self._pool = pool
+        self._i = 0  # restart the round-robin so the index is valid for the new pool
+    def _set_const_ms(self, ms: float) -> None:
+        """Set the constant returned while no pool is installed (ms, clamped at 0)."""
+        self._const_s = max(0.0, ms) / 1e3
+    def draw_s(self) -> float:
+        """Next latency (seconds). O(1); single-loop-thread, so the index is safe."""
+        if self._pool is None:
+            return self._const_s
+        v = float(self._pool[self._i])
+        self._i = (self._i + 1) % self._pool.size
+        return v
+    def percentiles(self) -> dict:
+        """Realised p10/p50/p90/p99 (ms) of the pool actually served.
+        Without a pool every percentile is the constant, and ``n`` is 1 for a non-zero
+        constant and 0 for zero latency.
+        """
+        if self._pool is None:
+            v = round(self._const_s * 1e3, 3)
+            return {"p10_ms": v, "p50_ms": v, "p90_ms": v, "p99_ms": v, "n": 1 if v else 0}
+        p = np.percentile(self._pool * 1e3, [10, 50, 90, 99])
+        return {
+            "p10_ms": round(float(p[0]), 3),
+            "p50_ms": round(float(p[1]), 3),
+            "p90_ms": round(float(p[2]), 3),
+            "p99_ms": round(float(p[3]), 3),
+            "n": int(self._pool.size),
+        }
+    def describe(self) -> dict:
+        """Describe the distribution as a dict; subclasses must override."""
+        raise NotImplementedError
+class Fixed(LatencyDist):
+    """Constant latency: every draw returns ``value_ms`` (the base helper clamps it at 0)."""
+    def __init__(self, value_ms: float = 0.0):
+        super().__init__()
+        ms = float(value_ms)
+        self.value_ms = ms
+        self._set_const_ms(ms)
+    def describe(self) -> dict:
+        """Return the distribution name and its configured value, rounded to 4 places."""
+        summary = {"dist": "fixed"}
+        summary["value_ms"] = round(self.value_ms, 4)
+        return summary
+class LogNormal(LatencyDist):
+    """Lognormal latency described by its PDF **mode** (peak) and log-scale ``sigma``::
+        mu       = ln(mode_ms) + sigma**2          # places the PDF peak at mode_ms
+        sleep_ms = LogNormal(mu, sigma)
+    A single hump with a long right tail, which fits object-store GET RTT well. Also
+    derived: ``median_ms = exp(mu)`` and ``mean_ms = exp(mu + sigma**2/2)``. Unless
+    ``mode_ms`` is positive, no pool is generated and the distribution is the
+    degenerate zero-latency reference.
+    """
+    def __init__(
+        self,
+        mode_ms: float,
+        *,
+        sigma: float = 0.5,
+        seed: int | None = None,
+        pool_size: int = 1 << 16,
+    ):
+        super().__init__()
+        self.mode_ms = float(mode_ms)
+        self.sigma = float(sigma)
+        if not self.mode_ms > 0.0:
+            # Degenerate: no pool, so draws fall back to the base-class constant (0).
+            self.mu = float("nan")
+            self.median_ms = self.mean_ms = 0.0
+            return
+        variance = self.sigma**2
+        self.mu = math.log(self.mode_ms) + variance
+        self.median_ms = math.exp(self.mu)
+        self.mean_ms = math.exp(self.mu + variance / 2.0)
+        generator = np.random.default_rng(seed)
+        samples_ms = generator.lognormal(self.mu, self.sigma, size=pool_size)
+        self._fill(samples_ms)
+    def describe(self) -> dict:
+        """Return the parameters; derived values are added only when a pool exists."""
+        has_pool = self._pool is not None
+        info = {
+            "dist": "lognormal",
+            "mode_ms": round(self.mode_ms, 4),
+            "sigma": self.sigma,
+            "degenerate": not has_pool,
+        }
+        if not has_pool:
+            return info
+        info["mu"] = round(self.mu, 6)
+        info["median_ms"] = round(self.median_ms, 4)
+        info["mean_ms"] = round(self.mean_ms, 4)
+        info["pool_size"] = int(self._pool.size)
+        return info
+class Normal(LatencyDist):
+    """Gaussian latency centred on ``mean_ms`` with spread ``std_ms``, floored at 0.
+    Unless ``std_ms`` is positive, no pool is generated and every draw is the constant
+    ``mean_ms`` (the deterministic case).
+    """
+    def __init__(
+        self,
+        mean_ms: float,
+        *,
+        std_ms: float = 0.0,
+        seed: int | None = None,
+        pool_size: int = 1 << 16,
+    ):
+        super().__init__()
+        self.mean_ms = float(mean_ms)
+        self.std_ms = float(std_ms)
+        if not self.std_ms > 0.0:
+            # Zero spread: no pool, just a constant.
+            self._set_const_ms(self.mean_ms)
+            return
+        generator = np.random.default_rng(seed)
+        samples_ms = generator.normal(self.mean_ms, self.std_ms, size=pool_size)
+        self._fill(samples_ms)
+    def describe(self) -> dict:
+        """Return the parameters, plus ``pool_size`` when a pool exists."""
+        has_pool = self._pool is not None
+        info = {
+            "dist": "normal",
+            "mean_ms": round(self.mean_ms, 4),
+            "std_ms": round(self.std_ms, 4),
+            "degenerate": not has_pool,
+        }
+        if has_pool:
+            info["pool_size"] = int(self._pool.size)
+        return info
+class Exponential(LatencyDist):
+    """Exponential latency with mean ``mean_ms`` (peak at 0, one-sided right tail).
+    Unless ``mean_ms`` is positive, no pool is generated and the distribution is the
+    degenerate zero-latency reference.
+    """
+    def __init__(self, mean_ms: float, *, seed: int | None = None, pool_size: int = 1 << 16):
+        super().__init__()
+        self.mean_ms = float(mean_ms)
+        if not self.mean_ms > 0.0:
+            return  # keep the base-class constant of 0
+        generator = np.random.default_rng(seed)
+        samples_ms = generator.exponential(self.mean_ms, size=pool_size)
+        self._fill(samples_ms)
+    def describe(self) -> dict:
+        """Return the parameters, plus ``pool_size`` when a pool exists."""
+        has_pool = self._pool is not None
+        info = {
+            "dist": "exponential",
+            "mean_ms": round(self.mean_ms, 4),
+            "degenerate": not has_pool,
+        }
+        if has_pool:
+            info["pool_size"] = int(self._pool.size)
+        return info

snailmail/server.py ADDED Viewed

@@ -0,0 +1,250 @@
+"""A local HTTP server that serves a directory over Range with injected latency.
+HTTP/Range correctness (206, Content-Range, suffix ranges, 416, conditional
+requests), directory serving, and path-traversal safety are handled by **aiohttp's
+static handler** — snailmail reimplements none of it. It adds only the benchmark
+instrumentation:
+  * per-request latency from a pluggable distribution (:mod:`snailmail.latency`).
+  * a shared-pipe BANDWIDTH limiter (:class:`~snailmail.bandwidth.AsyncSharedPipe`)
+    modelling one finite client downlink: response bytes are metered through a single
+    pipe so aggregate egress is capped and over-read costs real time.
+  * server-side counters: GET/request counts, bytes read, requested paths and
+    methods, 404 misses, and PEAK concurrency (max requests in flight), so wall-clock
+    can be read honestly (not a serial ``n*rtt`` assumption).
+Serves every file under a root directory by relative path — one object per file, the
+shape of an object-store / Icechunk virtual dataset. Files stream from disk and are
+never loaded into RAM, so arbitrarily large files work. The injected latency is
+*added* to the real (sub-ms, local-SSD) range-read time, so the modelled RTT stays
+dominated by the knob.
+Consumers must opt into plain HTTP: obstore ``client_options={"allow_http": True}``,
+icechunk ``http_store({"allow_http": "true"})``.
+LOCAL loopback only (binds 127.0.0.1). Use in-process via :class:`LatencyRangeServer`
+(exposes counters + live :meth:`~LatencyRangeServer.set_latency` /
+:meth:`~LatencyRangeServer.set_bandwidth_mbs`) or as the ``snailmail`` CLI.
+"""
+from __future__ import annotations
+import asyncio
+import threading
+from collections import Counter
+from pathlib import Path
+from aiohttp import web
+from snailmail.bandwidth import AsyncSharedPipe
+from snailmail.latency import Fixed, LatencyDist
+class LatencyRangeServer:
+    """Threaded localhost HTTP server: aiohttp Range serving of a directory + latency.
+    The server runs an asyncio loop on a daemon thread. :meth:`start` blocks until the
+    socket is bound (or re-raises the startup error); :meth:`stop` closes the listening
+    socket, stops the loop and joins the thread, so the port is released and the same
+    instance can be started again.
+    Parameters
+    ----------
+    root:        directory to serve. Every file beneath it is reachable at its path
+                 relative to the root (range- and traversal-safe). To benchmark a
+                 single file, put it in a directory and serve that.
+    latency:     per-request latency distribution (a :class:`~snailmail.latency.LatencyDist`,
+                 e.g. ``LogNormal(mode_ms=45)``); ``None`` injects no latency. Mutable
+                 via :meth:`set_latency`.
+    bandwidth_mbs:  shared-pipe bandwidth, MB/s (1 MB = 1e6 bytes); None = unlimited.
+    port:        TCP port to bind (0 = ephemeral; set a fixed port when a consumer
+                 mishandles ephemeral ports).
+    Raises
+    ------
+    NotADirectoryError: if ``root`` is not an existing directory.
+    """
+    # Upper bound (seconds) that stop() waits for aiohttp cleanup, and again for the
+    # loop thread to exit, so shutdown cannot hang on a stuck request.
+    _STOP_TIMEOUT_S = 5.0
+    def __init__(
+        self,
+        root,
+        *,
+        latency: LatencyDist | None = None,
+        bandwidth_mbs: float | None = None,
+        port: int = 0,
+    ):
+        self.root = Path(root)
+        if not self.root.is_dir():
+            raise NotADirectoryError(f"root must be a directory: {self.root}")
+        self._root_resolved = self.root.resolve()
+        self.latency = latency if latency is not None else Fixed(0.0)
+        self.set_bandwidth_mbs(bandwidth_mbs)
+        self._req_port = port
+        self.port: int | None = None  # actual bound port; known only after start()
+        self.total_bytes = 0
+        self.n_requests = self.n_gets = self.n_misses = 0
+        self.methods: Counter[str] = Counter()
+        self.paths: Counter[str] = Counter()
+        self._size_cache: dict[str, int] = {}  # loop-thread only, so no lock needed
+        self._in_flight = self.max_in_flight = 0
+        self._lock = threading.Lock()
+        self._ready = threading.Event()
+        self._loop: asyncio.AbstractEventLoop | None = None
+        self._thread: threading.Thread | None = None
+        self._runner: web.AppRunner | None = None
+        self._startup_exc: BaseException | None = None
+    # -- accounting (sizes/ranges parsed only for counters/bandwidth; serving is aiohttp's) --
+    def _target_size(self, path: str) -> int | None:
+        """Size of the file a request path maps to, or None if it resolves to no file.
+        Done here rather than delegated to aiohttp on purpose: its static handler
+        computes the same thing but never exposes it before serving, and FileResponse
+        only decides a 404 at send time. We need the answer up front — the size to
+        meter bandwidth before bytes flow, and the miss to count ``n_misses`` — so we
+        do our own safe lookup with the stdlib (no aiohttp internals copied). The
+        ``is_relative_to`` guard stops a ``..`` escape from being stat'd and
+        mis-counted as a hit when aiohttp would serve it as a 404. Hits are cached by
+        request path so repeats skip the filesystem; the cache is never invalidated,
+        so a file whose size changes while serving keeps its first-seen size here.
+        """
+        if path in self._size_cache:
+            return self._size_cache[path]
+        try:
+            target = (self.root / path.lstrip("/")).resolve()
+            if not target.is_relative_to(self._root_resolved) or not target.is_file():
+                return None  # traversal escape or missing file => a miss
+            size = target.stat().st_size
+        except OSError:
+            return None
+        self._size_cache[path] = size
+        return size
+    def _range_bytes(self, request: web.Request, size: int) -> int:
+        """Bytes a GET will read against a known file size (pure; no side effects).
+        Uses aiohttp's own ``request.http_range`` parser so this count matches what the
+        static handler actually serves; a malformed Range raises ValueError there and
+        aiohttp answers 416 (no body), so we count 0.
+        """
+        try:
+            start, stop, _ = request.http_range.indices(size)
+        except ValueError:
+            return 0
+        return max(0, stop - start)
+    def _middleware(self):
+        """Build the aiohttp middleware that counts, delays and meters every request."""
+        @web.middleware
+        async def mw(request: web.Request, handler):
+            # FileResponse defers its 404 to send time, so detect misses ourselves up
+            # front (it's also the size we need for byte accounting).
+            is_read = request.method in ("GET", "HEAD")
+            size = self._target_size(request.path) if is_read else None
+            with self._lock:
+                self.n_requests += 1
+                self.methods[request.method] += 1
+                self.paths[request.path] += 1
+                if request.method == "GET":
+                    self.n_gets += 1
+                if is_read and size is None:  # a miss still cost a round trip — count it
+                    self.n_misses += 1
+                self._in_flight += 1
+                self.max_in_flight = max(self.max_in_flight, self._in_flight)
+            nbytes = 0
+            try:
+                await asyncio.sleep(self.latency.draw_s())  # the injected RTT
+                if request.method == "GET" and size is not None:
+                    nbytes = self._range_bytes(request, size)
+                    await self.pipe.transfer(nbytes)  # shared-pipe bandwidth
+                return await handler(request)
+            finally:
+                with self._lock:
+                    self._in_flight -= 1
+                    self.total_bytes += nbytes
+        return mw
+    async def _start(self):
+        """Set up the aiohttp app and bind the socket (runs on the loop thread)."""
+        app = web.Application(middlewares=[self._middleware()])
+        # follow_symlinks=False (the default) keeps serving inside the root, matching
+        # the traversal check in _target_size.
+        app.router.add_static("/", self.root, follow_symlinks=False)
+        runner = web.AppRunner(app)
+        await runner.setup()
+        try:
+            site = web.TCPSite(runner, "127.0.0.1", self._req_port)
+            await site.start()
+        except BaseException:
+            # e.g. the port is already in use: release what setup() allocated.
+            await runner.cleanup()
+            raise
+        self._runner = runner
+        # The runner lists the addresses it serves; one IPv4 site => one (host, port).
+        self.port = runner.addresses[0][1]
+    def _serve(self):
+        """Thread target: create the loop, start the site, serve until stopped."""
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        self._loop = loop
+        try:
+            try:
+                loop.run_until_complete(self._start())
+            except BaseException as exc:  # surface startup failures instead of hanging start()
+                self._startup_exc = exc
+                return
+            finally:
+                self._ready.set()  # wake start() on success and on failure alike
+            loop.run_forever()
+        finally:
+            self._loop = None
+            self._runner = None
+            loop.close()
+    def start(self) -> "LatencyRangeServer":
+        """Start serving on a daemon thread and return ``self`` once the port is bound.
+        Calling it while the server is already running is a no-op. A startup failure
+        (for example the port being in use) is re-raised in the calling thread.
+        """
+        if self._thread is not None and self._thread.is_alive():
+            return self  # already serving; a second loop would fight over the port
+        self._ready.clear()
+        self._startup_exc = None
+        self._thread = threading.Thread(target=self._serve, daemon=True)
+        self._thread.start()
+        self._ready.wait()
+        if self._startup_exc is not None:
+            raise self._startup_exc
+        return self
+    def stop(self) -> None:
+        """Stop serving: close the listening socket, stop the loop, join the thread.
+        Safe to call when the server never started or has already stopped. Each wait
+        is bounded by ``_STOP_TIMEOUT_S``.
+        """
+        loop, thread, runner = self._loop, self._thread, self._runner
+        if loop is None or loop.is_closed():
+            return
+        # Blocking on the loop from its own thread would deadlock, so skip the waits there.
+        on_loop_thread = thread is threading.current_thread()
+        try:
+            if runner is not None and not on_loop_thread:
+                # cleanup() must run on the loop; it stops the site and closes connections.
+                pending = asyncio.run_coroutine_threadsafe(runner.cleanup(), loop)
+                try:
+                    pending.result(timeout=self._STOP_TIMEOUT_S)
+                except Exception:
+                    # Best effort: never let a slow or failing cleanup block shutdown.
+                    pending.cancel()
+            loop.call_soon_threadsafe(loop.stop)
+        except RuntimeError:
+            return  # the loop was closed underneath us; nothing left to stop
+        if thread is not None and not on_loop_thread:
+            thread.join(timeout=self._STOP_TIMEOUT_S)
+    @property
+    def base(self) -> str:
+        """Root URL to point a reader (or object store) at; append a key to it."""
+        return f"http://127.0.0.1:{self.port}/"
+    def url(self, key: str) -> str:
+        """URL for a single served key, e.g. ``server.url("chunks/0.0.0")``."""
+        return f"{self.base}{key.lstrip('/')}"
+    def files(self) -> list[str]:
+        """The served keys: relative paths of every file under the root (sorted).
+        Keys use ``/`` separators so they can be appended to :attr:`base` as-is.
+        """
+        return sorted(
+            p.relative_to(self.root).as_posix() for p in self.root.rglob("*") if p.is_file()
+        )
+    def set_latency(self, latency: LatencyDist) -> None:
+        """Swap the latency distribution used for subsequent requests."""
+        self.latency = latency
+    def set_bandwidth_mbs(self, bandwidth_mbs: float | None) -> None:
+        """Replace the bandwidth pipe; ``None`` or a non-positive value means unlimited."""
+        # AsyncSharedPipe owns the "<= 0 means unlimited" rule; read it back for display.
+        self.pipe = AsyncSharedPipe(bandwidth_mbs * 1e6 if bandwidth_mbs else None)
+        self.bandwidth_mbs = self.pipe.B / 1e6 if self.pipe.B else None
+    def reset_counts(self) -> None:
+        """Zero the request counters and clear any time booked on the bandwidth pipe."""
+        with self._lock:
+            self.total_bytes = 0
+            self.n_requests = self.n_gets = self.n_misses = 0
+            self.methods = Counter()
+            self.paths = Counter()
+            self.max_in_flight = self._in_flight  # keep currently-active requests in the new window
+        self.pipe.reset()
+    def stats(self) -> dict:
+        """Atomic snapshot of the request counters (persists until :meth:`reset_counts`)."""
+        with self._lock:
+            return {
+                "n_gets": self.n_gets,
+                "n_requests": self.n_requests,
+                "n_misses": self.n_misses,
+                "max_in_flight": self.max_in_flight,
+                "total_bytes": self.total_bytes,
+                "methods": dict(self.methods),
+                "paths": dict(self.paths),
+            }
+    def describe(self) -> dict:
+        """JSON-serialisable description of the server: address, file count, knobs."""
+        return {
+            "root": str(self.root),
+            "base": self.base,
+            "n_files": len(self.files()),
+            "port": self.port,
+            "latency": self.latency.describe(),
+            "bandwidth_mbs": self.bandwidth_mbs,
+        }
+    def realized_percentiles(self) -> dict:
+        """Percentiles of the latency pool in use (delegates to the distribution)."""
+        return self.latency.percentiles()
+    def __enter__(self) -> "LatencyRangeServer":
+        return self.start()
+    def __exit__(self, *exc) -> None:
+        self.stop()

snailmail-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,151 @@
+Metadata-Version: 2.4
+Name: snailmail
+Version: 0.1.0
+Summary: A local HTTP server that serves a directory over Range with injectable latency and bandwidth limits, for benchmarking range / object-store / virtual-chunk reads under realistic network conditions.
+Project-URL: Homepage, https://github.com/ianhi/snailmail
+Project-URL: Repository, https://github.com/ianhi/snailmail
+Project-URL: Issues, https://github.com/ianhi/snailmail/issues
+Project-URL: Changelog, https://github.com/ianhi/snailmail/blob/main/CHANGELOG.md
+Author-email: Ian Hunt-Isaak <ian@earthmover.io>
+License-Expression: MIT
+License-File: LICENSE
+Keywords: bandwidth,benchmark,http,icechunk,latency,object-store,range-requests,zarr
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: MacOS
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
+Classifier: Topic :: System :: Benchmark
+Requires-Python: >=3.10
+Requires-Dist: aiohttp>=3.9
+Requires-Dist: numpy>=1.24
+Description-Content-Type: text/markdown
+# snailmail
+A local HTTP server that serves a directory over HTTP Range, injecting per-request
+latency and a bandwidth cap, and counts GETs and peak concurrency.
+Use it to benchmark range-based readers — object stores, Zarr/Icechunk virtual
+chunks, tiled image formats — under realistic network conditions, on your laptop,
+with no cloud and no root.
+## Why you'd want it
+Local disk hides the cost that dominates remote reads: network round-trips.
+A read pattern that finishes instantly against a warm page cache can take
+minutes of serial round-trips against object storage. snailmail adds a
+per-request latency draw and a shared bandwidth pipe so you can measure how a
+reader behaves over the wire. `max_in_flight` tells you peak concurrency, which
+wall-clock time alone cannot.
+## Install
+```bash
+uv add snailmail        # or: pip install snailmail
+```
+## Use it in a benchmark
+snailmail serves a directory. Every file under the root is reachable at its path
+relative to the root, which matches the shape of an object store or Icechunk virtual
+dataset (one object per file). Point your reader at `server.base` and have it fetch
+keys like `chunks/0.0.0`.
+```python
+from snailmail import LatencyRangeServer, LogNormal
+with LatencyRangeServer("my_zarr_store/", latency=LogNormal(mode_ms=40), bandwidth_mbs=100) as server:
+    server.reset_counts()
+    open_and_read(server.base)         # your reader: obstore, icechunk, zarr, ...
+    print(server.stats())
+    # {'n_gets': 312, 'n_requests': 312, 'n_misses': 0, 'max_in_flight': 16,
+    #  'total_bytes': .., 'methods': {'GET': 312}, 'paths': {..}}
+```
+`open_and_read` stands in for the reader you're benchmarking. It makes HTTP GETs
+(with `Range` headers) against `server.base`; snailmail injects the latency, meters
+the bytes through the bandwidth pipe, and streams the file from disk in response. A
+direct request looks like this:
+```python
+import urllib.request
+with LatencyRangeServer("my_zarr_store/") as server:
+    req = urllib.request.Request(server.url("chunks/0.0.0"), headers={"Range": "bytes=0-1023"})
+    first_kib = urllib.request.urlopen(req).read()
+```
+`server.url(key)` builds the URL for a key; `server.files()` lists the served keys.
+`stats()` is a snapshot of request counters since the last `reset_counts()`:
+`n_requests` counts every request, `n_gets` only the GETs, and `n_misses` the
+GET/HEAD requests for keys that don't exist (404, like an object store's NoSuchKey). Tune
+between measurements with `set_latency(dist)`, `set_bandwidth_mbs(x)`, and
+`reset_counts()`.
+Latency is a pluggable distribution passed as `latency=`:
+```python
+from snailmail import LogNormal, Normal, Exponential, Fixed
+LogNormal(mode_ms=45, sigma=0.5)   # unimodal hump with long right tail; fits object-store GET RTT
+Normal(mean_ms=45, std_ms=10)      # symmetric, truncated at 0
+Exponential(mean_ms=45)            # peak at 0; a poor model for GET RTT
+Fixed(20)                          # deterministic
+```
+`latency=None` (the default) injects no latency.
+## From the CLI
+```bash
+snailmail ./store --dist lognormal --mode-ms 45 --sigma 0.5
+snailmail ./store --dist normal --mean-ms 45 --std-ms 10
+snailmail ./store --dist exponential --mean-ms 45
+snailmail ./store --dist fixed --value-ms 20
+snailmail ./store --bandwidth-mbs 100 --port 8080 --json   # no latency; JSON address line
+```
+The argument is the directory to serve.
+`--json` prints a single machine-readable line and flushes it before serving,
+so a script can spawn snailmail, read the bound address from stdout, and proceed.
+The CLI rejects a flag that doesn't belong to the chosen `--dist`. Omit `--dist`
+for no injected latency.
+## What it models
+**Latency** is a per-request draw from the chosen distribution. `lognormal` is
+the recommended default: parameterise it by the PDF mode (`--mode-ms`) and shape
+(`--sigma`). `normal`, `exponential`, and `fixed` are available for comparison.
+**Bandwidth** is a single shared FIFO pipe (`--bandwidth-mbs`, MB/s = 1e6 bytes/s).
+Per-request round-trips run in parallel, but response bytes serialize through the
+pipe, so aggregate egress is capped and over-read costs real transfer time. Omit
+for unlimited bandwidth.
+HTTP correctness (206, `Content-Range`, suffix ranges, 416, conditional requests)
+and on-disk streaming come from aiohttp's `web.FileResponse`. Files are never
+loaded into RAM, so multi-gigabyte files work.
+Missing keys return 404 and are counted in `n_misses`, matching object-store
+NoSuchKey behavior.
+## Notes
+- Loopback only (binds `127.0.0.1`); nothing leaves the machine.
+- Consumers must opt into plain HTTP: obstore `client_options={"allow_http": True}`,
+  icechunk `http_store({"allow_http": "true"})`.
+- The injected latency is added to the real (sub-millisecond, local-SSD)
+  range-read time, so the modelled RTT is dominated by the configured value.
+- For transport-accurate shaping on real packets, use `tc netem` (Linux) or
+  `dnctl`/`pfctl` (macOS) in front of any file server. snailmail trades that
+  for zero-setup, in-process instrumentation.
+Contributing? See [AGENTS.md](AGENTS.md). MIT licensed.

snailmail-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+snailmail/__init__.py,sha256=jScOk6l_ClRE70I9WLYv2g_-KMhq_Nsr2P-yhb6TWO4,812
+snailmail/bandwidth.py,sha256=BjDpyCGSWIBtf-zovDNI9X5vyHWttugnHKnQ6QJ6X3M,1229
+snailmail/cli.py,sha256=mH_EWFAiuONJSBNLoY2M569WSFFcgdm13-h6ZMbySRQ,5336
+snailmail/latency.py,sha256=RJm-MZ7YJ9kyO_3_i0GdMkB6ngPS7om8NIfnalb3bVI,6138
+snailmail/server.py,sha256=xfk4hWHbNyI0bxkI4-B9HKDagyQaKKGUx9w-_gSRoac,10794
+snailmail-0.1.0.dist-info/METADATA,sha256=zsTHz1tS-F2g4QlCJKvIRgRQr7YCGZh4lwmiup84odQ,6465
+snailmail-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+snailmail-0.1.0.dist-info/entry_points.txt,sha256=ZXLI1qwesFgqKqObLJxlTLoGtNCfhyZnAdjGfEUq4dc,49
+snailmail-0.1.0.dist-info/licenses/LICENSE,sha256=AX-IUE0hUhrzJtj1-dyW2QB-e0k6a6kdSEK3swHO8bw,1071
+snailmail-0.1.0.dist-info/RECORD,,

snailmail-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

snailmail-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+snailmail = snailmail.cli:main

snailmail-0.1.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Ian Hunt-Isaak
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.