snailmail 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,55 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ dist/
9
+ wheels/
10
+ *.egg-info/
11
+ *.egg
12
+ .eggs/
13
+
14
+ # Virtual environments / uv
15
+ .venv/
16
+ venv/
17
+ env/
18
+ .uv/
19
+
20
+ # Packaging / build backends
21
+ *.whl
22
+ *.tar.gz
23
+
24
+ # Testing / coverage / type / lint caches
25
+ .pytest_cache/
26
+ .ruff_cache/
27
+ .mypy_cache/
28
+ .pyright/
29
+ .coverage
30
+ .coverage.*
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+ .hypothesis/
35
+
36
+ # Tooling caches
37
+ .cache/
38
+
39
+ # Editors / OS
40
+ .vscode/
41
+ .idea/
42
+ *.swp
43
+ *~
44
+ .DS_Store
45
+
46
+ # Jupyter
47
+ .ipynb_checkpoints/
48
+ .jupyter/
49
+ .jupyter_ystore.db
50
+
51
+ # Local scratch data the server might be pointed at (don't commit big test files)
52
+ /data/
53
+ *.h5ad
54
+ *.tiff
55
+ *.zarr/
@@ -0,0 +1,122 @@
1
+ # AGENTS.md
2
+
3
+ Onboarding for an agent or contributor picking up snailmail. Read the
4
+ [README](README.md) first for what it is and why; this file is the "how it works,
5
+ how to work on it" layer.
6
+
7
+ ## Goal
8
+
9
+ A zero-setup, in-process harness to benchmark range-based readers under realistic
10
+ network conditions, and to answer concurrency questions honestly. The product is the
11
+ measurement: GET count, bytes, and **peak concurrency** (`max_in_flight`) — that
12
+ last one is the whole point. Wall-clock can't distinguish "fast because cached" from
13
+ "fast because concurrent"; `max_in_flight` can.
14
+
15
+ ## Layout
16
+
17
+ ```
18
+ src/snailmail/
19
+ __init__.py # public exports
20
+ latency.py # LatencyDist + LogNormal / Normal / Exponential / Fixed
21
+ bandwidth.py # AsyncSharedPipe
22
+ server.py # LatencyRangeServer (the threaded aiohttp wrapper)
23
+ cli.py # the `snailmail` CLI (main, --dist arg wiring)
24
+ tests/
25
+ test_server.py # range correctness, latency, bandwidth, concurrency, counters
26
+ test_directory.py # directory serving, misses, traversal, stats, set_latency, --version
27
+ test_latency.py # distributions + CLI --dist wiring
28
+ ```
29
+
30
+ One file per concern; keep each small and single-purpose. The split is to stay
31
+ easily editable, not an invitation to grow a framework — the whole thing should stay
32
+ readable in a sitting.
33
+
34
+ ## Develop
35
+
36
+ ```bash
37
+ uv sync # aiohttp, numpy + dev: pytest, ruff, mypy
38
+ uv run pytest # all green
39
+ uv run ruff check src tests
40
+ uv run mypy # type gate (config in pyproject)
41
+ ```
42
+
43
+ Pre-commit hooks (ruff lint + ruff format + mypy + file hygiene) run via
44
+ [prek](https://github.com/j178/prek): `prek install` once, then they fire on commit;
45
+ `prek run --all-files` to run them by hand.
46
+
47
+ ## Conventions
48
+
49
+ - **Commits:** do not co-sign — no `Co-Authored-By` / tool trailers.
50
+ - **Comments:** tight and useful — explain *why*, not *what*. No session- or
51
+ conversation-specific notes ("as we discussed", change logs, dates); a comment
52
+ must make sense to someone reading the file cold a year from now.
53
+
54
+ ## Design decisions (read before changing things)
55
+
56
+ - **aiohttp `web.FileResponse` owns all HTTP correctness** — 206, `Content-Range`,
57
+ suffix ranges, 416, conditional requests — and streams from disk. Do **not**
58
+ reimplement range handling; that was the whole reason to rewrite off the original
59
+ hand-rolled `BaseHTTPRequestHandler`. The file is never read into RAM, so multi-GB
60
+ files work. Our consumers issue single-range GETs only, so multi-range responses
61
+ are out of scope.
62
+
63
+ - **Serves a directory, always.** The root is served with aiohttp's `add_static`
64
+ (range-correct *and* traversal-safe — don't hand-roll path joining). One object per
65
+ file is the shape that matters for the Icechunk/object-store use case; to benchmark
66
+ a single file, point at the directory containing it. There is deliberately no
67
+ single-file mode — it added a `url`-vs-`base` duality and a custom handler for no
68
+ real benefit. `base` is the root; `url(key)` builds a key URL. `FileResponse` defers
69
+ its 404 to send time, so **misses are detected up front** via `_target_size()`
70
+ (which also yields the size for byte accounting), not by inspecting the response
71
+ status — a miss is a read whose path resolves to no file under the root, counted in
72
+ `n_misses`.
73
+ - **Latency = a pluggable `LatencyDist`** (`latency.py`): `LogNormal`, `Normal`,
74
+ `Exponential`, `Fixed`. **Lognormal is the recommended default and the one to reach
75
+ for** — object-store GET RTT is a unimodal hump with a long right tail, which it
76
+ fits; it's parameterised by the PDF **mode** (`mode_ms`) and shape `sigma`. The
77
+ others exist for comparison, not because they model object stores well — notably
78
+ `Exponential`'s peak sits at the floor, which is *wrong* for GET RTT; offer it, but
79
+ don't recommend it. Every dist **pre-generates its pool once with numpy and serves
80
+ it round-robin** — O(1) in the hot path, no per-request RNG, exactly reproducible
81
+ per seed. The pool index is unsynchronised on purpose: all requests run on one
82
+ event-loop thread, so it's safe. If you ever move to multiple loops/threads, that
83
+ assumption breaks. Negative draws (Normal's left tail) are truncated at 0.
84
+ - **Bandwidth = one shared FIFO pipe** (`AsyncSharedPipe`): per-request RTTs stay
85
+ parallel; response *bytes* serialize through the pipe, so egress is capped and
86
+ over-read costs real time.
87
+ - **Async, in a background thread.** One event loop means many requests' latency
88
+ sleeps overlap with no thread-pool ceiling — exactly what makes the
89
+ peak-concurrency measurement clean. `start()` spawns the loop thread; `stop()`
90
+ stops it. Don't reintroduce thread-per-request.
91
+ - **Counters under a lock.** `stats()` is a post-hoc, atomic snapshot (counts, total
92
+ bytes, 404 misses, peak `max_in_flight`, and per-method / per-path breakdowns) that
93
+ persists until `reset_counts()`. For accounting only, `_range_bytes` reuses aiohttp's
94
+ own `request.http_range` parser (not a hand-rolled one) so the counted bytes match
95
+ what the static handler serves; serving correctness still comes entirely from
96
+ aiohttp. See `_target_size`'s docstring for why size/miss are resolved up front
97
+ rather than read back from aiohttp.
98
+
99
+ - **Compose aiohttp, don't subclass it.** aiohttp has no server base class meant for
100
+ extension (its docs steer you to middlewares/signals over subclassing
101
+ `web.Application`). `LatencyRangeServer` is a threaded lifecycle + counters facade
102
+ around `web.Application` + `AppRunner`/`TCPSite`; keep it that way. The one private
103
+ touch is reading the bound ephemeral port off `site._server.sockets` — aiohttp
104
+ exposes no public API for it.
105
+ - **Injected latency is added on top** of the real (sub-ms, local-SSD) range read, so
106
+ the modelled RTT stays dominated by the knob. Revisit for spinning disks or very
107
+ large single ranges.
108
+
109
+ ## Non-goals
110
+
111
+ - **Transport-accurate shaping.** snailmail models latency and bandwidth at the
112
+ application layer (a `sleep()` plus a byte pipe), not on real packets. For
113
+ kernel-level RTT/bandwidth use `tc netem` (Linux) or `dnctl`/`pfctl` (macOS) in
114
+ front of any file server. Don't grow snailmail toward packet shaping.
115
+ - **A general-purpose web server.** It serves a directory on loopback for benchmarks.
116
+
117
+ ## Working notes
118
+
119
+ Current status, open tasks, and origin/context live in
120
+ [docs/NOTES.md](docs/NOTES.md) — the mutable worklog agents update. Keep *this* file
121
+ durable (purpose, design, conventions, non-goals); put anything time-specific in the
122
+ worklog.
@@ -0,0 +1,29 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format is based on
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to
5
+ [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [0.1.0] - 2026-06-18
8
+
9
+ Initial public release.
10
+
11
+ ### Added
12
+ - `LatencyRangeServer`: a loopback HTTP server that serves a **directory tree** over
13
+ HTTP Range with injectable latency and bandwidth limits, for benchmarking range /
14
+ object-store / virtual-chunk reads. One object per file (the shape of an Icechunk
15
+ virtual dataset), range- and traversal-safe; `base` is the root, `url(key)` builds a
16
+ key URL, and `files()` lists the served keys.
17
+ - Pluggable per-request latency distributions: `LogNormal` (the recommended default),
18
+ `Normal`, `Exponential`, and `Fixed`, each with explicit, distribution-specific
19
+ parameters. Draws are pre-generated and served round-robin (O(1), reproducible per
20
+ seed).
21
+ - Shared FIFO bandwidth pipe (`AsyncSharedPipe`) so response bytes serialize through a
22
+ capped egress while round-trips stay parallel.
23
+ - Request accounting via `stats()`: GET/request counts, 404 misses (`n_misses`), total
24
+ bytes, peak concurrency (`max_in_flight`), and per-method / per-path breakdowns;
25
+ persists until `reset_counts()`.
26
+ - `snailmail` CLI with a `--dist` selector and explicit per-distribution flags, a
27
+ `--json` machine-readable address line (flushed before serving), and `--version`.
28
+
29
+ [0.1.0]: https://github.com/ianhi/snailmail/releases/tag/v0.1.0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ian Hunt-Isaak
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,151 @@
1
+ Metadata-Version: 2.4
2
+ Name: snailmail
3
+ Version: 0.1.0
4
+ Summary: A local HTTP server that serves a directory over Range with injectable latency and bandwidth limits, for benchmarking range / object-store / virtual-chunk reads under realistic network conditions.
5
+ Project-URL: Homepage, https://github.com/ianhi/snailmail
6
+ Project-URL: Repository, https://github.com/ianhi/snailmail
7
+ Project-URL: Issues, https://github.com/ianhi/snailmail/issues
8
+ Project-URL: Changelog, https://github.com/ianhi/snailmail/blob/main/CHANGELOG.md
9
+ Author-email: Ian Hunt-Isaak <ian@earthmover.io>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: bandwidth,benchmark,http,icechunk,latency,object-store,range-requests,zarr
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Operating System :: MacOS
16
+ Classifier: Operating System :: POSIX :: Linux
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
23
+ Classifier: Topic :: System :: Benchmark
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: aiohttp>=3.9
26
+ Requires-Dist: numpy>=1.24
27
+ Description-Content-Type: text/markdown
28
+
29
+ # snailmail
30
+
31
+ A local HTTP server that serves a directory over HTTP Range, injecting per-request
32
+ latency and a bandwidth cap, and counts GETs and peak concurrency.
33
+
34
+ Use it to benchmark range-based readers — object stores, Zarr/Icechunk virtual
35
+ chunks, tiled image formats — under realistic network conditions, on your laptop,
36
+ with no cloud and no root.
37
+
38
+ ## Why you'd want it
39
+
40
+ Local disk hides the cost that dominates remote reads: network round-trips.
41
+ A read pattern that finishes instantly against a warm page cache can take
42
+ minutes of serial round-trips against object storage. snailmail adds a
43
+ per-request latency draw and a shared bandwidth pipe so you can measure how a
44
+ reader behaves over the wire. `max_in_flight` tells you peak concurrency, which
45
+ wall-clock time alone cannot.
46
+
47
+ ## Install
48
+
49
+ ```bash
50
+ uv add snailmail # or: pip install snailmail
51
+ ```
52
+
53
+ ## Use it in a benchmark
54
+
55
+ snailmail serves a directory. Every file under the root is reachable at its path
56
+ relative to the root, which matches the shape of an object store or Icechunk virtual
57
+ dataset (one object per file). Point your reader at `server.base` and have it fetch
58
+ keys like `chunks/0.0.0`.
59
+
60
+ ```python
61
+ from snailmail import LatencyRangeServer, LogNormal
62
+
63
+ with LatencyRangeServer("my_zarr_store/", latency=LogNormal(mode_ms=40), bandwidth_mbs=100) as server:
64
+ server.reset_counts()
65
+ open_and_read(server.base) # your reader: obstore, icechunk, zarr, ...
66
+ print(server.stats())
67
+ # {'n_gets': 312, 'n_requests': 312, 'n_misses': 0, 'max_in_flight': 16,
68
+ # 'total_bytes': .., 'methods': {'GET': 312}, 'paths': {..}}
69
+ ```
70
+
71
+ `open_and_read` stands in for the reader you're benchmarking. It makes HTTP GETs
72
+ (with `Range` headers) against `server.base`; snailmail injects the latency, meters
73
+ the bytes through the bandwidth pipe, and streams the file from disk in response. A
74
+ direct request looks like this:
75
+
76
+ ```python
77
+ import urllib.request
78
+
79
+ with LatencyRangeServer("my_zarr_store/") as server:
80
+ req = urllib.request.Request(server.url("chunks/0.0.0"), headers={"Range": "bytes=0-1023"})
81
+ first_kib = urllib.request.urlopen(req).read()
82
+ ```
83
+
84
+ `server.url(key)` builds the URL for a key; `server.files()` lists the served keys.
85
+ `stats()` is a snapshot of request counters since the last `reset_counts()`:
86
+ `n_requests` counts every request, `n_gets` only the GETs, and `n_misses` the
87
+ requests for keys that don't exist (404, like an object store's NoSuchKey). Tune
88
+ between measurements with `set_latency(dist)`, `set_bandwidth_mbs(x)`, and
89
+ `reset_counts()`.
90
+
91
+ Latency is a pluggable distribution passed as `latency=`:
92
+
93
+ ```python
94
+ from snailmail import LogNormal, Normal, Exponential, Fixed
95
+
96
+ LogNormal(mode_ms=45, sigma=0.5) # unimodal hump with long right tail; fits object-store GET RTT
97
+ Normal(mean_ms=45, std_ms=10) # symmetric, truncated at 0
98
+ Exponential(mean_ms=45) # peak at 0; a poor model for GET RTT
99
+ Fixed(20) # deterministic
100
+ ```
101
+
102
+ `latency=None` (the default) injects no latency.
103
+
104
+ ## From the CLI
105
+
106
+ ```bash
107
+ snailmail ./store --dist lognormal --mode-ms 45 --sigma 0.5
108
+ snailmail ./store --dist normal --mean-ms 45 --std-ms 10
109
+ snailmail ./store --dist exponential --mean-ms 45
110
+ snailmail ./store --dist fixed --value-ms 20
111
+ snailmail ./store --bandwidth-mbs 100 --port 8080 --json # no latency; JSON address line
112
+ ```
113
+
114
+ The positional argument (`./store` in the examples above) is the directory to serve.
115
+
116
+ `--json` prints a single machine-readable line and flushes it before serving,
117
+ so a script can spawn snailmail, read the bound address from stdout, and proceed.
118
+
119
+ The CLI rejects a flag that doesn't belong to the chosen `--dist`. Omit `--dist`
120
+ for no injected latency.
121
+
122
+ ## What it models
123
+
124
+ **Latency** is a per-request draw from the chosen distribution. `lognormal` is
125
+ the recommended default: parameterise it by the PDF mode (`--mode-ms`) and shape
126
+ (`--sigma`). `normal`, `exponential`, and `fixed` are available for comparison.
127
+
128
+ **Bandwidth** is a single shared FIFO pipe (`--bandwidth-mbs`, MB/s = 1e6 bytes/s).
129
+ Per-request round-trips run in parallel, but response bytes serialize through the
130
+ pipe, so aggregate egress is capped and over-read costs real transfer time. Omit
131
+ for unlimited bandwidth.
132
+
133
+ HTTP correctness (206, `Content-Range`, suffix ranges, 416, conditional requests)
134
+ and on-disk streaming come from aiohttp's `web.FileResponse`. Files are never
135
+ loaded into RAM, so multi-gigabyte files work.
136
+
137
+ Missing keys return 404 and are counted in `n_misses`, matching object-store
138
+ NoSuchKey behavior.
139
+
140
+ ## Notes
141
+
142
+ - Loopback only (binds `127.0.0.1`); nothing leaves the machine.
143
+ - Consumers must opt into plain HTTP: obstore `client_options={"allow_http": True}`,
144
+ icechunk `http_store({"allow_http": "true"})`.
145
+ - The injected latency is added to the real (sub-millisecond, local-SSD)
146
+ range-read time, so the modelled RTT is dominated by the configured value.
147
+ - For transport-accurate shaping on real packets, use `tc netem` (Linux) or
148
+ `dnctl`/`pfctl` (macOS) in front of any file server. snailmail trades that
149
+ for zero-setup, in-process instrumentation.
150
+
151
+ Contributing? See [AGENTS.md](AGENTS.md). MIT licensed.
@@ -0,0 +1,123 @@
1
+ # snailmail
2
+
3
+ A local HTTP server that serves a directory over HTTP Range, injecting per-request
4
+ latency and a bandwidth cap, and counts GETs and peak concurrency.
5
+
6
+ Use it to benchmark range-based readers — object stores, Zarr/Icechunk virtual
7
+ chunks, tiled image formats — under realistic network conditions, on your laptop,
8
+ with no cloud and no root.
9
+
10
+ ## Why you'd want it
11
+
12
+ Local disk hides the cost that dominates remote reads: network round-trips.
13
+ A read pattern that finishes instantly against a warm page cache can take
14
+ minutes of serial round-trips against object storage. snailmail adds a
15
+ per-request latency draw and a shared bandwidth pipe so you can measure how a
16
+ reader behaves over the wire. `max_in_flight` tells you peak concurrency, which
17
+ wall-clock time alone cannot.
18
+
19
+ ## Install
20
+
21
+ ```bash
22
+ uv add snailmail # or: pip install snailmail
23
+ ```
24
+
25
+ ## Use it in a benchmark
26
+
27
+ snailmail serves a directory. Every file under the root is reachable at its path
28
+ relative to the root, which matches the shape of an object store or Icechunk virtual
29
+ dataset (one object per file). Point your reader at `server.base` and have it fetch
30
+ keys like `chunks/0.0.0`.
31
+
32
+ ```python
33
+ from snailmail import LatencyRangeServer, LogNormal
34
+
35
+ with LatencyRangeServer("my_zarr_store/", latency=LogNormal(mode_ms=40), bandwidth_mbs=100) as server:
36
+ server.reset_counts()
37
+ open_and_read(server.base) # your reader: obstore, icechunk, zarr, ...
38
+ print(server.stats())
39
+ # {'n_gets': 312, 'n_requests': 312, 'n_misses': 0, 'max_in_flight': 16,
40
+ # 'total_bytes': .., 'methods': {'GET': 312}, 'paths': {..}}
41
+ ```
42
+
43
+ `open_and_read` stands in for the reader you're benchmarking. It makes HTTP GETs
44
+ (with `Range` headers) against `server.base`; snailmail injects the latency, meters
45
+ the bytes through the bandwidth pipe, and streams the file from disk in response. A
46
+ direct request looks like this:
47
+
48
+ ```python
49
+ import urllib.request
50
+
51
+ with LatencyRangeServer("my_zarr_store/") as server:
52
+ req = urllib.request.Request(server.url("chunks/0.0.0"), headers={"Range": "bytes=0-1023"})
53
+ first_kib = urllib.request.urlopen(req).read()
54
+ ```
55
+
56
+ `server.url(key)` builds the URL for a key; `server.files()` lists the served keys.
57
+ `stats()` is a snapshot of request counters since the last `reset_counts()`:
58
+ `n_requests` counts every request, `n_gets` only the GETs, and `n_misses` the
59
+ requests for keys that don't exist (404, like an object store's NoSuchKey). Tune
60
+ between measurements with `set_latency(dist)`, `set_bandwidth_mbs(x)`, and
61
+ `reset_counts()`.
62
+
63
+ Latency is a pluggable distribution passed as `latency=`:
64
+
65
+ ```python
66
+ from snailmail import LogNormal, Normal, Exponential, Fixed
67
+
68
+ LogNormal(mode_ms=45, sigma=0.5) # unimodal hump with long right tail; fits object-store GET RTT
69
+ Normal(mean_ms=45, std_ms=10) # symmetric, truncated at 0
70
+ Exponential(mean_ms=45) # peak at 0; a poor model for GET RTT
71
+ Fixed(20) # deterministic
72
+ ```
73
+
74
+ `latency=None` (the default) injects no latency.
75
+
76
+ ## From the CLI
77
+
78
+ ```bash
79
+ snailmail ./store --dist lognormal --mode-ms 45 --sigma 0.5
80
+ snailmail ./store --dist normal --mean-ms 45 --std-ms 10
81
+ snailmail ./store --dist exponential --mean-ms 45
82
+ snailmail ./store --dist fixed --value-ms 20
83
+ snailmail ./store --bandwidth-mbs 100 --port 8080 --json # no latency; JSON address line
84
+ ```
85
+
86
+ The positional argument (`./store` in the examples above) is the directory to serve.
87
+
88
+ `--json` prints a single machine-readable line and flushes it before serving,
89
+ so a script can spawn snailmail, read the bound address from stdout, and proceed.
90
+
91
+ The CLI rejects a flag that doesn't belong to the chosen `--dist`. Omit `--dist`
92
+ for no injected latency.
93
+
94
+ ## What it models
95
+
96
+ **Latency** is a per-request draw from the chosen distribution. `lognormal` is
97
+ the recommended default: parameterise it by the PDF mode (`--mode-ms`) and shape
98
+ (`--sigma`). `normal`, `exponential`, and `fixed` are available for comparison.
99
+
100
+ **Bandwidth** is a single shared FIFO pipe (`--bandwidth-mbs`, MB/s = 1e6 bytes/s).
101
+ Per-request round-trips run in parallel, but response bytes serialize through the
102
+ pipe, so aggregate egress is capped and over-read costs real transfer time. Omit
103
+ for unlimited bandwidth.
104
+
105
+ HTTP correctness (206, `Content-Range`, suffix ranges, 416, conditional requests)
106
+ and on-disk streaming come from aiohttp's `web.FileResponse`. Files are never
107
+ loaded into RAM, so multi-gigabyte files work.
108
+
109
+ Missing keys return 404 and are counted in `n_misses`, matching object-store
110
+ NoSuchKey behavior.
111
+
112
+ ## Notes
113
+
114
+ - Loopback only (binds `127.0.0.1`); nothing leaves the machine.
115
+ - Consumers must opt into plain HTTP: obstore `client_options={"allow_http": True}`,
116
+ icechunk `http_store({"allow_http": "true"})`.
117
+ - The injected latency is added to the real (sub-millisecond, local-SSD)
118
+ range-read time, so the modelled RTT is dominated by the configured value.
119
+ - For transport-accurate shaping on real packets, use `tc netem` (Linux) or
120
+ `dnctl`/`pfctl` (macOS) in front of any file server. snailmail trades that
121
+ for zero-setup, in-process instrumentation.
122
+
123
+ Contributing? See [AGENTS.md](AGENTS.md). MIT licensed.
@@ -0,0 +1,61 @@
1
+ [project]
2
+ name = "snailmail"
3
+ dynamic = ["version"]
4
+ description = "A local HTTP server that serves a directory over Range with injectable latency and bandwidth limits, for benchmarking range / object-store / virtual-chunk reads under realistic network conditions."
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = "MIT"
8
+ license-files = ["LICENSE"]
9
+ authors = [{ name = "Ian Hunt-Isaak", email = "ian@earthmover.io" }]
10
+ keywords = ["http", "latency", "bandwidth", "benchmark", "range-requests", "object-store", "zarr", "icechunk"]
11
+ classifiers = [
12
+ "Development Status :: 3 - Alpha",
13
+ "Intended Audience :: Developers",
14
+ "Operating System :: POSIX :: Linux",
15
+ "Operating System :: MacOS",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ "Topic :: Internet :: WWW/HTTP :: HTTP Servers",
22
+ "Topic :: System :: Benchmark",
23
+ ]
24
+ dependencies = [
25
+ "aiohttp>=3.9",
26
+ "numpy>=1.24",
27
+ ]
28
+
29
+ [project.scripts]
30
+ snailmail = "snailmail.cli:main"
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/ianhi/snailmail"
34
+ Repository = "https://github.com/ianhi/snailmail"
35
+ Issues = "https://github.com/ianhi/snailmail/issues"
36
+ Changelog = "https://github.com/ianhi/snailmail/blob/main/CHANGELOG.md"
37
+
38
+ [dependency-groups]
39
+ dev = ["pytest>=8", "ruff>=0.6", "mypy>=1.11"]
40
+
41
+ [build-system]
42
+ requires = ["hatchling", "hatch-vcs"]
43
+ build-backend = "hatchling.build"
44
+
45
+ [tool.hatch.version]
46
+ source = "vcs"
47
+
48
+ [tool.hatch.build.targets.wheel]
49
+ packages = ["src/snailmail"]
50
+
51
+ [tool.hatch.build.targets.sdist]
52
+ # Ship source, tests, and the docs a consumer might want; leave out the internal
53
+ # worklog and repo/CI plumbing.
54
+ include = ["src", "tests", "README.md", "CHANGELOG.md", "LICENSE", "AGENTS.md"]
55
+
56
+ [tool.ruff]
57
+ line-length = 100
58
+
59
+ [tool.mypy]
60
+ files = ["src/snailmail"]
61
+ python_version = "3.10"
@@ -0,0 +1,26 @@
1
+ """snailmail — a local HTTP file server with injectable latency and bandwidth limits.
2
+
3
+ For benchmarking range / object-store / virtual-chunk reads under realistic network
4
+ conditions. See :class:`LatencyRangeServer`.
5
+ """
6
+
7
+ from importlib.metadata import PackageNotFoundError, version
8
+
9
+ from snailmail.bandwidth import AsyncSharedPipe
10
+ from snailmail.latency import Exponential, Fixed, LatencyDist, LogNormal, Normal
11
+ from snailmail.server import LatencyRangeServer
12
+
13
+ try:
14
+ __version__ = version("snailmail") # derived from the git tag at build time (hatch-vcs)
15
+ except PackageNotFoundError: # running from a source tree with no install
16
+ __version__ = "0+unknown"
17
+
18
+ __all__ = [
19
+ "LatencyRangeServer",
20
+ "AsyncSharedPipe",
21
+ "LatencyDist",
22
+ "LogNormal",
23
+ "Normal",
24
+ "Exponential",
25
+ "Fixed",
26
+ ]
@@ -0,0 +1,35 @@
1
+ """Bandwidth limiting for responses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+
7
+
8
class AsyncSharedPipe:
    """Async FIFO rate limiter standing in for a single shared client downlink.

    All responses book their byte transfer on one pipe running at ``B`` bytes/s.
    Bookings queue back to back, so total egress never exceeds ``B`` however many
    requests overlap, and every over-read byte occupies pipe time. Only the bytes
    serialize here; per-request latency is handled separately and stays parallel.
    ``B is None`` turns the limiter off.
    """

    def __init__(self, bytes_per_s: float | None):
        # None, zero and negative rates all mean "no limit".
        self.B: float | None = None
        if bytes_per_s and bytes_per_s > 0:
            self.B = bytes_per_s
        self._free = 0.0  # loop-clock time at which the pipe next becomes idle
        self._lock = asyncio.Lock()

    async def transfer(self, nbytes: int) -> None:
        """Book ``nbytes`` on the pipe and wait until that booking has drained.

        Returns immediately when the limiter is off or ``nbytes`` is not positive.
        """
        if self.B is None or nbytes <= 0:
            return
        now = asyncio.get_running_loop().time
        async with self._lock:
            # Queue behind whatever is already booked, or begin right away if the
            # pipe is idle.
            begin = max(now(), self._free)
            done_at = begin + nbytes / self.B
            self._free = done_at
        # The wait happens after the lock is released, so other callers can book
        # their own slot while this one sleeps.
        wait = done_at - now()
        if wait > 0:
            await asyncio.sleep(wait)

    def reset(self) -> None:
        """Drop all outstanding bookings so the pipe counts as idle again."""
        self._free = 0.0