snailmail 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snailmail-0.1.0/.gitignore +55 -0
- snailmail-0.1.0/AGENTS.md +122 -0
- snailmail-0.1.0/CHANGELOG.md +29 -0
- snailmail-0.1.0/LICENSE +21 -0
- snailmail-0.1.0/PKG-INFO +151 -0
- snailmail-0.1.0/README.md +123 -0
- snailmail-0.1.0/pyproject.toml +61 -0
- snailmail-0.1.0/src/snailmail/__init__.py +26 -0
- snailmail-0.1.0/src/snailmail/bandwidth.py +35 -0
- snailmail-0.1.0/src/snailmail/cli.py +130 -0
- snailmail-0.1.0/src/snailmail/latency.py +183 -0
- snailmail-0.1.0/src/snailmail/server.py +250 -0
- snailmail-0.1.0/tests/test_directory.py +300 -0
- snailmail-0.1.0/tests/test_latency.py +67 -0
- snailmail-0.1.0/tests/test_server.py +107 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
wheels/
|
|
10
|
+
*.egg-info/
|
|
11
|
+
*.egg
|
|
12
|
+
.eggs/
|
|
13
|
+
|
|
14
|
+
# Virtual environments / uv
|
|
15
|
+
.venv/
|
|
16
|
+
venv/
|
|
17
|
+
env/
|
|
18
|
+
.uv/
|
|
19
|
+
|
|
20
|
+
# Packaging / build backends
|
|
21
|
+
*.whl
|
|
22
|
+
*.tar.gz
|
|
23
|
+
|
|
24
|
+
# Testing / coverage / type / lint caches
|
|
25
|
+
.pytest_cache/
|
|
26
|
+
.ruff_cache/
|
|
27
|
+
.mypy_cache/
|
|
28
|
+
.pyright/
|
|
29
|
+
.coverage
|
|
30
|
+
.coverage.*
|
|
31
|
+
htmlcov/
|
|
32
|
+
.tox/
|
|
33
|
+
.nox/
|
|
34
|
+
.hypothesis/
|
|
35
|
+
|
|
36
|
+
# Tooling caches
|
|
37
|
+
.cache/
|
|
38
|
+
|
|
39
|
+
# Editors / OS
|
|
40
|
+
.vscode/
|
|
41
|
+
.idea/
|
|
42
|
+
*.swp
|
|
43
|
+
*~
|
|
44
|
+
.DS_Store
|
|
45
|
+
|
|
46
|
+
# Jupyter
|
|
47
|
+
.ipynb_checkpoints/
|
|
48
|
+
.jupyter/
|
|
49
|
+
*.ipynb_ystore.db
|
|
50
|
+
|
|
51
|
+
# Local scratch data the server might be pointed at (don't commit big test files)
|
|
52
|
+
/data/
|
|
53
|
+
*.h5ad
|
|
54
|
+
*.tiff
|
|
55
|
+
*.zarr/
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
Onboarding for an agent or contributor picking up snailmail. Read the
|
|
4
|
+
[README](README.md) first for what it is and why; this file is the "how it works,
|
|
5
|
+
how to work on it" layer.
|
|
6
|
+
|
|
7
|
+
## Goal
|
|
8
|
+
|
|
9
|
+
A zero-setup, in-process harness to benchmark range-based readers under realistic
|
|
10
|
+
network conditions, and to answer concurrency questions honestly. The product is the
|
|
11
|
+
measurement: GET count, bytes, and **peak concurrency** (`max_in_flight`) — that
|
|
12
|
+
last one is the whole point. Wall-clock can't distinguish "fast because cached" from
|
|
13
|
+
"fast because concurrent"; `max_in_flight` can.
|
|
14
|
+
|
|
15
|
+
## Layout
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
src/snailmail/
|
|
19
|
+
__init__.py # public exports
|
|
20
|
+
latency.py # LatencyDist + LogNormal / Normal / Exponential / Fixed
|
|
21
|
+
bandwidth.py # AsyncSharedPipe
|
|
22
|
+
server.py # LatencyRangeServer (the threaded aiohttp wrapper)
|
|
23
|
+
cli.py # the `snailmail` CLI (main, --dist arg wiring)
|
|
24
|
+
tests/
|
|
25
|
+
test_server.py # range correctness, latency, bandwidth, concurrency, counters
|
|
26
|
+
test_directory.py # directory serving, misses, traversal, stats, set_latency, --version
|
|
27
|
+
test_latency.py # distributions + CLI --dist wiring
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
One file per concern; keep each small and single-purpose. The split is to stay
|
|
31
|
+
easily editable, not an invitation to grow a framework — the whole thing should stay
|
|
32
|
+
readable in a sitting.
|
|
33
|
+
|
|
34
|
+
## Develop
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
uv sync # aiohttp, numpy + dev: pytest, ruff, mypy
|
|
38
|
+
uv run pytest # all green
|
|
39
|
+
uv run ruff check src tests
|
|
40
|
+
uv run mypy # type gate (config in pyproject)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Pre-commit hooks (ruff lint + ruff format + mypy + file hygiene) run via
|
|
44
|
+
[prek](https://github.com/j178/prek): `prek install` once, then they fire on commit;
|
|
45
|
+
`prek run --all-files` to run them by hand.
|
|
46
|
+
|
|
47
|
+
## Conventions
|
|
48
|
+
|
|
49
|
+
- **Commits:** do not co-sign — no `Co-Authored-By` / tool trailers.
|
|
50
|
+
- **Comments:** tight and useful — explain *why*, not *what*. No session- or
|
|
51
|
+
conversation-specific notes ("as we discussed", change logs, dates); a comment
|
|
52
|
+
must make sense to someone reading the file cold a year from now.
|
|
53
|
+
|
|
54
|
+
## Design decisions (read before changing things)
|
|
55
|
+
|
|
56
|
+
- **aiohttp `web.FileResponse` owns all HTTP correctness** — 206, `Content-Range`,
|
|
57
|
+
suffix ranges, 416, conditional requests — and streams from disk. Do **not**
|
|
58
|
+
reimplement range handling; that was the whole reason to rewrite off the original
|
|
59
|
+
hand-rolled `BaseHTTPRequestHandler`. The file is never read into RAM, so multi-GB
|
|
60
|
+
files work. Our consumers issue single-range GETs only, so multi-range responses
|
|
61
|
+
are out of scope.
|
|
62
|
+
|
|
63
|
+
- **Serves a directory, always.** The root is served with aiohttp's `add_static`
|
|
64
|
+
(range-correct *and* traversal-safe — don't hand-roll path joining). One object per
|
|
65
|
+
file is the shape that matters for the Icechunk/object-store use case; to benchmark
|
|
66
|
+
a single file, point at the directory containing it. There is deliberately no
|
|
67
|
+
single-file mode — it added a `url`-vs-`base` duality and a custom handler for no
|
|
68
|
+
real benefit. `base` is the root; `url(key)` builds a key URL. `FileResponse` defers
|
|
69
|
+
its 404 to send time, so **misses are detected up front** via `_target_size()`
|
|
70
|
+
(which also yields the size for byte accounting), not by inspecting the response
|
|
71
|
+
status — a miss is a read whose path resolves to no file under the root, counted in
|
|
72
|
+
`n_misses`.
|
|
73
|
+
- **Latency = a pluggable `LatencyDist`** (`latency.py`): `LogNormal`, `Normal`,
|
|
74
|
+
`Exponential`, `Fixed`. **Lognormal is the recommended default and the one to reach
|
|
75
|
+
for** — object-store GET RTT is a unimodal hump with a long right tail, which it
|
|
76
|
+
fits; it's parameterised by the PDF **mode** (`mode_ms`) and shape `sigma`. The
|
|
77
|
+
others exist for comparison, not because they model object stores well — notably
|
|
78
|
+
`Exponential`'s peak sits at the floor, which is *wrong* for GET RTT; offer it, but
|
|
79
|
+
don't recommend it. Every dist **pre-generates its pool once with numpy and serves
|
|
80
|
+
it round-robin** — O(1) in the hot path, no per-request RNG, exactly reproducible
|
|
81
|
+
per seed. The pool index is unsynchronised on purpose: all requests run on one
|
|
82
|
+
event-loop thread, so it's safe. If you ever move to multiple loops/threads, that
|
|
83
|
+
assumption breaks. Negative draws (Normal's left tail) are truncated at 0.
|
|
84
|
+
- **Bandwidth = one shared FIFO pipe** (`AsyncSharedPipe`): per-request RTTs stay
|
|
85
|
+
parallel; response *bytes* serialize through the pipe, so egress is capped and
|
|
86
|
+
over-read costs real time.
|
|
87
|
+
- **Async, in a background thread.** One event loop means many requests' latency
|
|
88
|
+
sleeps overlap with no thread-pool ceiling — exactly what makes the
|
|
89
|
+
peak-concurrency measurement clean. `start()` spawns the loop thread; `stop()`
|
|
90
|
+
stops it. Don't reintroduce thread-per-request.
|
|
91
|
+
- **Counters under a lock.** `stats()` is a post-hoc, atomic snapshot (counts, total
|
|
92
|
+
bytes, 404 misses, peak `max_in_flight`, and per-method / per-path breakdowns) that
|
|
93
|
+
persists until `reset_counts()`. For accounting only, `_range_bytes` reuses aiohttp's
|
|
94
|
+
own `request.http_range` parser (not a hand-rolled one) so the counted bytes match
|
|
95
|
+
what the static handler serves; serving correctness still comes entirely from
|
|
96
|
+
aiohttp. See `_target_size`'s docstring for why size/miss are resolved up front
|
|
97
|
+
rather than read back from aiohttp.
|
|
98
|
+
|
|
99
|
+
- **Compose aiohttp, don't subclass it.** aiohttp has no server base class meant for
|
|
100
|
+
extension (its docs steer you to middlewares/signals over subclassing
|
|
101
|
+
`web.Application`). `LatencyRangeServer` is a threaded lifecycle + counters facade
|
|
102
|
+
around `web.Application` + `AppRunner`/`TCPSite`; keep it that way. The one private
|
|
103
|
+
touch is reading the bound ephemeral port off `site._server.sockets` — aiohttp
|
|
104
|
+
exposes no public API for it.
|
|
105
|
+
- **Injected latency is added on top** of the real (sub-ms, local-SSD) range read, so
|
|
106
|
+
the modelled RTT stays dominated by the knob. Revisit for spinning disks or very
|
|
107
|
+
large single ranges.
|
|
108
|
+
|
|
109
|
+
## Non-goals
|
|
110
|
+
|
|
111
|
+
- **Transport-accurate shaping.** snailmail models latency and bandwidth at the
|
|
112
|
+
application layer (a `sleep()` plus a byte pipe), not on real packets. For
|
|
113
|
+
kernel-level RTT/bandwidth use `tc netem` (Linux) or `dnctl`/`pfctl` (macOS) in
|
|
114
|
+
front of any file server. Don't grow snailmail toward packet shaping.
|
|
115
|
+
- **A general-purpose web server.** It serves a directory on loopback for benchmarks.
|
|
116
|
+
|
|
117
|
+
## Working notes
|
|
118
|
+
|
|
119
|
+
Current status, open tasks, and origin/context live in
|
|
120
|
+
[docs/NOTES.md](docs/NOTES.md) — the mutable worklog agents update (repository only; it is not shipped in the sdist). Keep *this* file
|
|
121
|
+
durable (purpose, design, conventions, non-goals); put anything time-specific in the
|
|
122
|
+
worklog.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here. The format is based on
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to
|
|
5
|
+
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [0.1.0] - 2026-06-18
|
|
8
|
+
|
|
9
|
+
Initial public release.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- `LatencyRangeServer`: a loopback HTTP server that serves a **directory tree** over
|
|
13
|
+
HTTP Range with injectable latency and bandwidth limits, for benchmarking range /
|
|
14
|
+
object-store / virtual-chunk reads. One object per file (the shape of an Icechunk
|
|
15
|
+
virtual dataset), range- and traversal-safe; `base` is the root, `url(key)` builds a
|
|
16
|
+
key URL, and `files()` lists the served keys.
|
|
17
|
+
- Pluggable per-request latency distributions: `LogNormal` (the recommended default),
|
|
18
|
+
`Normal`, `Exponential`, and `Fixed`, each with explicit, distribution-specific
|
|
19
|
+
parameters. Draws are pre-generated and served round-robin (O(1), reproducible per
|
|
20
|
+
seed).
|
|
21
|
+
- Shared FIFO bandwidth pipe (`AsyncSharedPipe`) so response bytes serialize through a
|
|
22
|
+
capped egress while round-trips stay parallel.
|
|
23
|
+
- Request accounting via `stats()`: GET/request counts, 404 misses (`n_misses`), total
|
|
24
|
+
bytes, peak concurrency (`max_in_flight`), and per-method / per-path breakdowns;
|
|
25
|
+
persists until `reset_counts()`.
|
|
26
|
+
- `snailmail` CLI with a `--dist` selector and explicit per-distribution flags, a
|
|
27
|
+
`--json` machine-readable address line (flushed before serving), and `--version`.
|
|
28
|
+
|
|
29
|
+
[0.1.0]: https://github.com/ianhi/snailmail/releases/tag/v0.1.0
|
snailmail-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ian Hunt-Isaak
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
snailmail-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: snailmail
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A local HTTP server that serves a directory over HTTP Range with injectable latency and bandwidth limits, for benchmarking range / object-store / virtual-chunk reads under realistic network conditions.
|
|
5
|
+
Project-URL: Homepage, https://github.com/ianhi/snailmail
|
|
6
|
+
Project-URL: Repository, https://github.com/ianhi/snailmail
|
|
7
|
+
Project-URL: Issues, https://github.com/ianhi/snailmail/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/ianhi/snailmail/blob/main/CHANGELOG.md
|
|
9
|
+
Author-email: Ian Hunt-Isaak <ian@earthmover.io>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: bandwidth,benchmark,http,icechunk,latency,object-store,range-requests,zarr
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Operating System :: MacOS
|
|
16
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
23
|
+
Classifier: Topic :: System :: Benchmark
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: aiohttp>=3.9
|
|
26
|
+
Requires-Dist: numpy>=1.24
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# snailmail
|
|
30
|
+
|
|
31
|
+
A local HTTP server that serves a directory over HTTP Range, injecting per-request
|
|
32
|
+
latency and a bandwidth cap, and counts GETs and peak concurrency.
|
|
33
|
+
|
|
34
|
+
Use it to benchmark range-based readers — object stores, Zarr/Icechunk virtual
|
|
35
|
+
chunks, tiled image formats — under realistic network conditions, on your laptop,
|
|
36
|
+
with no cloud and no root.
|
|
37
|
+
|
|
38
|
+
## Why you'd want it
|
|
39
|
+
|
|
40
|
+
Local disk hides the cost that dominates remote reads: network round-trips.
|
|
41
|
+
A read pattern that finishes instantly against a warm page cache can take
|
|
42
|
+
minutes of serial round-trips against object storage. snailmail adds a
|
|
43
|
+
per-request latency draw and a shared bandwidth pipe so you can measure how a
|
|
44
|
+
reader behaves over the wire. `max_in_flight` tells you peak concurrency, which
|
|
45
|
+
wall-clock time alone cannot.
|
|
46
|
+
|
|
47
|
+
## Install
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uv add snailmail # or: pip install snailmail
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Use it in a benchmark
|
|
54
|
+
|
|
55
|
+
snailmail serves a directory. Every file under the root is reachable at its path
|
|
56
|
+
relative to the root, which matches the shape of an object store or Icechunk virtual
|
|
57
|
+
dataset (one object per file). Point your reader at `server.base` and have it fetch
|
|
58
|
+
keys like `chunks/0.0.0`.
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from snailmail import LatencyRangeServer, LogNormal
|
|
62
|
+
|
|
63
|
+
with LatencyRangeServer("my_zarr_store/", latency=LogNormal(mode_ms=40), bandwidth_mbs=100) as server:
|
|
64
|
+
server.reset_counts()
|
|
65
|
+
open_and_read(server.base) # your reader: obstore, icechunk, zarr, ...
|
|
66
|
+
print(server.stats())
|
|
67
|
+
# {'n_gets': 312, 'n_requests': 312, 'n_misses': 0, 'max_in_flight': 16,
|
|
68
|
+
# 'total_bytes': .., 'methods': {'GET': 312}, 'paths': {..}}
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
`open_and_read` stands in for the reader you're benchmarking. It makes HTTP GETs
|
|
72
|
+
(with `Range` headers) against `server.base`; snailmail injects the latency, meters
|
|
73
|
+
the bytes through the bandwidth pipe, and streams the file from disk in response. A
|
|
74
|
+
direct request looks like this:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import urllib.request
|
|
78
|
+
|
|
79
|
+
with LatencyRangeServer("my_zarr_store/") as server:
|
|
80
|
+
req = urllib.request.Request(server.url("chunks/0.0.0"), headers={"Range": "bytes=0-1023"})
|
|
81
|
+
first_kib = urllib.request.urlopen(req).read()
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
`server.url(key)` builds the URL for a key; `server.files()` lists the served keys.
|
|
85
|
+
`stats()` is a snapshot of request counters since the last `reset_counts()`:
|
|
86
|
+
`n_requests` counts every request, `n_gets` only the GETs, and `n_misses` the
|
|
87
|
+
requests for keys that don't exist (404, like an object store's NoSuchKey). Tune
|
|
88
|
+
between measurements with `set_latency(dist)`, `set_bandwidth_mbs(x)`, and
|
|
89
|
+
`reset_counts()`.
|
|
90
|
+
|
|
91
|
+
Latency is a pluggable distribution passed as `latency=`:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from snailmail import LogNormal, Normal, Exponential, Fixed
|
|
95
|
+
|
|
96
|
+
LogNormal(mode_ms=45, sigma=0.5) # unimodal hump with long right tail; fits object-store GET RTT
|
|
97
|
+
Normal(mean_ms=45, std_ms=10) # symmetric, truncated at 0
|
|
98
|
+
Exponential(mean_ms=45) # peak at 0; a poor model for GET RTT
|
|
99
|
+
Fixed(20) # deterministic
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
`latency=None` (the default) injects no latency.
|
|
103
|
+
|
|
104
|
+
## From the CLI
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
snailmail ./store --dist lognormal --mode-ms 45 --sigma 0.5
|
|
108
|
+
snailmail ./store --dist normal --mean-ms 45 --std-ms 10
|
|
109
|
+
snailmail ./store --dist exponential --mean-ms 45
|
|
110
|
+
snailmail ./store --dist fixed --value-ms 20
|
|
111
|
+
snailmail ./store --bandwidth-mbs 100 --port 8080 --json # no latency; JSON address line
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
The argument is the directory to serve.
|
|
115
|
+
|
|
116
|
+
`--json` prints a single machine-readable line and flushes it before serving,
|
|
117
|
+
so a script can spawn snailmail, read the bound address from stdout, and proceed.
|
|
118
|
+
|
|
119
|
+
The CLI rejects a flag that doesn't belong to the chosen `--dist`. Omit `--dist`
|
|
120
|
+
for no injected latency.
|
|
121
|
+
|
|
122
|
+
## What it models
|
|
123
|
+
|
|
124
|
+
**Latency** is a per-request draw from the chosen distribution. `lognormal` is
|
|
125
|
+
the recommended default: parameterise it by the PDF mode (`--mode-ms`) and shape
|
|
126
|
+
(`--sigma`). `normal`, `exponential`, and `fixed` are available for comparison.
|
|
127
|
+
|
|
128
|
+
**Bandwidth** is a single shared FIFO pipe (`--bandwidth-mbs`, MB/s = 1e6 bytes/s).
|
|
129
|
+
Per-request round-trips run in parallel, but response bytes serialize through the
|
|
130
|
+
pipe, so aggregate egress is capped and over-read costs real transfer time. Omit
|
|
131
|
+
for unlimited bandwidth.
|
|
132
|
+
|
|
133
|
+
HTTP correctness (206, `Content-Range`, suffix ranges, 416, conditional requests)
|
|
134
|
+
and on-disk streaming come from aiohttp's `web.FileResponse`. Files are never
|
|
135
|
+
loaded into RAM, so multi-gigabyte files work.
|
|
136
|
+
|
|
137
|
+
Missing keys return 404 and are counted in `n_misses`, matching object-store
|
|
138
|
+
NoSuchKey behavior.
|
|
139
|
+
|
|
140
|
+
## Notes
|
|
141
|
+
|
|
142
|
+
- Loopback only (binds `127.0.0.1`); nothing leaves the machine.
|
|
143
|
+
- Consumers must opt into plain HTTP: obstore `client_options={"allow_http": True}`,
|
|
144
|
+
icechunk `http_store({"allow_http": "true"})`.
|
|
145
|
+
- The injected latency is added to the real (sub-millisecond, local-SSD)
|
|
146
|
+
range-read time, so the modelled RTT is dominated by the configured value.
|
|
147
|
+
- For transport-accurate shaping on real packets, use `tc netem` (Linux) or
|
|
148
|
+
`dnctl`/`pfctl` (macOS) in front of any file server. snailmail trades that
|
|
149
|
+
for zero-setup, in-process instrumentation.
|
|
150
|
+
|
|
151
|
+
Contributing? See [AGENTS.md](AGENTS.md). MIT licensed.
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# snailmail
|
|
2
|
+
|
|
3
|
+
A local HTTP server that serves a directory over HTTP Range, injecting per-request
|
|
4
|
+
latency and a bandwidth cap, and counts GETs and peak concurrency.
|
|
5
|
+
|
|
6
|
+
Use it to benchmark range-based readers — object stores, Zarr/Icechunk virtual
|
|
7
|
+
chunks, tiled image formats — under realistic network conditions, on your laptop,
|
|
8
|
+
with no cloud and no root.
|
|
9
|
+
|
|
10
|
+
## Why you'd want it
|
|
11
|
+
|
|
12
|
+
Local disk hides the cost that dominates remote reads: network round-trips.
|
|
13
|
+
A read pattern that finishes instantly against a warm page cache can take
|
|
14
|
+
minutes of serial round-trips against object storage. snailmail adds a
|
|
15
|
+
per-request latency draw and a shared bandwidth pipe so you can measure how a
|
|
16
|
+
reader behaves over the wire. `max_in_flight` tells you peak concurrency, which
|
|
17
|
+
wall-clock time alone cannot.
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
uv add snailmail # or: pip install snailmail
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Use it in a benchmark
|
|
26
|
+
|
|
27
|
+
snailmail serves a directory. Every file under the root is reachable at its path
|
|
28
|
+
relative to the root, which matches the shape of an object store or Icechunk virtual
|
|
29
|
+
dataset (one object per file). Point your reader at `server.base` and have it fetch
|
|
30
|
+
keys like `chunks/0.0.0`.
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from snailmail import LatencyRangeServer, LogNormal
|
|
34
|
+
|
|
35
|
+
with LatencyRangeServer("my_zarr_store/", latency=LogNormal(mode_ms=40), bandwidth_mbs=100) as server:
|
|
36
|
+
server.reset_counts()
|
|
37
|
+
open_and_read(server.base) # your reader: obstore, icechunk, zarr, ...
|
|
38
|
+
print(server.stats())
|
|
39
|
+
# {'n_gets': 312, 'n_requests': 312, 'n_misses': 0, 'max_in_flight': 16,
|
|
40
|
+
# 'total_bytes': .., 'methods': {'GET': 312}, 'paths': {..}}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
`open_and_read` stands in for the reader you're benchmarking. It makes HTTP GETs
|
|
44
|
+
(with `Range` headers) against `server.base`; snailmail injects the latency, meters
|
|
45
|
+
the bytes through the bandwidth pipe, and streams the file from disk in response. A
|
|
46
|
+
direct request looks like this:
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
import urllib.request
|
|
50
|
+
|
|
51
|
+
with LatencyRangeServer("my_zarr_store/") as server:
|
|
52
|
+
req = urllib.request.Request(server.url("chunks/0.0.0"), headers={"Range": "bytes=0-1023"})
|
|
53
|
+
first_kib = urllib.request.urlopen(req).read()
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
`server.url(key)` builds the URL for a key; `server.files()` lists the served keys.
|
|
57
|
+
`stats()` is a snapshot of request counters since the last `reset_counts()`:
|
|
58
|
+
`n_requests` counts every request, `n_gets` only the GETs, and `n_misses` the
|
|
59
|
+
requests for keys that don't exist (404, like an object store's NoSuchKey). Tune
|
|
60
|
+
between measurements with `set_latency(dist)`, `set_bandwidth_mbs(x)`, and
|
|
61
|
+
`reset_counts()`.
|
|
62
|
+
|
|
63
|
+
Latency is a pluggable distribution passed as `latency=`:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from snailmail import LogNormal, Normal, Exponential, Fixed
|
|
67
|
+
|
|
68
|
+
LogNormal(mode_ms=45, sigma=0.5) # unimodal hump with long right tail; fits object-store GET RTT
|
|
69
|
+
Normal(mean_ms=45, std_ms=10) # symmetric, truncated at 0
|
|
70
|
+
Exponential(mean_ms=45) # peak at 0; a poor model for GET RTT
|
|
71
|
+
Fixed(20) # deterministic
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
`latency=None` (the default) injects no latency.
|
|
75
|
+
|
|
76
|
+
## From the CLI
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
snailmail ./store --dist lognormal --mode-ms 45 --sigma 0.5
|
|
80
|
+
snailmail ./store --dist normal --mean-ms 45 --std-ms 10
|
|
81
|
+
snailmail ./store --dist exponential --mean-ms 45
|
|
82
|
+
snailmail ./store --dist fixed --value-ms 20
|
|
83
|
+
snailmail ./store --bandwidth-mbs 100 --port 8080 --json # no latency; JSON address line
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
The argument is the directory to serve.
|
|
87
|
+
|
|
88
|
+
`--json` prints a single machine-readable line and flushes it before serving,
|
|
89
|
+
so a script can spawn snailmail, read the bound address from stdout, and proceed.
|
|
90
|
+
|
|
91
|
+
The CLI rejects a flag that doesn't belong to the chosen `--dist`. Omit `--dist`
|
|
92
|
+
for no injected latency.
|
|
93
|
+
|
|
94
|
+
## What it models
|
|
95
|
+
|
|
96
|
+
**Latency** is a per-request draw from the chosen distribution. `lognormal` is
|
|
97
|
+
the recommended default: parameterise it by the PDF mode (`--mode-ms`) and shape
|
|
98
|
+
(`--sigma`). `normal`, `exponential`, and `fixed` are available for comparison.
|
|
99
|
+
|
|
100
|
+
**Bandwidth** is a single shared FIFO pipe (`--bandwidth-mbs`, MB/s = 1e6 bytes/s).
|
|
101
|
+
Per-request round-trips run in parallel, but response bytes serialize through the
|
|
102
|
+
pipe, so aggregate egress is capped and over-read costs real transfer time. Omit
|
|
103
|
+
for unlimited bandwidth.
|
|
104
|
+
|
|
105
|
+
HTTP correctness (206, `Content-Range`, suffix ranges, 416, conditional requests)
|
|
106
|
+
and on-disk streaming come from aiohttp's `web.FileResponse`. Files are never
|
|
107
|
+
loaded into RAM, so multi-gigabyte files work.
|
|
108
|
+
|
|
109
|
+
Missing keys return 404 and are counted in `n_misses`, matching object-store
|
|
110
|
+
NoSuchKey behavior.
|
|
111
|
+
|
|
112
|
+
## Notes
|
|
113
|
+
|
|
114
|
+
- Loopback only (binds `127.0.0.1`); nothing leaves the machine.
|
|
115
|
+
- Consumers must opt into plain HTTP: obstore `client_options={"allow_http": True}`,
|
|
116
|
+
icechunk `http_store({"allow_http": "true"})`.
|
|
117
|
+
- The injected latency is added to the real (sub-millisecond, local-SSD)
|
|
118
|
+
range-read time, so the modelled RTT is dominated by the configured value.
|
|
119
|
+
- For transport-accurate shaping on real packets, use `tc netem` (Linux) or
|
|
120
|
+
`dnctl`/`pfctl` (macOS) in front of any file server. snailmail trades that
|
|
121
|
+
for zero-setup, in-process instrumentation.
|
|
122
|
+
|
|
123
|
+
Contributing? See [AGENTS.md](AGENTS.md). MIT licensed.
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "snailmail"
|
|
3
|
+
dynamic = ["version"]
|
|
4
|
+
description = "A local HTTP server that serves a directory over HTTP Range with injectable latency and bandwidth limits, for benchmarking range / object-store / virtual-chunk reads under realistic network conditions."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
license-files = ["LICENSE"]
|
|
9
|
+
authors = [{ name = "Ian Hunt-Isaak", email = "ian@earthmover.io" }]
|
|
10
|
+
keywords = ["http", "latency", "bandwidth", "benchmark", "range-requests", "object-store", "zarr", "icechunk"]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 3 - Alpha",
|
|
13
|
+
"Intended Audience :: Developers",
|
|
14
|
+
"Operating System :: POSIX :: Linux",
|
|
15
|
+
"Operating System :: MacOS",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Topic :: Internet :: WWW/HTTP :: HTTP Servers",
|
|
22
|
+
"Topic :: System :: Benchmark",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"aiohttp>=3.9",
|
|
26
|
+
"numpy>=1.24",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.scripts]
|
|
30
|
+
snailmail = "snailmail.cli:main"
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/ianhi/snailmail"
|
|
34
|
+
Repository = "https://github.com/ianhi/snailmail"
|
|
35
|
+
Issues = "https://github.com/ianhi/snailmail/issues"
|
|
36
|
+
Changelog = "https://github.com/ianhi/snailmail/blob/main/CHANGELOG.md"
|
|
37
|
+
|
|
38
|
+
[dependency-groups]
|
|
39
|
+
dev = ["pytest>=8", "ruff>=0.6", "mypy>=1.11"]
|
|
40
|
+
|
|
41
|
+
[build-system]
|
|
42
|
+
requires = ["hatchling", "hatch-vcs"]
|
|
43
|
+
build-backend = "hatchling.build"
|
|
44
|
+
|
|
45
|
+
[tool.hatch.version]
|
|
46
|
+
source = "vcs"
|
|
47
|
+
|
|
48
|
+
[tool.hatch.build.targets.wheel]
|
|
49
|
+
packages = ["src/snailmail"]
|
|
50
|
+
|
|
51
|
+
[tool.hatch.build.targets.sdist]
|
|
52
|
+
# Ship source + the docs a consumer might want; leave out the internal worklog and
|
|
53
|
+
# repo/CI plumbing.
|
|
54
|
+
include = ["src", "tests", "README.md", "CHANGELOG.md", "LICENSE", "AGENTS.md"]
|
|
55
|
+
|
|
56
|
+
[tool.ruff]
|
|
57
|
+
line-length = 100
|
|
58
|
+
|
|
59
|
+
[tool.mypy]
|
|
60
|
+
files = ["src/snailmail"]
|
|
61
|
+
python_version = "3.10"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""snailmail — a local HTTP file server with injectable latency and bandwidth limits.
|
|
2
|
+
|
|
3
|
+
For benchmarking range / object-store / virtual-chunk reads under realistic network
|
|
4
|
+
conditions. See :class:`LatencyRangeServer`.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
8
|
+
|
|
9
|
+
from snailmail.bandwidth import AsyncSharedPipe
|
|
10
|
+
from snailmail.latency import Exponential, Fixed, LatencyDist, LogNormal, Normal
|
|
11
|
+
from snailmail.server import LatencyRangeServer
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
__version__ = version("snailmail") # derived from the git tag at build time (hatch-vcs)
|
|
15
|
+
except PackageNotFoundError: # running from a source tree with no install
|
|
16
|
+
__version__ = "0+unknown"
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"LatencyRangeServer",
|
|
20
|
+
"AsyncSharedPipe",
|
|
21
|
+
"LatencyDist",
|
|
22
|
+
"LogNormal",
|
|
23
|
+
"Normal",
|
|
24
|
+
"Exponential",
|
|
25
|
+
"Fixed",
|
|
26
|
+
]
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Bandwidth limiting for responses."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AsyncSharedPipe:
    """Async FIFO rate limiter standing in for a single shared client downlink.

    Each :meth:`transfer` call reserves a slot on one pipe of ``B`` bytes/s.
    Slots are laid end to end, so however many responses overlap, their combined
    rate never exceeds ``B`` and every over-read byte costs pipe time. Request
    latency is injected elsewhere and stays parallel; only byte transfer is
    serialized here. A ``B`` of ``None`` switches the limiter off.
    """

    def __init__(self, bytes_per_s: float | None):
        # None, 0 and any non-positive rate all mean "unlimited".
        if bytes_per_s and bytes_per_s > 0:
            self.B: float | None = bytes_per_s
        else:
            self.B = None
        self._lock = asyncio.Lock()
        # Loop-clock timestamp at which the pipe next becomes idle.
        self._free = 0.0

    async def transfer(self, nbytes: int) -> None:
        """Wait until ``nbytes`` have drained through the pipe.

        Returns immediately when the limiter is disabled or ``nbytes`` is not
        positive.
        """
        if self.B is None or nbytes <= 0:
            return
        clock = asyncio.get_running_loop().time
        async with self._lock:
            # Queue behind the previous reservation, or start now if the pipe is idle.
            begins = max(clock(), self._free)
            self._free = begins + nbytes / self.B
            done_at = self._free
        # The wait happens after the reservation is recorded, so later callers can
        # book their own slots while this one sleeps.
        remaining = done_at - clock()
        if remaining > 0:
            await asyncio.sleep(remaining)

    def reset(self) -> None:
        """Forget any outstanding reservation and mark the pipe idle."""
        self._free = 0.0
|