veilrender 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. veilrender-0.1.0/PKG-INFO +129 -0
  2. veilrender-0.1.0/README.md +109 -0
  3. veilrender-0.1.0/pyproject.toml +61 -0
  4. veilrender-0.1.0/setup.cfg +4 -0
  5. veilrender-0.1.0/src/veilrender/__init__.py +3 -0
  6. veilrender-0.1.0/src/veilrender/__main__.py +6 -0
  7. veilrender-0.1.0/src/veilrender/_vendor/__init__.py +2 -0
  8. veilrender-0.1.0/src/veilrender/_vendor/benchmark_compare.py +323 -0
  9. veilrender-0.1.0/src/veilrender/_vendor/cache.py +1023 -0
  10. veilrender-0.1.0/src/veilrender/_vendor/config.py +713 -0
  11. veilrender-0.1.0/src/veilrender/_vendor/dotenv.py +514 -0
  12. veilrender-0.1.0/src/veilrender/_vendor/httpserver.py +1007 -0
  13. veilrender-0.1.0/src/veilrender/_vendor/jsonc.py +352 -0
  14. veilrender-0.1.0/src/veilrender/_vendor/markdown.py +904 -0
  15. veilrender-0.1.0/src/veilrender/_vendor/readability.py +1002 -0
  16. veilrender-0.1.0/src/veilrender/_vendor/retry.py +503 -0
  17. veilrender-0.1.0/src/veilrender/_vendor/soup.py +998 -0
  18. veilrender-0.1.0/src/veilrender/_vendor/structlog.py +888 -0
  19. veilrender-0.1.0/src/veilrender/_vendor/useragent.py +475 -0
  20. veilrender-0.1.0/src/veilrender/_vendor/yaml.py +1124 -0
  21. veilrender-0.1.0/src/veilrender/app.py +158 -0
  22. veilrender-0.1.0/src/veilrender/auth.py +39 -0
  23. veilrender-0.1.0/src/veilrender/browser.py +172 -0
  24. veilrender-0.1.0/src/veilrender/cdp_proxy.py +314 -0
  25. veilrender-0.1.0/src/veilrender/config.py +25 -0
  26. veilrender-0.1.0/src/veilrender/models.py +109 -0
  27. veilrender-0.1.0/src/veilrender/routes/__init__.py +1 -0
  28. veilrender-0.1.0/src/veilrender/routes/health.py +17 -0
  29. veilrender-0.1.0/src/veilrender/routes/render.py +122 -0
  30. veilrender-0.1.0/src/veilrender/routes/screenshot.py +65 -0
  31. veilrender-0.1.0/src/veilrender.egg-info/PKG-INFO +129 -0
  32. veilrender-0.1.0/src/veilrender.egg-info/SOURCES.txt +34 -0
  33. veilrender-0.1.0/src/veilrender.egg-info/dependency_links.txt +1 -0
  34. veilrender-0.1.0/src/veilrender.egg-info/entry_points.txt +2 -0
  35. veilrender-0.1.0/src/veilrender.egg-info/requires.txt +7 -0
  36. veilrender-0.1.0/src/veilrender.egg-info/top_level.txt +1 -0
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: veilrender
3
+ Version: 0.1.0
4
+ Summary: Headless browser rendering API — self-hostable on HF Spaces, Docker, or bare metal
5
+ Author: Peng Ding
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/Oaklight/veilrender
8
+ Project-URL: Issues, https://github.com/Oaklight/veilrender/issues
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Internet :: WWW/HTTP
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: cloakbrowser>=0.3.0
15
+ Provides-Extra: dev
16
+ Requires-Dist: ruff>=0.11.0; extra == "dev"
17
+ Requires-Dist: ty>=0.0.1a0; extra == "dev"
18
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
19
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
20
+
21
+ # VeilRender
22
+
23
+ [![PyPI version](https://img.shields.io/pypi/v/veilrender?color=green)](https://pypi.org/project/veilrender/)
24
+ [![GitHub release](https://img.shields.io/github/v/release/Oaklight/veilrender?color=green)](https://github.com/Oaklight/veilrender/releases/latest)
25
+ [![CI](https://github.com/Oaklight/veilrender/actions/workflows/ci.yml/badge.svg)](https://github.com/Oaklight/veilrender/actions/workflows/ci.yml)
26
+ [![Docker Pulls](https://img.shields.io/docker/pulls/oaklight/veilrender)](https://hub.docker.com/r/oaklight/veilrender)
27
+ [![Docker Image](https://img.shields.io/docker/v/oaklight/veilrender?sort=semver&label=docker)](https://hub.docker.com/r/oaklight/veilrender)
28
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
29
+ [![HF Spaces](https://img.shields.io/badge/%F0%9F%A4%97-Spaces-blue)](https://huggingface.co/spaces/oaklight/veilrender)
30
+
31
+ [中文](README_zh.md) | **English**
32
+
33
+ Headless browser rendering API — self-hostable on HF Spaces, Docker, or bare metal.
34
+
35
+ VeilRender accepts a URL and returns the fully rendered page content (HTML, Markdown, readability-extracted article) using a headless Chromium browser. Designed as a fallback for fetch tools that fail on JavaScript-rendered pages.
36
+
37
+ ## Quick Start
38
+
39
+ ### Docker
40
+
41
+ ```bash
42
+ docker run -p 7860:7860 -e VEILRENDER_API_TOKEN=your-secret ghcr.io/oaklight/veilrender
43
+ ```
44
+
45
+ ### Local Development
46
+
47
+ ```bash
48
+ pip install -e ".[dev]"
49
+ playwright install chromium
50
+ python -m veilrender
51
+ ```
52
+
53
+ ## API
54
+
55
+ ### GET /health
56
+
57
+ Returns `{"status": "ok"}` if the service is running.
58
+
59
+ ### POST /render
60
+
61
+ Render a URL and return the page content.
62
+
63
+ ```bash
64
+ curl -X POST http://localhost:7860/render \
65
+ -H "Authorization: Bearer your-secret" \
66
+ -H "Content-Type: application/json" \
67
+ -d '{"url": "https://example.com"}'
68
+ ```
69
+
70
+ Response:
71
+
72
+ ```json
73
+ {
74
+ "content": {
75
+ "html": "...",
76
+ "markdown": "...",
77
+ "readability": "..."
78
+ },
79
+ "metadata": {
80
+ "title": "Example Domain",
81
+ "url": "https://example.com",
82
+ "status_code": 200
83
+ },
84
+ "links": [{"url": "https://www.iana.org/domains/example", "text": "More information..."}]
85
+ }
86
+ ```
87
+
88
+ ### POST /screenshot
89
+
90
+ Capture a screenshot of a URL.
91
+
92
+ ```bash
93
+ curl -X POST http://localhost:7860/screenshot \
94
+ -H "Authorization: Bearer your-secret" \
95
+ -H "Content-Type: application/json" \
96
+ -d '{"url": "https://example.com"}' \
97
+ -o screenshot.png
98
+ ```
99
+
100
+ ## Configuration
101
+
102
+ All settings are configured via environment variables with the `VEILRENDER_` prefix:
103
+
104
+ | Variable | Default | Description |
105
+ |----------|---------|-------------|
106
+ | `VEILRENDER_API_TOKEN` | *(none)* | API token for authentication. If unset, auth is disabled. |
107
+ | `VEILRENDER_PORT` | `7860` | Server port |
108
+ | `VEILRENDER_HOST` | `0.0.0.0` | Server bind address |
109
+ | `VEILRENDER_TIMEOUT` | `30000` | Browser navigation timeout (ms) |
110
+ | `VEILRENDER_VIEWPORT_WIDTH` | `1280` | Browser viewport width |
111
+ | `VEILRENDER_VIEWPORT_HEIGHT` | `720` | Browser viewport height |
112
+ | `VEILRENDER_MAX_CONCURRENT` | `3` | Max concurrent browser contexts |
113
+
114
+ ## Benchmark
115
+
116
+ Tested on HF Spaces (free tier, 2 vCPU) and a self-hosted VPS (3 vCPU, 1 GB container). **100% success rate** across all 46 requests per target.
117
+
118
+ | Test | HF Spaces | Self-hosted |
119
+ |------|-----------|-------------|
120
+ | Sequential × 5 (mixed URLs) | 8.72 s total | 11.81 s total |
121
+ | Concurrent × 10 (mixed URLs) | 1.40 – 9.37 s | 1.29 – 13.45 s |
122
+ | Rapid-fire × 20 (sequential) | 0.885 s avg | 1.029 s avg |
123
+ | Peak container memory | — | 614 MiB / 1 GB |
124
+
125
+ Full results: [BENCHMARK.md](BENCHMARK.md)
126
+
127
+ ## License
128
+
129
+ MIT
@@ -0,0 +1,109 @@
1
+ # VeilRender
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/veilrender?color=green)](https://pypi.org/project/veilrender/)
4
+ [![GitHub release](https://img.shields.io/github/v/release/Oaklight/veilrender?color=green)](https://github.com/Oaklight/veilrender/releases/latest)
5
+ [![CI](https://github.com/Oaklight/veilrender/actions/workflows/ci.yml/badge.svg)](https://github.com/Oaklight/veilrender/actions/workflows/ci.yml)
6
+ [![Docker Pulls](https://img.shields.io/docker/pulls/oaklight/veilrender)](https://hub.docker.com/r/oaklight/veilrender)
7
+ [![Docker Image](https://img.shields.io/docker/v/oaklight/veilrender?sort=semver&label=docker)](https://hub.docker.com/r/oaklight/veilrender)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
9
+ [![HF Spaces](https://img.shields.io/badge/%F0%9F%A4%97-Spaces-blue)](https://huggingface.co/spaces/oaklight/veilrender)
10
+
11
+ [中文](README_zh.md) | **English**
12
+
13
+ Headless browser rendering API — self-hostable on HF Spaces, Docker, or bare metal.
14
+
15
+ VeilRender accepts a URL and returns the fully rendered page content (HTML, Markdown, readability-extracted article) using a headless Chromium browser. Designed as a fallback for fetch tools that fail on JavaScript-rendered pages.
16
+
17
+ ## Quick Start
18
+
19
+ ### Docker
20
+
21
+ ```bash
22
+ docker run -p 7860:7860 -e VEILRENDER_API_TOKEN=your-secret ghcr.io/oaklight/veilrender
23
+ ```
24
+
25
+ ### Local Development
26
+
27
+ ```bash
28
+ pip install -e ".[dev]"
29
+ playwright install chromium
30
+ python -m veilrender
31
+ ```
32
+
33
+ ## API
34
+
35
+ ### GET /health
36
+
37
+ Returns `{"status": "ok"}` if the service is running.
38
+
39
+ ### POST /render
40
+
41
+ Render a URL and return the page content.
42
+
43
+ ```bash
44
+ curl -X POST http://localhost:7860/render \
45
+ -H "Authorization: Bearer your-secret" \
46
+ -H "Content-Type: application/json" \
47
+ -d '{"url": "https://example.com"}'
48
+ ```
49
+
50
+ Response:
51
+
52
+ ```json
53
+ {
54
+ "content": {
55
+ "html": "...",
56
+ "markdown": "...",
57
+ "readability": "..."
58
+ },
59
+ "metadata": {
60
+ "title": "Example Domain",
61
+ "url": "https://example.com",
62
+ "status_code": 200
63
+ },
64
+ "links": [{"url": "https://www.iana.org/domains/example", "text": "More information..."}]
65
+ }
66
+ ```
67
+
68
+ ### POST /screenshot
69
+
70
+ Capture a screenshot of a URL.
71
+
72
+ ```bash
73
+ curl -X POST http://localhost:7860/screenshot \
74
+ -H "Authorization: Bearer your-secret" \
75
+ -H "Content-Type: application/json" \
76
+ -d '{"url": "https://example.com"}' \
77
+ -o screenshot.png
78
+ ```
79
+
80
+ ## Configuration
81
+
82
+ All settings are configured via environment variables with the `VEILRENDER_` prefix:
83
+
84
+ | Variable | Default | Description |
85
+ |----------|---------|-------------|
86
+ | `VEILRENDER_API_TOKEN` | *(none)* | API token for authentication. If unset, auth is disabled. |
87
+ | `VEILRENDER_PORT` | `7860` | Server port |
88
+ | `VEILRENDER_HOST` | `0.0.0.0` | Server bind address |
89
+ | `VEILRENDER_TIMEOUT` | `30000` | Browser navigation timeout (ms) |
90
+ | `VEILRENDER_VIEWPORT_WIDTH` | `1280` | Browser viewport width |
91
+ | `VEILRENDER_VIEWPORT_HEIGHT` | `720` | Browser viewport height |
92
+ | `VEILRENDER_MAX_CONCURRENT` | `3` | Max concurrent browser contexts |
93
+
94
+ ## Benchmark
95
+
96
+ Tested on HF Spaces (free tier, 2 vCPU) and a self-hosted VPS (3 vCPU, 1 GB container). **100% success rate** across all 46 requests per target.
97
+
98
+ | Test | HF Spaces | Self-hosted |
99
+ |------|-----------|-------------|
100
+ | Sequential × 5 (mixed URLs) | 8.72 s total | 11.81 s total |
101
+ | Concurrent × 10 (mixed URLs) | 1.40 – 9.37 s | 1.29 – 13.45 s |
102
+ | Rapid-fire × 20 (sequential) | 0.885 s avg | 1.029 s avg |
103
+ | Peak container memory | — | 614 MiB / 1 GB |
104
+
105
+ Full results: [BENCHMARK.md](BENCHMARK.md)
106
+
107
+ ## License
108
+
109
+ MIT
@@ -0,0 +1,61 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "veilrender"
7
+ dynamic = ["version"]
8
+ description = "Headless browser rendering API — self-hostable on HF Spaces, Docker, or bare metal"
9
+ authors = [{ name = "Peng Ding" }]
10
+ readme = "README.md"
11
+ requires-python = ">=3.10"
12
+ license = "MIT"
13
+ classifiers = [
14
+ "Intended Audience :: Developers",
15
+ "Programming Language :: Python :: 3",
16
+ "Topic :: Internet :: WWW/HTTP",
17
+ ]
18
+
19
+ dependencies = [
20
+ "cloakbrowser>=0.3.0",
21
+ ]
22
+
23
+ [project.optional-dependencies]
24
+ dev = [
25
+ "ruff>=0.11.0",
26
+ "ty>=0.0.1a0",
27
+ "pytest>=7.0.0",
28
+ "pytest-asyncio>=0.21.0",
29
+ ]
30
+
31
+ [project.scripts]
32
+ veilrender = "veilrender.app:main"
33
+
34
+ [project.urls]
35
+ Repository = "https://github.com/Oaklight/veilrender"
36
+ Issues = "https://github.com/Oaklight/veilrender/issues"
37
+
38
+ [tool.setuptools.packages.find]
39
+ where = ["src"]
40
+
41
+ [tool.setuptools.dynamic]
42
+ version = { attr = "veilrender.__version__" }
43
+
44
+ [tool.setuptools.package-data]
45
+ "veilrender" = ["py.typed"]
46
+
47
+ [tool.ruff]
48
+ target-version = "py310"
49
+
50
+ [tool.ruff.lint]
51
+ select = ["E", "F", "UP"]
52
+ ignore = ["UP007", "E501"]
53
+
54
+ [tool.ty.environment]
55
+ python-version = "3.10"
56
+
57
+ [tool.ty.src]
58
+ exclude = ["src/veilrender/_vendor/**"]
59
+
60
+ [tool.ty.rules]
61
+ unresolved-import = "ignore"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ """VeilRender — headless browser rendering API."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,6 @@
1
+ """Entry point for ``python -m veilrender``."""
2
+
3
+ from veilrender.app import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
@@ -0,0 +1,2 @@
1
+ # Vendored zerodep modules — do not edit manually.
2
+ # Update via: make vendor
@@ -0,0 +1,323 @@
1
+ #!/usr/bin/env python3
2
+ """Three-way readability benchmark: zerodep vs readability-lxml vs Mozilla JS.
3
+
4
+ Runs each implementation on Mozilla's test fixtures and prints a comparison
5
+ table. JS timing is measured internally by bench_mozilla.js (no subprocess
6
+ overhead in the numbers).
7
+
8
+ Usage:
9
+ python benchmark_compare.py # all fixtures
10
+ python benchmark_compare.py 001 bbc-1 # specific fixtures
11
+ python benchmark_compare.py --rounds 20 # more rounds
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ import json
18
+ import os
19
+ import shutil
20
+ import subprocess
21
+ import sys
22
+ import timeit
23
+
24
+ # ── Setup paths ──────────────────────────────────────────────────────────────
25
+
26
# Absolute directory containing this script; the fixture directory and the
# Node.js harness below are both located relative to it.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
# Fixture root: discover_fixtures() expects one sub-directory per fixture,
# each holding a "source.html".
_TEST_PAGES_DIR = os.path.join(_THIS_DIR, "test-pages")
# Node.js benchmark script executed by bench_mozilla_js().
_BENCH_JS = os.path.join(_THIS_DIR, "bench_mozilla.js")

# Put this directory first on sys.path so that ``from readability import
# extract`` in bench_zerodep() can pick up a module located next to this
# script. _load_readability_lxml() temporarily undoes this to reach the
# third-party package of the same name.
sys.path.insert(0, _THIS_DIR)
31
+
32
+
33
+ # ── Discover fixtures ────────────────────────────────────────────────────────
34
+
35
+
36
def discover_fixtures() -> list[str]:
    """List the fixtures available under the test-pages directory.

    A fixture is a sub-directory of the test-pages directory that contains
    a ``source.html`` file.

    Returns:
        The fixture directory names in sorted order, or an empty list when
        the test-pages directory does not exist.
    """
    if not os.path.isdir(_TEST_PAGES_DIR):
        return []

    found = []
    for entry in os.listdir(_TEST_PAGES_DIR):
        fixture_dir = os.path.join(_TEST_PAGES_DIR, entry)
        if not os.path.isdir(fixture_dir):
            continue
        if os.path.isfile(os.path.join(fixture_dir, "source.html")):
            found.append(entry)
    found.sort()
    return found
46
+
47
+
48
def load_source(name: str) -> str:
    """Read the ``source.html`` of fixture *name* and return it as text.

    The file is decoded as UTF-8.
    """
    source_file = os.path.join(_TEST_PAGES_DIR, name, "source.html")
    with open(source_file, encoding="utf-8") as handle:
        html = handle.read()
    return html
53
+
54
+
55
+ # ── Python: zerodep readability ──────────────────────────────────────────────
56
+
57
+
58
def bench_zerodep(html: str, rounds: int) -> dict:
    """Benchmark ``readability.extract()`` on one HTML document.

    Args:
        html: Source HTML to run the extraction on.
        rounds: Number of timed runs; must be at least 1.

    Returns:
        A dict with the per-round timings (``times_ms``) and their
        ``min_ms``, ``mean_ms`` and ``max_ms`` (all in milliseconds), plus
        the ``title`` and ``length`` attributes of the warm-up result.

    Raises:
        ValueError: If *rounds* is smaller than 1.
    """
    # Fail fast with a clear message. Without this check an empty timing
    # list makes min()/max() raise an opaque ValueError, and only after the
    # warm-up extraction has already been paid for.
    if rounds < 1:
        raise ValueError(f"rounds must be >= 1, got {rounds}")

    from readability import extract

    # Warm-up run (untimed); its result supplies the reported title/length.
    result = extract(html)

    times = []
    for _ in range(rounds):
        t0 = timeit.default_timer()
        extract(html)
        t1 = timeit.default_timer()
        times.append((t1 - t0) * 1000)  # seconds -> ms

    return {
        "times_ms": times,
        "min_ms": min(times),
        "mean_ms": sum(times) / len(times),
        "max_ms": max(times),
        "title": result.title,
        "length": result.length,
    }
80
+
81
+
82
+ # ── Python: readability-lxml ────────────────────────────────────────────────
83
+
84
+
85
def _load_readability_lxml():
    """Load readability-lxml's ``Document`` class, working around name clash.

    The third-party distribution is imported under the module name
    ``readability``, the same name bench_zerodep() imports from. To reach
    the third-party package, this script's directory is temporarily removed
    from ``sys.path`` and any already-imported ``readability`` modules are
    set aside; both are restored before returning.

    Returns:
        The ``Document`` class, or ``None`` when readability-lxml is not
        installed, cannot be imported, or exposes no ``Document``.
    """
    import importlib
    import importlib.metadata

    try:
        importlib.metadata.version("readability-lxml")
    except importlib.metadata.PackageNotFoundError:
        return None

    this_dir = os.path.abspath(_THIS_DIR)
    saved_path = sys.path[:]
    # Pop cached modules so import_module() performs a fresh lookup instead
    # of returning whatever "readability" was imported earlier.
    saved_modules = {
        k: sys.modules.pop(k)
        for k in list(sys.modules)
        if k == "readability" or k.startswith("readability.")
    }
    try:
        sys.path = [p for p in sys.path if os.path.abspath(p) != this_dir]
        mod = importlib.import_module("readability")
        return mod.Document
    except (ImportError, AttributeError) as exc:
        # The distribution metadata exists but the package is unusable
        # (e.g. a broken dependency). This runs at module import time, so
        # degrade to "not available" instead of aborting the whole script.
        print(f"[readability-lxml unavailable: {exc}]", file=sys.stderr)
        return None
    finally:
        sys.path = saved_path
        # Drop the third-party modules from the cache, then put back the
        # ones that were there before.
        for k in list(sys.modules):
            if k == "readability" or k.startswith("readability."):
                del sys.modules[k]
        sys.modules.update(saved_modules)


# Resolved once at import time; None means readability-lxml is unavailable.
_RefDocument = _load_readability_lxml()
116
+
117
+
118
def bench_readability_lxml(html: str, rounds: int) -> dict | None:
    """Benchmark readability-lxml on one HTML document.

    Args:
        html: Source HTML to summarize.
        rounds: Number of timed runs; must be at least 1.

    Returns:
        ``None`` when readability-lxml is unavailable. Otherwise a dict
        with the per-round timings (``times_ms``), their ``min_ms``,
        ``mean_ms`` and ``max_ms`` (all in milliseconds), the document
        ``title`` and the ``length`` of the summary produced in the
        warm-up run.

    Raises:
        ValueError: If readability-lxml is available and *rounds* is
            smaller than 1.
    """
    if _RefDocument is None:
        return None

    # Same fail-fast rule as bench_zerodep(): an empty timing list would
    # otherwise make min()/max() raise an opaque ValueError after warm-up.
    if rounds < 1:
        raise ValueError(f"rounds must be >= 1, got {rounds}")

    # Warm-up run (untimed); its summary supplies the reported length.
    doc = _RefDocument(html)
    summary = doc.summary()

    times = []
    for _ in range(rounds):
        t0 = timeit.default_timer()
        # Each round builds a fresh document and summarizes it.
        doc = _RefDocument(html)
        doc.summary()
        t1 = timeit.default_timer()
        times.append((t1 - t0) * 1000)  # seconds -> ms

    # Title comes from the document's short_title() when that method exists.
    title = doc.short_title() if hasattr(doc, "short_title") else ""
    length = len(summary) if summary else 0

    return {
        "times_ms": times,
        "min_ms": min(times),
        "mean_ms": sum(times) / len(times),
        "max_ms": max(times),
        "title": title,
        "length": length,
    }
147
+
148
+
149
+ # ── JavaScript: Mozilla Readability.js ───────────────────────────────────────
150
+
151
+
152
def bench_mozilla_js(fixture_name: str, rounds: int) -> dict | None:
    """Benchmark Mozilla Readability.js via a Node.js subprocess.

    Runs ``node bench_mozilla.js <source.html> <rounds>`` and parses the
    script's standard output as JSON. The numbers are whatever the script
    reports; this function does no timing of its own.

    Args:
        fixture_name: Fixture directory name under the test-pages directory.
        rounds: Number of rounds, forwarded to the script as an argument.

    Returns:
        The decoded JSON object, or ``None`` when ``node`` or the script is
        missing, the process cannot be started, times out (120 s), exits
        non-zero, or prints something that is not a JSON object containing
        ``mean_ms``.
    """
    if not shutil.which("node"):
        return None
    if not os.path.isfile(_BENCH_JS):
        return None

    source_path = os.path.join(_TEST_PAGES_DIR, fixture_name, "source.html")
    try:
        result = subprocess.run(
            ["node", _BENCH_JS, source_path, str(rounds)],
            capture_output=True,
            text=True,
            timeout=120,
            cwd=_THIS_DIR,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers FileNotFoundError as well as e.g. PermissionError
        # when the interpreter cannot be executed.
        return None

    if result.returncode != 0:
        print(f" [JS error: {result.stderr.strip()[:100]}]", file=sys.stderr)
        return None

    try:
        payload = json.loads(result.stdout)
    except json.JSONDecodeError:
        return None

    # print_results() indexes the result with ["mean_ms"]; treat any other
    # shape as "no result" rather than letting it crash the report later.
    if not isinstance(payload, dict) or "mean_ms" not in payload:
        return None
    return payload
178
+
179
+
180
+ # ── Output formatting ────────────────────────────────────────────────────────
181
+
182
+ # ANSI colors (disabled if not a terminal).
183
if sys.stdout.isatty():
    _BOLD = "\033[1m"
    _GREEN = "\033[32m"
    _YELLOW = "\033[33m"
    _CYAN = "\033[36m"
    _RESET = "\033[0m"
    _DIM = "\033[2m"
else:
    # Not a terminal: emit no escape sequences at all.
    _BOLD = _GREEN = _YELLOW = _CYAN = _RESET = _DIM = ""


def _fmt_ms(ms: float) -> str:
    """Format a duration given in milliseconds with an appropriate unit.

    Values below 1 ms are shown in µs, values below 1000 ms in ms, and
    everything else in seconds.
    """
    if ms < 1:
        return f"{ms * 1000:.0f} µs"
    if ms < 1000:
        return f"{ms:.1f} ms"
    return f"{ms / 1000:.2f} s"


def _ratio_str(ms: float, baseline: float) -> str:
    """Format ``ms / baseline`` as a colored ``N.NNx`` ratio.

    Args:
        ms: Measured time.
        baseline: Reference time the ratio is taken against.

    Returns:
        An empty string when *baseline* is not positive. Otherwise the
        ratio with two decimals, green when it is below 1.05 (on par with
        or faster than the baseline) and yellow when slower.
    """
    if baseline <= 0:
        return ""
    ratio = ms / baseline
    # Always print the real ratio: a hard-coded "1.00x" for everything
    # below 1.05 would hide implementations that beat the baseline.
    color = _GREEN if ratio < 1.05 else _YELLOW
    return f"{color}{ratio:.2f}x{_RESET}"
211
+
212
+
213
def print_results(
    fixture_name: str,
    html_size: int,
    zd: dict,
    lxml: dict | None,
    js: dict | None,
) -> None:
    """Print one table row with the mean timings for a single fixture.

    Args:
        fixture_name: Name shown in the first column.
        html_size: Size of the source HTML in bytes (displayed in KB).
        zd: Timing dict of the zerodep run; its ``mean_ms`` is the baseline
            for every ratio in the row.
        lxml: Timing dict of the readability-lxml run, or ``None`` (n/a).
        js: Timing dict of the Mozilla JS run, or ``None`` (n/a).
    """
    baseline = zd["mean_ms"]

    def cell(label: str, stats: dict | None) -> str:
        # One implementation column: dimmed "n/a" when there is no result,
        # otherwise the mean time followed by its ratio to the baseline.
        if stats is None:
            return f"{_DIM}{label} {'n/a':>10s}{_RESET}"
        mean = stats["mean_ms"]
        timing = f"{_CYAN}{label}{_RESET} {_fmt_ms(mean):>10s}"
        return timing + f" {_ratio_str(mean, baseline)}"

    row = [
        f" {_BOLD}{fixture_name:<28s}{_RESET}",
        f"{_DIM}{html_size / 1024:>7.1f} KB{_RESET}",
        cell("zerodep", zd),
        cell("lxml", lxml),
        cell("mozilla", js),
    ]
    print(" ".join(row))
247
+
248
+
249
+ # ── Main ─────────────────────────────────────────────────────────────────────
250
+
251
+
252
def main() -> None:
    """Parse the command line, run all benchmarks and print the table.

    Exits with status 1 when no fixtures exist or an unknown fixture name is
    given, and with an argparse usage error when ``--rounds`` is below 1.
    """
    parser = argparse.ArgumentParser(
        description="Three-way readability benchmark comparison."
    )
    parser.add_argument(
        "fixtures",
        nargs="*",
        help="Fixture names to benchmark (default: all).",
    )
    parser.add_argument(
        "--rounds",
        type=int,
        default=10,
        help="Number of timing rounds per fixture (default: 10).",
    )
    args = parser.parse_args()

    # Zero or negative rounds would leave the timing lists empty and crash
    # inside the benchmark functions; reject them as a usage error instead.
    if args.rounds < 1:
        parser.error("--rounds must be at least 1")

    all_fixtures = discover_fixtures()
    if not all_fixtures:
        print("No test fixtures found in test-pages/", file=sys.stderr)
        sys.exit(1)

    fixtures = args.fixtures if args.fixtures else all_fixtures
    # Validate fixture names.
    known = set(all_fixtures)
    for name in fixtures:
        if name not in known:
            print(f"Unknown fixture: {name}", file=sys.stderr)
            print(f"Available: {', '.join(all_fixtures)}", file=sys.stderr)
            sys.exit(1)

    rounds = args.rounds

    def yes_no(available: bool) -> str:
        # Green "yes" / dimmed "no" marker for the implementation status line.
        return _GREEN + "yes" + _RESET if available else _DIM + "no" + _RESET

    # Header.
    print()
    print(f"{_BOLD}Readability Benchmark ({rounds} rounds per fixture){_RESET}")
    # bench_mozilla_js() needs both the node binary and the harness script,
    # so report the JS implementation as available only when both exist.
    has_js = shutil.which("node") is not None and os.path.isfile(_BENCH_JS)
    has_lxml = _RefDocument is not None
    status = [
        f"zerodep: {_GREEN}yes{_RESET}",
        f"readability-lxml: {yes_no(has_lxml)}",
        f"mozilla js: {yes_no(has_js)}",
    ]
    print(f" Implementations: {' | '.join(status)}")
    print(f" {_DIM}Times shown are mean. Ratios relative to zerodep.{_RESET}")
    print()

    # Column headers.
    print(
        f" {'Fixture':<28s} {'Size':>9s} "
        f"{'zerodep':>19s} {'readability-lxml':>19s} "
        f"{'mozilla js':>19s}"
    )
    print(" " + "─" * 110)

    for name in fixtures:
        html = load_source(name)
        html_size = len(html.encode("utf-8"))

        # Benchmark all three.
        zd = bench_zerodep(html, rounds)
        lxml = bench_readability_lxml(html, rounds)
        js = bench_mozilla_js(name, rounds)

        print_results(name, html_size, zd, lxml, js)

    print()


if __name__ == "__main__":
    main()