stealthfetch 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. stealthfetch-0.2.0/.github/workflows/ci.yml +47 -0
  2. stealthfetch-0.2.0/.github/workflows/publish.yml +34 -0
  3. stealthfetch-0.2.0/.gitignore +37 -0
  4. stealthfetch-0.2.0/CHANGELOG.md +24 -0
  5. stealthfetch-0.2.0/CLAUDE.md +51 -0
  6. stealthfetch-0.2.0/LICENSE +21 -0
  7. stealthfetch-0.2.0/PKG-INFO +257 -0
  8. stealthfetch-0.2.0/README.md +215 -0
  9. stealthfetch-0.2.0/examples/async_usage.py +23 -0
  10. stealthfetch-0.2.0/examples/basic_usage.py +8 -0
  11. stealthfetch-0.2.0/examples/browser_mode.py +21 -0
  12. stealthfetch-0.2.0/examples/mcp_config.json +7 -0
  13. stealthfetch-0.2.0/pyproject.toml +94 -0
  14. stealthfetch-0.2.0/skill/SKILL.md +69 -0
  15. stealthfetch-0.2.0/skill/reference.md +95 -0
  16. stealthfetch-0.2.0/src/stealthfetch/__init__.py +32 -0
  17. stealthfetch-0.2.0/src/stealthfetch/_browsers/__init__.py +75 -0
  18. stealthfetch-0.2.0/src/stealthfetch/_browsers/_camoufox.py +73 -0
  19. stealthfetch-0.2.0/src/stealthfetch/_browsers/_constants.py +16 -0
  20. stealthfetch-0.2.0/src/stealthfetch/_browsers/_patchright.py +67 -0
  21. stealthfetch-0.2.0/src/stealthfetch/_compat.py +50 -0
  22. stealthfetch-0.2.0/src/stealthfetch/_core.py +482 -0
  23. stealthfetch-0.2.0/src/stealthfetch/_detect.py +90 -0
  24. stealthfetch-0.2.0/src/stealthfetch/_errors.py +134 -0
  25. stealthfetch-0.2.0/src/stealthfetch/cli.py +140 -0
  26. stealthfetch-0.2.0/src/stealthfetch/mcp_server.py +112 -0
  27. stealthfetch-0.2.0/src/stealthfetch/py.typed +0 -0
  28. stealthfetch-0.2.0/tests/conftest.py +34 -0
  29. stealthfetch-0.2.0/tests/fixtures/article.html +20 -0
  30. stealthfetch-0.2.0/tests/fixtures/captcha.html +13 -0
  31. stealthfetch-0.2.0/tests/fixtures/cloudflare_block.html +12 -0
  32. stealthfetch-0.2.0/tests/fixtures/reddit_challenge.html +221 -0
  33. stealthfetch-0.2.0/tests/fixtures/tables.html +23 -0
  34. stealthfetch-0.2.0/tests/integration/conftest.py +24 -0
  35. stealthfetch-0.2.0/tests/integration/test_live.py +32 -0
  36. stealthfetch-0.2.0/tests/test_browsers.py +153 -0
  37. stealthfetch-0.2.0/tests/test_cli.py +119 -0
  38. stealthfetch-0.2.0/tests/test_compat.py +36 -0
  39. stealthfetch-0.2.0/tests/test_convert.py +27 -0
  40. stealthfetch-0.2.0/tests/test_core.py +373 -0
  41. stealthfetch-0.2.0/tests/test_detect.py +126 -0
  42. stealthfetch-0.2.0/tests/test_extract.py +71 -0
  43. stealthfetch-0.2.0/tests/test_mcp.py +197 -0
  44. stealthfetch-0.2.0/tests/test_validation.py +144 -0
@@ -0,0 +1,47 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.12"
17
+ cache: pip
18
+ - run: pip install -e ".[dev]"
19
+ - run: ruff check src/ tests/
20
+ - run: mypy src/
21
+
22
+ test:
23
+ runs-on: ${{ matrix.os }}
24
+ strategy:
25
+ matrix:
26
+ os: [ubuntu-latest, macos-latest]
27
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
28
+ steps:
29
+ - uses: actions/checkout@v4
30
+ - uses: actions/setup-python@v5
31
+ with:
32
+ python-version: ${{ matrix.python-version }}
33
+ cache: pip
34
+ - run: pip install -e ".[dev]"
35
+ - run: pytest tests/ -v
36
+
37
+ integration:
38
+ if: github.event_name == 'push'
39
+ runs-on: ubuntu-latest
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+ - uses: actions/setup-python@v5
43
+ with:
44
+ python-version: "3.12"
45
+ cache: pip
46
+ - run: pip install -e ".[dev]"
47
+ - run: pytest tests/integration/ --run-integration -v
@@ -0,0 +1,34 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags: ["v*"]
6
+
7
+ permissions:
8
+ id-token: write
9
+
10
+ jobs:
11
+ test:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.12"
18
+ - run: pip install -e ".[dev]"
19
+ - run: ruff check src/ tests/
20
+ - run: mypy src/
21
+ - run: pytest tests/ -v
22
+
23
+ publish:
24
+ needs: test
25
+ runs-on: ubuntu-latest
26
+ environment: pypi
27
+ steps:
28
+ - uses: actions/checkout@v4
29
+ - uses: actions/setup-python@v5
30
+ with:
31
+ python-version: "3.12"
32
+ - run: pip install build
33
+ - run: python -m build
34
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,37 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ *.egg
6
+ dist/
7
+ build/
8
+
9
+ # Type checkers / linters
10
+ .mypy_cache/
11
+ .ruff_cache/
12
+
13
+ # Testing
14
+ .pytest_cache/
15
+ htmlcov/
16
+ .coverage
17
+ coverage.xml
18
+
19
+ # Environments
20
+ .env
21
+ .venv/
22
+ venv/
23
+
24
+ # IDE
25
+ .idea/
26
+ .vscode/
27
+ *.swp
28
+ *.swo
29
+ *~
30
+
31
+ # OS
32
+ .DS_Store
33
+ Thumbs.db
34
+
35
+ # Project
36
+ sacredtexts.md
37
+ .claude/
@@ -0,0 +1,24 @@
1
+ # Changelog
2
+
3
+ ## 0.2.0 (2026-02-24)
4
+
5
+ - Add `fetch_result()` / `afetch_result()` — same pipeline as `fetch_markdown`, returns `FetchResult` dataclass with `markdown` + metadata fields (`title`, `author`, `date`, `description`, `url`, `hostname`, `sitename`) extracted as a free side-effect of trafilatura parsing
6
+ - Add `FetchResult` dataclass, exported from the top-level package
7
+ - MCP server: add `include_metadata` parameter to `fetch_markdown` tool — when `True`, returns JSON with markdown and metadata instead of a plain string
8
+
9
+ ## 0.1.0 (2026-02-24)
10
+
11
+ Initial release.
12
+
13
+ - 3-layer pipeline: fetch (curl_cffi) → extract (trafilatura) → convert (html-to-markdown)
14
+ - Auto-escalation from HTTP to stealth browser on block detection
15
+ - Browser backends: Camoufox (default) and Patchright (fallback)
16
+ - Block detection: HTTP status codes, content-type awareness, pattern matching (Cloudflare, DataDome, PerimeterX, Akamai)
17
+ - SSRF protection: rejects private IPs, non-http(s) schemes, DNS rebinding, redirect-chain exploits
18
+ - CLI: `stealthfetch <url>` with proxy, timeout, headers, and output options
19
+ - MCP server: `stealthfetch-mcp` with full parameter support
20
+ - Async support: `afetch_markdown()`
21
+ - Proxy support with optional authentication
22
+ - Custom HTTP headers
23
+ - Response size limit (50 MB)
24
+ - Strict type hints (mypy strict) and full linting (ruff)
@@ -0,0 +1,51 @@
1
+ # StealthFetch
2
+
3
+ URL in, LLM-ready markdown out. Orchestration layer over curl_cffi, trafilatura, html-to-markdown, Camoufox, and Patchright.
4
+
5
+ ## Architecture
6
+
7
+ Three-layer pipeline in `src/stealthfetch/_core.py`: **fetch → extract → convert**.
8
+
9
+ - **Fetch** — HTTP via curl_cffi with Chrome TLS fingerprint. Auto-escalates to stealth browser on block detection.
10
+ - **Extract** — trafilatura strips nav, ads, boilerplate. Returns clean HTML.
11
+ - **Convert** — html-to-markdown (Rust) produces final markdown.
12
+
13
+ Key modules:
14
+ - `_core.py` — pipeline + public API (`fetch_markdown`, `afetch_markdown`, `fetch_result`, `afetch_result`, `FetchResult`)
15
+ - `_detect.py` — block detection heuristics. Strong patterns (vendor-specific, always checked) vs weak patterns (generic, checked only on small pages <15k chars)
16
+ - `_errors.py` — exception hierarchy + SSRF URL/proxy validation (pre- and post-redirect)
17
+ - `_compat.py` — feature detection for optional browser deps (non-cached, allows mid-process install)
18
+ - `_browsers/` — browser backend abstraction. Dispatcher resolves "auto" → camoufox (preferred) or patchright
19
+ - `cli.py` — CLI entry point
20
+ - `mcp_server.py` — MCP server entry point (FastMCP, single `fetch_markdown` tool)
21
+
22
+ ## Public API
23
+
24
+ - 4 functions: `fetch_markdown`, `afetch_markdown`, `fetch_result`, `afetch_result`
25
+ - 1 dataclass: `FetchResult` (markdown, title, author, date, description, url, hostname, sitename)
26
+ - 3 exceptions: `FetchError`, `ExtractionError`, `BrowserNotAvailable` (all inherit `StealthFetchError`)
27
+
28
+ ## Conventions
29
+
30
+ - Strict mypy (`--strict` equivalent via pyproject.toml)
31
+ - Ruff linting: E, F, W, I, UP, B, SIM, C4, RUF, PERF, LOG
32
+ - Lazy imports for optional deps (browser backends, mcp) — keep startup fast
33
+ - `_` prefix for all private modules
34
+ - Async variants use `a` prefix (`afetch_markdown`)
35
+ - CPU-bound work runs off the event loop via `asyncio.to_thread` in async paths
36
+
37
+ ## Commands
38
+
39
+ ```bash
40
+ pytest # unit tests (156 tests)
41
+ pytest --run-integration # + live HTTP tests
42
+ ruff check src/ tests/ # lint
43
+ mypy src/ # type check
44
+ ```
45
+
46
+ ## Design Decisions
47
+
48
+ - HTTP-first, browser-only-when-needed — browsers are slow and detectable
49
+ - Strong vs weak pattern split in `_detect.py` prevents false-positive escalation on large articles
50
+ - SSRF validated twice: before request (literal IP + DNS resolution) and after redirects
51
+ - `FetchResult` metadata comes free from trafilatura's existing parse — no extra HTTP calls
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 leba01
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,257 @@
1
+ Metadata-Version: 2.4
2
+ Name: stealthfetch
3
+ Version: 0.2.0
4
+ Summary: URL in, LLM-ready markdown out. Stealth fetch with anti-bot bypass.
5
+ Project-URL: Homepage, https://github.com/leba01/stealthfetch
6
+ Project-URL: Repository, https://github.com/leba01/stealthfetch
7
+ Project-URL: Issues, https://github.com/leba01/stealthfetch/issues
8
+ Author: leba01
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: anti-bot,llm,markdown,scraping,stealth
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Internet :: WWW/HTTP
21
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: curl-cffi>=0.14.0
25
+ Requires-Dist: html-to-markdown>=2.25.0
26
+ Requires-Dist: trafilatura>=1.8.0
27
+ Provides-Extra: browser
28
+ Requires-Dist: camoufox[geoip]>=0.4.11; extra == 'browser'
29
+ Requires-Dist: patchright>=1.50; extra == 'browser'
30
+ Provides-Extra: camoufox
31
+ Requires-Dist: camoufox[geoip]>=0.4.11; extra == 'camoufox'
32
+ Provides-Extra: dev
33
+ Requires-Dist: mypy>=1.13; extra == 'dev'
34
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
35
+ Requires-Dist: pytest>=8.0; extra == 'dev'
36
+ Requires-Dist: ruff>=0.9.0; extra == 'dev'
37
+ Provides-Extra: mcp
38
+ Requires-Dist: mcp>=1.26.0; extra == 'mcp'
39
+ Provides-Extra: patchright
40
+ Requires-Dist: patchright>=1.50; extra == 'patchright'
41
+ Description-Content-Type: text/markdown
42
+
43
+ # StealthFetch
44
+
45
+ [![CI](https://github.com/leba01/stealthfetch/actions/workflows/ci.yml/badge.svg)](https://github.com/leba01/stealthfetch/actions/workflows/ci.yml)
46
+ [![PyPI](https://img.shields.io/pypi/v/stealthfetch)](https://pypi.org/project/stealthfetch/)
47
+ [![Python](https://img.shields.io/pypi/pyversions/stealthfetch)](https://pypi.org/project/stealthfetch/)
48
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
49
+
50
+ URL in, LLM-ready markdown out.
51
+
52
+ ```python
53
+ from stealthfetch import fetch_markdown
54
+
55
+ md = fetch_markdown("https://en.wikipedia.org/wiki/Web_scraping")
56
+ ```
57
+
58
+ Fetches any web page, strips nav, ads, and boilerplate, returns clean markdown. If the site blocks you, it auto-escalates to a stealth browser. One function, no config.
59
+
60
+ StealthFetch doesn't reinvent the hard parts: [curl_cffi](https://github.com/lexiforest/curl_cffi), [trafilatura](https://github.com/adbar/trafilatura), [html-to-markdown](https://github.com/kreuzberg-dev/html-to-markdown), [Camoufox](https://github.com/daijro/camoufox), and [Patchright](https://github.com/Kaliiiiiiiiii-Vinyzu/patchright) do the heavy lifting. StealthFetch is the orchestration layer: wiring them together, detecting blocks, deciding when to escalate, and handling the security concerns most tools skip.
61
+
62
+ ## How It Works
63
+
64
+ ```
65
+ URL
66
+
67
+
68
+ ┌───────────────────────────────────────────┐
69
+ │ FETCH curl_cffi │
70
+ │ Chrome TLS fingerprint │
71
+ │ ↓ blocked? │
72
+ │ auto-escalate to stealth │
73
+ │ browser (Camoufox / │
74
+ │ Patchright) │
75
+ └─────────────────┬─────────────────────────┘
76
+
77
+ ┌─────────────────▼─────────────────────────┐
78
+ │ EXTRACT trafilatura │
79
+ │ strips nav, ads, │
80
+ │ boilerplate │
81
+ └─────────────────┬─────────────────────────┘
82
+
83
+ ┌─────────────────▼─────────────────────────┐
84
+ │ CONVERT html-to-markdown (Rust) │
85
+ └─────────────────┬─────────────────────────┘
86
+
87
+
88
+ markdown
89
+ ```
90
+
91
+ Each layer is one library call. The libraries do the hard work.
92
+
93
+ ## What StealthFetch Owns
94
+
95
+ ### Block Detection
96
+
97
+ Most anti-bot systems give themselves away before you ever see a captcha. StealthFetch uses status codes (403, 429, 503) as a fast first pass, then pattern-matches HTML signatures from Cloudflare, DataDome, PerimeterX, and Akamai. The trick is knowing when *not* to check: vendor-specific signatures (like `_cf_chl_opt` or `perimeterx`) are always checked because they never appear in real content. Generic phrases like "just a moment" or "access denied" are only checked on small pages (< 15k chars) since on a real article those strings are just words.
98
+
99
+ ### Auto-Escalation
100
+
101
+ Headless browsers are slow, heavy, and detectable in their own right. An HTTP request with a Chrome TLS fingerprint (via curl_cffi) gets through most sites just fine. So StealthFetch always tries HTTP first and only spins up a stealth browser when the response actually looks blocked. The interesting part isn't the browser itself; it's the decision of *when* to use it.
102
+
103
+ ### SSRF Protection
104
+
105
+ Most scraping tools — [including ones with 60-85k GitHub stars](https://www.bluerock.io/post/mcp-furi-microsoft-markitdown-vulnerabilities) — trust whatever URL you hand them. StealthFetch doesn't. A hostname that resolves to `127.0.0.1`? Rejected. A redirect chain that bounces through three domains and lands on a private IP? Caught. IPv4-mapped IPv6 bypasses and link-local addresses are caught too. Everything is validated before the request goes out, and again after redirects resolve.
106
+
107
+ ## Works On
108
+
109
+ Most sites return clean markdown in **under a second**. Sites that fight back (Reddit, Amazon) get auto-escalated to a stealth browser — that takes **5–8 seconds**, but you don't have to think about it.
110
+
111
+ | Site | What You Get |
112
+ |------|-------------|
113
+ | Wikipedia, Reuters, BBC News, TechCrunch | Articles and news — straight through |
114
+ | Hacker News | Threads and comments |
115
+ | Stack Overflow | Q&A with code blocks |
116
+ | Medium | Articles — Cloudflare-protected, but no false-positive escalation (passive JS, not a block page) |
117
+ | Reddit | Blocked by challenge page → auto-escalates to browser |
118
+ | Amazon | Blocked by CAPTCHA → auto-escalates to browser |
119
+
120
+ ## Install
121
+
122
+ Try it — no install needed (requires [uv](https://docs.astral.sh/uv/getting-started/installation/)):
123
+
124
+ ```bash
125
+ uvx stealthfetch https://en.wikipedia.org/wiki/Web_scraping
126
+ ```
127
+
128
+ Install as a library:
129
+
130
+ ```bash
131
+ pip install stealthfetch
132
+ ```
133
+
134
+ > **Note:** trafilatura brings ~20 transitive dependencies (lxml, charset-normalizer, etc.). Total install is ~50 packages.
135
+
136
+ Add stealth browser support (required for auto-escalation to work):
137
+
138
+ ```bash
139
+ pip install "stealthfetch[browser]"
140
+ camoufox fetch
141
+ ```
142
+
143
+ ## CLI
144
+
145
+ ```bash
146
+ stealthfetch https://en.wikipedia.org/wiki/Web_scraping
147
+ stealthfetch https://spa-app.com -m browser
148
+ stealthfetch https://example.com --no-links --no-tables
149
+ stealthfetch https://example.com --header "Cookie: session=abc"
150
+ ```
151
+
152
+ ## MCP Server
153
+
154
+ StealthFetch is an [MCP](https://modelcontextprotocol.io/) server — any MCP client (Claude Desktop, Claude Code, Cursor, etc.) can call it as a tool to fetch web pages as markdown.
155
+
156
+ No install needed — add this to your MCP client config:
157
+
158
+ ```json
159
+ {
160
+ "mcpServers": {
161
+ "stealthfetch": {
162
+ "command": "uvx",
163
+ "args": ["--from", "stealthfetch[mcp]", "stealthfetch-mcp"]
164
+ }
165
+ }
166
+ }
167
+ ```
168
+
169
+ Or if you prefer a persistent install:
170
+
171
+ ```bash
172
+ pip install "stealthfetch[mcp]"
173
+ ```
174
+
175
+ ```json
176
+ {
177
+ "mcpServers": {
178
+ "stealthfetch": {
179
+ "command": "stealthfetch-mcp"
180
+ }
181
+ }
182
+ }
183
+ ```
184
+
185
+ ## API
186
+
187
+ ### `fetch_markdown(url, **kwargs) -> str`
188
+
189
+ Also available as `afetch_markdown` — same signature, async. Extraction and conversion run off the event loop via `asyncio.to_thread`.
190
+
191
+ | Parameter | Type | Default | Description |
192
+ |-----------|------|---------|-------------|
193
+ | `url` | `str` | required | URL to fetch |
194
+ | `method` | `str` | `"auto"` | `"auto"`, `"http"`, or `"browser"` |
195
+ | `browser_backend` | `str` | `"auto"` | `"auto"`, `"camoufox"`, or `"patchright"` |
196
+ | `include_links` | `bool` | `True` | Preserve hyperlinks |
197
+ | `include_images` | `bool` | `False` | Preserve image references |
198
+ | `include_tables` | `bool` | `True` | Preserve tables |
199
+ | `timeout` | `int` | `30` | Timeout in seconds |
200
+ | `proxy` | `dict` | `None` | `{"server": "...", "username": "...", "password": "..."}` |
201
+ | `headers` | `dict` | `None` | Additional HTTP headers |
202
+
203
+
204
+ ### `fetch_result(url, **kwargs) -> FetchResult`
205
+
206
+ Same fetch/extract/convert pipeline as `fetch_markdown`, but returns a structured dataclass with the markdown **and** page metadata extracted as a free side-effect of parsing.
207
+
208
+ ```python
209
+ from stealthfetch import fetch_result
210
+
211
+ r = fetch_result("https://en.wikipedia.org/wiki/Web_scraping", method="http")
212
+ print(r.title) # "Web scraping"
213
+ print(r.author) # "Wikipedia contributors" (when available)
214
+ print(r.date) # ISO 8601 date (when available)
215
+ print(r.markdown[:200])
216
+ ```
217
+
218
+ `FetchResult` fields:
219
+
220
+ | Field | Type | Description |
221
+ |-------|------|-------------|
222
+ | `markdown` | `str` | Cleaned markdown content |
223
+ | `title` | `str \| None` | Page title |
224
+ | `author` | `str \| None` | Author name |
225
+ | `date` | `str \| None` | Publication date (ISO 8601 when available) |
226
+ | `description` | `str \| None` | Meta description |
227
+ | `url` | `str \| None` | Canonical URL (may differ from input) |
228
+ | `hostname` | `str \| None` | Hostname |
229
+ | `sitename` | `str \| None` | Publisher name |
230
+
231
+ To get a plain dict: `dataclasses.asdict(result)`.
232
+
233
+ `afetch_result` is the async variant, with the same signature.
234
+
235
+ ## Optional Dependencies
236
+
237
+ | Extra | What it adds |
238
+ |-------|-------------|
239
+ | `stealthfetch[camoufox]` | Camoufox stealth Firefox |
240
+ | `stealthfetch[patchright]` | Patchright stealth Chromium |
241
+ | `stealthfetch[browser]` | Both |
242
+ | `stealthfetch[mcp]` | MCP server |
243
+
244
+ Python 3.10+. Tested on 3.10–3.13, Linux and macOS.
245
+
246
+ ## Roadmap
247
+
248
+ Things that would make sense if this gets traction:
249
+
250
+ - **Homebrew tap** — `brew install stealthfetch` for people who don't want to think about Python
251
+ - **Docker image** — bundle browser backends pre-installed, no `camoufox fetch` step, plays well with [Docker's MCP Catalog](https://docs.docker.com/ai/mcp-catalog-and-toolkit/)
252
+
253
+ Contributions welcome.
254
+
255
+ ## License
256
+
257
+ MIT