toapi 2.2.0__tar.gz → 2.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. toapi-2.2.1/.claude/settings.local.json +9 -0
  2. {toapi-2.2.0 → toapi-2.2.1}/.gitignore +1 -0
  3. toapi-2.2.1/PKG-INFO +145 -0
  4. toapi-2.2.1/README.md +124 -0
  5. toapi-2.2.1/docs/about/contributing.md +51 -0
  6. toapi-2.2.1/docs/about/installation.md +49 -0
  7. toapi-2.2.1/docs/about/license.md +27 -0
  8. toapi-2.2.1/docs/about/release-notes.md +39 -0
  9. toapi-2.2.1/docs/index.md +49 -0
  10. toapi-2.2.1/docs/quickstart.md +84 -0
  11. toapi-2.2.1/docs/topics/api.md +83 -0
  12. toapi-2.2.1/docs/topics/item.md +71 -0
  13. toapi-2.2.1/docs/topics/selector.md +45 -0
  14. toapi-2.2.1/mkdocs.yml +61 -0
  15. {toapi-2.2.0 → toapi-2.2.1}/pyproject.toml +1 -1
  16. {toapi-2.2.0 → toapi-2.2.1}/toapi/api.py +6 -6
  17. toapi-2.2.1/toapi/item.py +36 -0
  18. toapi-2.2.1/toapi/log.py +25 -0
  19. {toapi-2.2.0 → toapi-2.2.1}/uv.lock +1 -1
  20. toapi-2.2.0/.omc/project-memory.json +0 -174
  21. toapi-2.2.0/.omc/state/agent-replay-86181004-c476-471f-90d2-1c64e40fb749.jsonl +0 -4
  22. toapi-2.2.0/.omc/state/hud-stdin-cache.json +0 -1
  23. toapi-2.2.0/.omc/state/idle-notif-cooldown.json +0 -5
  24. toapi-2.2.0/.omc/state/last-tool-error.json +0 -7
  25. toapi-2.2.0/.omc/state/mission-state.json +0 -79
  26. toapi-2.2.0/.omc/state/sessions/86181004-c476-471f-90d2-1c64e40fb749/hud-state.json +0 -6
  27. toapi-2.2.0/.omc/state/subagent-tracking.json +0 -26
  28. toapi-2.2.0/PKG-INFO +0 -117
  29. toapi-2.2.0/README.md +0 -96
  30. toapi-2.2.0/docs/about/contributing.md +0 -62
  31. toapi-2.2.0/docs/about/installation.md +0 -86
  32. toapi-2.2.0/docs/about/license.md +0 -13
  33. toapi-2.2.0/docs/about/release-notes.md +0 -32
  34. toapi-2.2.0/docs/articles/index.md +0 -0
  35. toapi-2.2.0/docs/articles/release.md +0 -96
  36. toapi-2.2.0/docs/index.md +0 -227
  37. toapi-2.2.0/docs/quickstart.md +0 -0
  38. toapi-2.2.0/docs/topics/api.md +0 -90
  39. toapi-2.2.0/docs/topics/cache.md +0 -136
  40. toapi-2.2.0/docs/topics/item.md +0 -45
  41. toapi-2.2.0/docs/topics/selector.md +0 -57
  42. toapi-2.2.0/docs/topics/settings.md +0 -48
  43. toapi-2.2.0/docs/topics/storage.md +0 -67
  44. toapi-2.2.0/docs/tutorials/introducing.md +0 -28
  45. toapi-2.2.0/docs/tutorials/step0-creating-new-project.md +0 -41
  46. toapi-2.2.0/docs/tutorials/step1-global-settings.md +0 -39
  47. toapi-2.2.0/docs/tutorials/step2-redis.md +0 -48
  48. toapi-2.2.0/docs/tutorials/step3-sqlite3.md +0 -50
  49. toapi-2.2.0/docs/tutorials/step4-defining-items.md +0 -61
  50. toapi-2.2.0/docs/tutorials/step5-deploy.md +0 -95
  51. toapi-2.2.0/mkdocs.yml +0 -56
  52. toapi-2.2.0/toapi/item.py +0 -42
  53. toapi-2.2.0/toapi/log.py +0 -34
  54. {toapi-2.2.0 → toapi-2.2.1}/.github/workflows/ci.yml +0 -0
  55. {toapi-2.2.0 → toapi-2.2.1}/.pre-commit-config.yaml +0 -0
  56. {toapi-2.2.0 → toapi-2.2.1}/LICENSE +0 -0
  57. {toapi-2.2.0 → toapi-2.2.1}/docs/CNAME +0 -0
  58. {toapi-2.2.0 → toapi-2.2.1}/docs/diagram.png +0 -0
  59. {toapi-2.2.0 → toapi-2.2.1}/docs/imgs/introducing-1.png +0 -0
  60. {toapi-2.2.0 → toapi-2.2.1}/docs/imgs/introducing-2.png +0 -0
  61. {toapi-2.2.0 → toapi-2.2.1}/docs/imgs/introducing-3.png +0 -0
  62. {toapi-2.2.0 → toapi-2.2.1}/docs/imgs/introducing-4.png +0 -0
  63. {toapi-2.2.0 → toapi-2.2.1}/docs/imgs/runinglog.png +0 -0
  64. {toapi-2.2.0 → toapi-2.2.1}/docs/imgs/runningitems.png +0 -0
  65. {toapi-2.2.0 → toapi-2.2.1}/docs/imgs/runningresult.png +0 -0
  66. {toapi-2.2.0 → toapi-2.2.1}/docs/imgs/runningstatus.png +0 -0
  67. {toapi-2.2.0 → toapi-2.2.1}/docs/imgs/step-0-1.png +0 -0
  68. {toapi-2.2.0 → toapi-2.2.1}/docs/logo.png +0 -0
  69. {toapi-2.2.0 → toapi-2.2.1}/examples/click/app.py +0 -0
  70. {toapi-2.2.0 → toapi-2.2.1}/examples/click/static/main.js +0 -0
  71. {toapi-2.2.0 → toapi-2.2.1}/examples/click/templates/index.html +0 -0
  72. {toapi-2.2.0 → toapi-2.2.1}/examples/hackernews_page.py +0 -0
  73. {toapi-2.2.0 → toapi-2.2.1}/tests/test_toapi.py +0 -0
  74. {toapi-2.2.0 → toapi-2.2.1}/toapi/__init__.py +0 -0
  75. {toapi-2.2.0 → toapi-2.2.1}/toapi/cli.py +0 -0
@@ -0,0 +1,9 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(uv run *)",
5
+ "Bash(uv sync *)",
6
+ "Bash(uv build *)"
7
+ ]
8
+ }
9
+ }
@@ -1,4 +1,5 @@
1
1
  .idea/
2
+ .omc/
2
3
  # Byte-compiled / optimized / DLL files
3
4
  __pycache__/
4
5
  *.py[cod]
toapi-2.2.1/PKG-INFO ADDED
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: toapi
3
+ Version: 2.2.1
4
+ Summary: Every web site provides APIs.
5
+ Project-URL: homepage, https://github.com/gaojiuli/toapi
6
+ Project-URL: repository, https://github.com/gaojiuli/toapi
7
+ Project-URL: documentation, https://gaojiuli.github.io/toapi/
8
+ Author-email: Elliot Gao <gaojiuli@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Requires-Python: >=3.10
12
+ Requires-Dist: charset-normalizer>=3.3
13
+ Requires-Dist: click>=8.1
14
+ Requires-Dist: colorama>=0.4.6
15
+ Requires-Dist: cssselect>=1.2
16
+ Requires-Dist: flask>=3.0
17
+ Requires-Dist: htmlfetcher>=0.0.6
18
+ Requires-Dist: htmlparsing>=0.1.5
19
+ Requires-Dist: requests>=2.32
20
+ Description-Content-Type: text/markdown
21
+
22
+ # toapi
23
+
24
+ [![CI](https://github.com/elliotgao2/toapi/actions/workflows/ci.yml/badge.svg)](https://github.com/elliotgao2/toapi/actions/workflows/ci.yml)
25
+ [![PyPI](https://img.shields.io/pypi/v/toapi.svg)](https://pypi.org/project/toapi/)
26
+ [![Python](https://img.shields.io/pypi/pyversions/toapi.svg)](https://pypi.org/project/toapi/)
27
+ [![License](https://img.shields.io/pypi/l/toapi.svg)](https://pypi.org/project/toapi/)
28
+
29
+ > Turn any website into a JSON API — declaratively.
30
+
31
+ `toapi` lets you point at a web page, declare the fields you want with CSS
32
+ selectors, and get back a clean JSON API. No crawler to babysit, no database to
33
+ maintain — pages are fetched and parsed on demand, with built-in caching.
34
+
35
+ ## Install
36
+
37
+ ```bash
38
+ pip install toapi
39
+ ```
40
+
41
+ Requires Python 3.10+.
42
+
43
+ ## Quickstart
44
+
45
+ ```python
46
+ from htmlparsing import Attr, Text
47
+ from toapi import Api, Item
48
+
49
+ api = Api()
50
+
51
+
52
+ @api.site("https://news.ycombinator.com")
53
+ @api.list(".athing")
54
+ @api.route("/posts", "/news")
55
+ @api.route("/posts?page={page}", "/news?p={page}")
56
+ class Post(Item):
57
+ title = Text(".titleline > a")
58
+ url = Attr(".titleline > a", "href")
59
+
60
+
61
+ api.run(host="127.0.0.1", port=5000)
62
+ ```
63
+
64
+ Run it:
65
+
66
+ ```bash
67
+ python app.py
68
+ ```
69
+
70
+ Then visit <http://127.0.0.1:5000/posts> and you get:
71
+
72
+ ```json
73
+ {
74
+ "Post": [
75
+ {"title": "Mathematicians Crack the Cursed Curve", "url": "https://www.quantamagazine.org/..."},
76
+ {"title": "Stuffing a Tesla Drivetrain into a 1981 Honda Accord", "url": "https://jalopnik.com/..."}
77
+ ]
78
+ }
79
+ ```
80
+
81
+ ## How it works
82
+
83
+ ```
84
+ ┌────────────┐ ┌────────────┐ ┌────────────┐
85
+ │ /posts │ ─▶ │ fetch │ ─▶ │ parse │ ─▶ JSON
86
+ │ (route) │ │ (cache) │ │ (Item) │
87
+ └────────────┘ └────────────┘ └────────────┘
88
+ ```
89
+
90
+ 1. **Route** — `@api.route("/posts", "/news")` maps your API path to a source URL.
91
+ 2. **Fetch** — pages are fetched with `requests` (or a headless browser if you pass `browser=`) and cached in memory.
92
+ 3. **Parse** — each `Item` extracts fields with CSS selectors via `htmlparsing`.
93
+ 4. **Serve** — Flask returns the result as JSON; subsequent calls hit the cache.
94
+
95
+ ## Features
96
+
97
+ - **Declarative** — describe data, not scraping logic.
98
+ - **Routes** — map clean API paths to messy source URLs with `{param}` placeholders.
99
+ - **Multi-site** — merge several websites behind one API.
100
+ - **Cleaning hooks** — define `clean_<field>` methods to post-process values.
101
+ - **Caching** — pages and parsed results are cached automatically.
102
+ - **Headless browser** — pass `Api(browser="/path/to/geckodriver")` for JS-heavy sites.
103
+
104
+ ## Cleaning values
105
+
106
+ Add a `clean_<fieldname>` method on the Item to transform a value before it's
107
+ returned:
108
+
109
+ ```python
110
+ @api.site("https://news.ycombinator.com")
111
+ @api.route("/posts", "/news")
112
+ class Page(Item):
113
+ next_page = Attr(".morelink", "href")
114
+
115
+ def clean_next_page(self, value):
116
+ return f"/posts?{value.split('?', 1)[1]}"
117
+ ```
118
+
119
+ ## Development
120
+
121
+ ```bash
122
+ git clone https://github.com/elliotgao2/toapi.git
123
+ cd toapi
124
+ uv sync # install deps into .venv
125
+ uv run pytest # run tests
126
+ uv run ruff check .
127
+ ```
128
+
129
+ We use [uv](https://github.com/astral-sh/uv) for packaging and
130
+ [ruff](https://github.com/astral-sh/ruff) for lint + format. Pre-commit hooks
131
+ keep both clean:
132
+
133
+ ```bash
134
+ uv run pre-commit install
135
+ ```
136
+
137
+ ## Contributing
138
+
139
+ Pull requests are welcome. For non-trivial changes, please open an issue first
140
+ to discuss what you'd like to change. Make sure `uv run pytest` and
141
+ `uv run ruff check .` pass before submitting.
142
+
143
+ ## License
144
+
145
+ [MIT](LICENSE) © Elliot Gao
toapi-2.2.1/README.md ADDED
@@ -0,0 +1,124 @@
1
+ # toapi
2
+
3
+ [![CI](https://github.com/elliotgao2/toapi/actions/workflows/ci.yml/badge.svg)](https://github.com/elliotgao2/toapi/actions/workflows/ci.yml)
4
+ [![PyPI](https://img.shields.io/pypi/v/toapi.svg)](https://pypi.org/project/toapi/)
5
+ [![Python](https://img.shields.io/pypi/pyversions/toapi.svg)](https://pypi.org/project/toapi/)
6
+ [![License](https://img.shields.io/pypi/l/toapi.svg)](https://pypi.org/project/toapi/)
7
+
8
+ > Turn any website into a JSON API — declaratively.
9
+
10
+ `toapi` lets you point at a web page, declare the fields you want with CSS
11
+ selectors, and get back a clean JSON API. No crawler to babysit, no database to
12
+ maintain — pages are fetched and parsed on demand, with built-in caching.
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ pip install toapi
18
+ ```
19
+
20
+ Requires Python 3.10+.
21
+
22
+ ## Quickstart
23
+
24
+ ```python
25
+ from htmlparsing import Attr, Text
26
+ from toapi import Api, Item
27
+
28
+ api = Api()
29
+
30
+
31
+ @api.site("https://news.ycombinator.com")
32
+ @api.list(".athing")
33
+ @api.route("/posts", "/news")
34
+ @api.route("/posts?page={page}", "/news?p={page}")
35
+ class Post(Item):
36
+ title = Text(".titleline > a")
37
+ url = Attr(".titleline > a", "href")
38
+
39
+
40
+ api.run(host="127.0.0.1", port=5000)
41
+ ```
42
+
43
+ Run it:
44
+
45
+ ```bash
46
+ python app.py
47
+ ```
48
+
49
+ Then visit <http://127.0.0.1:5000/posts> and you get:
50
+
51
+ ```json
52
+ {
53
+ "Post": [
54
+ {"title": "Mathematicians Crack the Cursed Curve", "url": "https://www.quantamagazine.org/..."},
55
+ {"title": "Stuffing a Tesla Drivetrain into a 1981 Honda Accord", "url": "https://jalopnik.com/..."}
56
+ ]
57
+ }
58
+ ```
59
+
60
+ ## How it works
61
+
62
+ ```
63
+ ┌────────────┐ ┌────────────┐ ┌────────────┐
64
+ │ /posts │ ─▶ │ fetch │ ─▶ │ parse │ ─▶ JSON
65
+ │ (route) │ │ (cache) │ │ (Item) │
66
+ └────────────┘ └────────────┘ └────────────┘
67
+ ```
68
+
69
+ 1. **Route** — `@api.route("/posts", "/news")` maps your API path to a source URL.
70
+ 2. **Fetch** — pages are fetched with `requests` (or a headless browser if you pass `browser=`) and cached in memory.
71
+ 3. **Parse** — each `Item` extracts fields with CSS selectors via `htmlparsing`.
72
+ 4. **Serve** — Flask returns the result as JSON; subsequent calls hit the cache.
73
+
74
+ ## Features
75
+
76
+ - **Declarative** — describe data, not scraping logic.
77
+ - **Routes** — map clean API paths to messy source URLs with `{param}` placeholders.
78
+ - **Multi-site** — merge several websites behind one API.
79
+ - **Cleaning hooks** — define `clean_<field>` methods to post-process values.
80
+ - **Caching** — pages and parsed results are cached automatically.
81
+ - **Headless browser** — pass `Api(browser="/path/to/geckodriver")` for JS-heavy sites.
82
+
83
+ ## Cleaning values
84
+
85
+ Add a `clean_<fieldname>` method on the Item to transform a value before it's
86
+ returned:
87
+
88
+ ```python
89
+ @api.site("https://news.ycombinator.com")
90
+ @api.route("/posts", "/news")
91
+ class Page(Item):
92
+ next_page = Attr(".morelink", "href")
93
+
94
+ def clean_next_page(self, value):
95
+ return f"/posts?{value.split('?', 1)[1]}"
96
+ ```
97
+
98
+ ## Development
99
+
100
+ ```bash
101
+ git clone https://github.com/elliotgao2/toapi.git
102
+ cd toapi
103
+ uv sync # install deps into .venv
104
+ uv run pytest # run tests
105
+ uv run ruff check .
106
+ ```
107
+
108
+ We use [uv](https://github.com/astral-sh/uv) for packaging and
109
+ [ruff](https://github.com/astral-sh/ruff) for lint + format. Pre-commit hooks
110
+ keep both clean:
111
+
112
+ ```bash
113
+ uv run pre-commit install
114
+ ```
115
+
116
+ ## Contributing
117
+
118
+ Pull requests are welcome. For non-trivial changes, please open an issue first
119
+ to discuss what you'd like to change. Make sure `uv run pytest` and
120
+ `uv run ruff check .` pass before submitting.
121
+
122
+ ## License
123
+
124
+ [MIT](LICENSE) © Elliot Gao
@@ -0,0 +1,51 @@
1
+ # Contributing
2
+
3
+ Thanks for your interest in improving `toapi`! Bug reports, feature ideas,
4
+ documentation tweaks, and pull requests are all welcome.
5
+
6
+ ## Reporting an issue
7
+
8
+ Open an issue on [GitHub](https://github.com/elliotgao2/toapi/issues) with:
9
+
10
+ - What you tried
11
+ - What you expected to happen
12
+ - What actually happened (including the full error and traceback)
13
+ - Your Python version and `toapi` version
14
+
15
+ ## Setting up a development environment
16
+
17
+ We use [uv](https://github.com/astral-sh/uv) for packaging and
18
+ [ruff](https://github.com/astral-sh/ruff) for lint and format.
19
+
20
+ ```bash
21
+ git clone https://github.com/elliotgao2/toapi.git
22
+ cd toapi
23
+ uv sync
24
+ ```
25
+
26
+ Install the pre-commit hooks so ruff runs on every commit:
27
+
28
+ ```bash
29
+ uv run pre-commit install
30
+ ```
31
+
32
+ ## Running the checks
33
+
34
+ ```bash
35
+ uv run pytest # tests
36
+ uv run ruff check . # lint
37
+ uv run ruff format --check . # format
38
+ ```
39
+
40
+ CI runs the same checks on Python 3.10, 3.11, and 3.12.
41
+
42
+ ## Submitting a pull request
43
+
44
+ 1. Fork the repo and create a topic branch.
45
+ 2. Make your change. Keep diffs focused — one concern per PR.
46
+ 3. Add or update tests when the behavior changes.
47
+ 4. Make sure `pytest` and `ruff check` pass locally.
48
+ 5. Open the PR with a short description of *what* changed and *why*.
49
+
50
+ For non-trivial changes, please open an issue first so we can discuss the
51
+ approach before you spend time on it.
@@ -0,0 +1,49 @@
1
+ # Installation
2
+
3
+ ## Requirements
4
+
5
+ - Python 3.10 or newer
6
+ - pip (or [uv](https://github.com/astral-sh/uv),
7
+ [pipx](https://pipx.pypa.io/), [Poetry](https://python-poetry.org/) — any
8
+ modern installer)
9
+
10
+ Check your Python version:
11
+
12
+ ```bash
13
+ python --version
14
+ ```
15
+
16
+ ## Install from PyPI
17
+
18
+ ```bash
19
+ pip install toapi
20
+ ```
21
+
22
+ Or with uv:
23
+
24
+ ```bash
25
+ uv add toapi
26
+ ```
27
+
28
+ ## Verify
29
+
30
+ ```bash
31
+ python -c "import toapi; print(toapi.__version__)"
32
+ ```
33
+
34
+ ## Upgrade
35
+
36
+ ```bash
37
+ pip install -U toapi
38
+ ```
39
+
40
+ ## Install from source
41
+
42
+ ```bash
43
+ git clone https://github.com/elliotgao2/toapi.git
44
+ cd toapi
45
+ uv sync
46
+ ```
47
+
48
+ This drops you in a working development environment with all dependencies
49
+ and dev tools.
@@ -0,0 +1,27 @@
1
+ # License
2
+
3
+ `toapi` is released under the MIT License.
4
+
5
+ ```
6
+ MIT License
7
+
8
+ Copyright (c) 2021 Elliot Gao
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to
12
+ deal in the Software without restriction, including without limitation the
13
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
14
+ sell copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in
18
+ all copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26
+ IN THE SOFTWARE.
27
+ ```
@@ -0,0 +1,39 @@
1
+ # Release Notes
2
+
3
+ ## Upgrading
4
+
5
+ ```bash
6
+ pip install -U toapi
7
+ ```
8
+
9
+ Or with uv:
10
+
11
+ ```bash
12
+ uv sync --upgrade-package toapi
13
+ ```
14
+
15
+ ## Changelog
16
+
17
+ ### 2.2.0 (2026-05-22)
18
+
19
+ - Switched packaging from Poetry to [uv](https://github.com/astral-sh/uv)
20
+ (PEP 621 + hatchling).
21
+ - Raised the minimum Python version to 3.10.
22
+ - Replaced the abandoned `cchardet` dependency with `charset-normalizer`.
23
+ - Bumped Flask 2 → 3, plus `requests`, `click`, `colorama`, and `cssselect`
24
+ to current majors.
25
+ - Replaced black + isort + flake8 + `pytest-pep8` with a single
26
+ [ruff](https://github.com/astral-sh/ruff) toolchain.
27
+ - Replaced Travis CI with GitHub Actions on a 3.10 / 3.11 / 3.12 matrix.
28
+ - Replaced the `ItemType` metaclass with `__init_subclass__` — same
29
+ behavior, half the code.
30
+ - `__version__` is now sourced from package metadata, fixing an import
31
+ error in `toapi.cli`.
32
+
33
+ ### 2.1.x
34
+
35
+ - Maintenance releases on the old Poetry / Python 3.8 stack.
36
+
37
+ ### 1.0.0 (2017-12-26)
38
+
39
+ - Initial release.
@@ -0,0 +1,49 @@
1
+ # toapi
2
+
3
+ > Turn any website into a JSON API — declaratively.
4
+
5
+ `toapi` lets you point at a web page, declare the fields you want with CSS
6
+ selectors, and get a clean JSON API back. No crawler to babysit, no database
7
+ to maintain — pages are fetched and parsed on demand, with built-in caching.
8
+
9
+ ## A 10-line example
10
+
11
+ ```python
12
+ from htmlparsing import Attr, Text
13
+ from toapi import Api, Item
14
+
15
+ api = Api()
16
+
17
+
18
+ @api.site("https://news.ycombinator.com")
19
+ @api.list(".athing")
20
+ @api.route("/posts", "/news")
21
+ class Post(Item):
22
+ title = Text(".titleline > a")
23
+ url = Attr(".titleline > a", "href")
24
+
25
+
26
+ api.run(host="127.0.0.1", port=5000)
27
+ ```
28
+
29
+ Visit `http://127.0.0.1:5000/posts` and you get a JSON list of every story
30
+ on the front page.
31
+
32
+ ## How it works
33
+
34
+ 1. **Route** — `@api.route("/posts", "/news")` maps your API path to a source
35
+ URL.
36
+ 2. **Fetch** — pages are fetched with `requests` (or a headless browser if
37
+ you pass `browser=`) and cached in memory.
38
+ 3. **Parse** — each `Item` extracts fields with CSS selectors via
39
+ `htmlparsing`.
40
+ 4. **Serve** — Flask returns the result as JSON; subsequent calls hit the
41
+ cache.
42
+
43
+ ## Next steps
44
+
45
+ - [Quickstart](quickstart.md) — a complete walk-through with two routes and a
46
+ clean method.
47
+ - [Api](topics/api.md) — the `Api` class and its decorators.
48
+ - [Item](topics/item.md) — how to declare data shapes.
49
+ - [Selectors](topics/selector.md) — picking values out of HTML.
@@ -0,0 +1,84 @@
1
+ # Quickstart
2
+
3
+ Build a small API in front of Hacker News. By the end you'll have two routes,
4
+ a list of posts, and a cleaned `next_page` URL that loops back into your own
5
+ API.
6
+
7
+ ## 1. Install
8
+
9
+ ```bash
10
+ pip install toapi
11
+ ```
12
+
13
+ Requires Python 3.10+.
14
+
15
+ ## 2. Write `app.py`
16
+
17
+ ```python
18
+ from flask import request
19
+ from htmlparsing import Attr, Text
20
+ from toapi import Api, Item
21
+
22
+ api = Api()
23
+
24
+
25
+ @api.site("https://news.ycombinator.com")
26
+ @api.list(".athing")
27
+ @api.route("/posts", "/news")
28
+ @api.route("/posts?page={page}", "/news?p={page}")
29
+ class Post(Item):
30
+ title = Text(".titleline > a")
31
+ url = Attr(".titleline > a", "href")
32
+
33
+
34
+ @api.site("https://news.ycombinator.com")
35
+ @api.route("/posts", "/news")
36
+ @api.route("/posts?page={page}", "/news?p={page}")
37
+ class Page(Item):
38
+ next_page = Attr(".morelink", "href")
39
+
40
+ def clean_next_page(self, value):
41
+ return api.convert_string(
42
+ "/" + value,
43
+ "/news?p={page}",
44
+ request.host_url.strip("/") + "/posts?page={page}",
45
+ )
46
+
47
+
48
+ api.run(host="127.0.0.1", port=5000)
49
+ ```
50
+
51
+ ## 3. Run
52
+
53
+ ```bash
54
+ python app.py
55
+ ```
56
+
57
+ Then open <http://127.0.0.1:5000/posts>:
58
+
59
+ ```json
60
+ {
61
+ "Post": [
62
+ {"title": "Mathematicians Crack the Cursed Curve", "url": "https://..."},
63
+ {"title": "Stuffing a Tesla Drivetrain into a 1981 Honda Accord", "url": "https://..."}
64
+ ],
65
+ "Page": {
66
+ "next_page": "http://127.0.0.1:5000/posts?page=2"
67
+ }
68
+ }
69
+ ```
70
+
71
+ ## What just happened?
72
+
73
+ - `@api.site(...)` told the item which website to scrape from.
74
+ - `@api.list(".athing")` said *this item repeats* — each `.athing` element on
75
+ the page becomes one entry.
76
+ - `@api.route(api_path, source_path)` mapped the path your users hit to the
77
+ path on the source site. `{page}` is a placeholder passed through in both
78
+ directions.
79
+ - `Text(...)` and `Attr(...)` are CSS selectors that pull a value out of each
80
+ matched element.
81
+ - `clean_next_page(self, value)` runs after parsing and rewrites the source
82
+ pagination link to point back at our own API.
83
+
84
+ That's the whole framework. See [Topics](topics/api.md) for the details.
@@ -0,0 +1,83 @@
1
+ # Api
2
+
3
+ `Api` is the entry point. It owns the Flask app, the cache, and the registry
4
+ of routes.
5
+
6
+ ```python
7
+ from toapi import Api
8
+
9
+ api = Api()
10
+ ```
11
+
12
+ ## Constructor
13
+
14
+ ```python
15
+ Api(site: str = "", browser: str | None = None)
16
+ ```
17
+
18
+ - **`site`** — a default base URL prepended to every Item's
19
+ source path. Most users leave this blank and put the site on the Item with
20
+ `@api.site(...)`.
21
+ - **`browser`** — path to a headless-browser driver (e.g. `geckodriver`).
22
+ When set, pages are fetched through the browser instead of plain
23
+ `requests`. Useful for JavaScript-heavy sites.
24
+
25
+ ## Decorators
26
+
27
+ Decorators are stacked on an `Item` class to declare *what* to scrape,
28
+ *where* it lives, and *which URLs* expose it.
29
+
30
+ ### `@api.site(url)`
31
+
32
+ Sets the source website for an Item.
33
+
34
+ ```python
35
+ @api.site("https://news.ycombinator.com")
36
+ class Post(Item): ...
37
+ ```
38
+
39
+ ### `@api.list(selector)`
40
+
41
+ Marks the Item as a *list item* — the parser will return one entry per
42
+ element matched by `selector` on the source page.
43
+
44
+ ```python
45
+ @api.list(".athing")
46
+ class Post(Item): ...
47
+ ```
48
+
49
+ Without `@api.list`, the Item is a *detail item* — it parses a single record
50
+ from the page.
51
+
52
+ ### `@api.route(api_path, source_path)`
53
+
54
+ Maps a path on your API to a path on the source site. Placeholders like
55
+ `{page}` are passed through in both directions.
56
+
57
+ ```python
58
+ @api.route("/posts?page={page}", "/news?p={page}")
59
+ @api.route("/posts", "/news")
60
+ class Post(Item): ...
61
+ ```
62
+
63
+ Multiple `@api.route` decorators may be stacked on the same Item.
64
+
65
+ ## `api.run(host, port, **flask_options)`
66
+
67
+ Starts the Flask development server.
68
+
69
+ ```python
70
+ api.run(host="0.0.0.0", port=5000, debug=True)
71
+ ```
72
+
73
+ For production, mount `api.app` (a plain Flask app) under your WSGI server of
74
+ choice — gunicorn, uWSGI, waitress.
75
+
76
+ ## Caching
77
+
78
+ Two in-memory caches are populated automatically:
79
+
80
+ - **Page cache** (`api._storage`) — keyed by source URL, stores raw HTML.
81
+ - **Result cache** (`api._cache`) — keyed by API path, stores parsed JSON.
82
+
83
+ Both live for the lifetime of the process. Restart to clear.