toapi 2.2.0__tar.gz → 2.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toapi-2.2.2/.claude/settings.local.json +24 -0
- {toapi-2.2.0 → toapi-2.2.2}/.gitignore +1 -0
- toapi-2.2.2/PKG-INFO +159 -0
- toapi-2.2.2/README.md +124 -0
- toapi-2.2.2/docs/about/contributing.md +51 -0
- toapi-2.2.2/docs/about/installation.md +49 -0
- toapi-2.2.2/docs/about/license.md +27 -0
- toapi-2.2.2/docs/about/release-notes.md +39 -0
- toapi-2.2.2/docs/index.md +49 -0
- toapi-2.2.2/docs/quickstart.md +84 -0
- toapi-2.2.2/docs/topics/api.md +83 -0
- toapi-2.2.2/docs/topics/item.md +71 -0
- toapi-2.2.2/docs/topics/selector.md +45 -0
- toapi-2.2.2/mkdocs.yml +61 -0
- {toapi-2.2.0 → toapi-2.2.2}/pyproject.toml +17 -1
- toapi-2.2.2/tests/test_toapi.py +123 -0
- {toapi-2.2.0 → toapi-2.2.2}/toapi/api.py +6 -6
- toapi-2.2.2/toapi/item.py +36 -0
- toapi-2.2.2/toapi/log.py +25 -0
- {toapi-2.2.0 → toapi-2.2.2}/uv.lock +1 -1
- toapi-2.2.0/.omc/project-memory.json +0 -174
- toapi-2.2.0/.omc/state/agent-replay-86181004-c476-471f-90d2-1c64e40fb749.jsonl +0 -4
- toapi-2.2.0/.omc/state/hud-stdin-cache.json +0 -1
- toapi-2.2.0/.omc/state/idle-notif-cooldown.json +0 -5
- toapi-2.2.0/.omc/state/last-tool-error.json +0 -7
- toapi-2.2.0/.omc/state/mission-state.json +0 -79
- toapi-2.2.0/.omc/state/sessions/86181004-c476-471f-90d2-1c64e40fb749/hud-state.json +0 -6
- toapi-2.2.0/.omc/state/subagent-tracking.json +0 -26
- toapi-2.2.0/PKG-INFO +0 -117
- toapi-2.2.0/README.md +0 -96
- toapi-2.2.0/docs/about/contributing.md +0 -62
- toapi-2.2.0/docs/about/installation.md +0 -86
- toapi-2.2.0/docs/about/license.md +0 -13
- toapi-2.2.0/docs/about/release-notes.md +0 -32
- toapi-2.2.0/docs/articles/index.md +0 -0
- toapi-2.2.0/docs/articles/release.md +0 -96
- toapi-2.2.0/docs/index.md +0 -227
- toapi-2.2.0/docs/quickstart.md +0 -0
- toapi-2.2.0/docs/topics/api.md +0 -90
- toapi-2.2.0/docs/topics/cache.md +0 -136
- toapi-2.2.0/docs/topics/item.md +0 -45
- toapi-2.2.0/docs/topics/selector.md +0 -57
- toapi-2.2.0/docs/topics/settings.md +0 -48
- toapi-2.2.0/docs/topics/storage.md +0 -67
- toapi-2.2.0/docs/tutorials/introducing.md +0 -28
- toapi-2.2.0/docs/tutorials/step0-creating-new-project.md +0 -41
- toapi-2.2.0/docs/tutorials/step1-global-settings.md +0 -39
- toapi-2.2.0/docs/tutorials/step2-redis.md +0 -48
- toapi-2.2.0/docs/tutorials/step3-sqlite3.md +0 -50
- toapi-2.2.0/docs/tutorials/step4-defining-items.md +0 -61
- toapi-2.2.0/docs/tutorials/step5-deploy.md +0 -95
- toapi-2.2.0/mkdocs.yml +0 -56
- toapi-2.2.0/tests/test_toapi.py +0 -55
- toapi-2.2.0/toapi/item.py +0 -42
- toapi-2.2.0/toapi/log.py +0 -34
- {toapi-2.2.0 → toapi-2.2.2}/.github/workflows/ci.yml +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/.pre-commit-config.yaml +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/LICENSE +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/CNAME +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/diagram.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/imgs/introducing-1.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/imgs/introducing-2.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/imgs/introducing-3.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/imgs/introducing-4.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/imgs/runinglog.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/imgs/runningitems.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/imgs/runningresult.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/imgs/runningstatus.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/imgs/step-0-1.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/docs/logo.png +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/examples/click/app.py +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/examples/click/static/main.js +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/examples/click/templates/index.html +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/examples/hackernews_page.py +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/toapi/__init__.py +0 -0
- {toapi-2.2.0 → toapi-2.2.2}/toapi/cli.py +0 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(uv run *)",
|
|
5
|
+
"Bash(uv sync *)",
|
|
6
|
+
"Bash(uv build *)",
|
|
7
|
+
"Bash(git add *)",
|
|
8
|
+
"Bash(curl -sI https://pypi.org/project/toapi/2.2.1/)",
|
|
9
|
+
"Bash(curl -s \"https://pypi.org/simple/toapi/\")",
|
|
10
|
+
"Bash(curl -sI -L https://pypi.org/project/toapi/2.2.1/)",
|
|
11
|
+
"Bash(curl -s https://pypi.org/pypi/toapi/2.2.1/json)",
|
|
12
|
+
"Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\('version:', d['info']['version']\\); print\\('files:', [f['filename'] for f in d['urls']]\\)\")",
|
|
13
|
+
"Bash(gh run *)",
|
|
14
|
+
"Bash(git commit -m ' *)",
|
|
15
|
+
"Bash(git push *)",
|
|
16
|
+
"Bash(curl -sI \"https://github.com/elliotgao2/toapi/actions/workflows/ci.yml/badge.svg\")",
|
|
17
|
+
"Bash(curl -s \"https://img.shields.io/pypi/v/toapi.svg\")",
|
|
18
|
+
"Bash(curl -s \"https://img.shields.io/pypi/pyversions/toapi.svg\")",
|
|
19
|
+
"Bash(curl -s \"https://img.shields.io/pypi/l/toapi.svg\")",
|
|
20
|
+
"Bash(curl -s https://pypi.org/pypi/toapi/json)",
|
|
21
|
+
"Bash(python3 -c ' *)"
|
|
22
|
+
]
|
|
23
|
+
}
|
|
24
|
+
}
|
toapi-2.2.2/PKG-INFO
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: toapi
|
|
3
|
+
Version: 2.2.2
|
|
4
|
+
Summary: Every web site provides APIs.
|
|
5
|
+
Project-URL: homepage, https://github.com/gaojiuli/toapi
|
|
6
|
+
Project-URL: repository, https://github.com/gaojiuli/toapi
|
|
7
|
+
Project-URL: documentation, https://gaojiuli.github.io/toapi/
|
|
8
|
+
Author-email: Elliot Gao <gaojiuli@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Framework :: Flask
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Requires-Dist: charset-normalizer>=3.3
|
|
27
|
+
Requires-Dist: click>=8.1
|
|
28
|
+
Requires-Dist: colorama>=0.4.6
|
|
29
|
+
Requires-Dist: cssselect>=1.2
|
|
30
|
+
Requires-Dist: flask>=3.0
|
|
31
|
+
Requires-Dist: htmlfetcher>=0.0.6
|
|
32
|
+
Requires-Dist: htmlparsing>=0.1.5
|
|
33
|
+
Requires-Dist: requests>=2.32
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# toapi
|
|
37
|
+
|
|
38
|
+
[![CI](https://github.com/elliotgao2/toapi/actions/workflows/ci.yml/badge.svg)](https://github.com/elliotgao2/toapi/actions/workflows/ci.yml)
|
|
39
|
+
[![PyPI version](https://img.shields.io/pypi/v/toapi.svg)](https://pypi.org/project/toapi/)
|
|
40
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/toapi.svg)](https://pypi.org/project/toapi/)
|
|
41
|
+
[![License](https://img.shields.io/pypi/l/toapi.svg)](https://pypi.org/project/toapi/)
|
|
42
|
+
|
|
43
|
+
> Turn any website into a JSON API — declaratively.
|
|
44
|
+
|
|
45
|
+
`toapi` lets you point at a web page, declare the fields you want with CSS
|
|
46
|
+
selectors, and get back a clean JSON API. No crawler to babysit, no database to
|
|
47
|
+
maintain — pages are fetched and parsed on demand, with built-in caching.
|
|
48
|
+
|
|
49
|
+
## Install
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install toapi
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Requires Python 3.10+.
|
|
56
|
+
|
|
57
|
+
## Quickstart
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from htmlparsing import Attr, Text
|
|
61
|
+
from toapi import Api, Item
|
|
62
|
+
|
|
63
|
+
api = Api()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@api.site("https://news.ycombinator.com")
|
|
67
|
+
@api.list(".athing")
|
|
68
|
+
@api.route("/posts", "/news")
|
|
69
|
+
@api.route("/posts?page={page}", "/news?p={page}")
|
|
70
|
+
class Post(Item):
|
|
71
|
+
title = Text(".titleline > a")
|
|
72
|
+
url = Attr(".titleline > a", "href")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
api.run(host="127.0.0.1", port=5000)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Run it:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
python app.py
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Then visit <http://127.0.0.1:5000/posts> and you get:
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
{
|
|
88
|
+
"Post": [
|
|
89
|
+
{"title": "Mathematicians Crack the Cursed Curve", "url": "https://www.quantamagazine.org/..."},
|
|
90
|
+
{"title": "Stuffing a Tesla Drivetrain into a 1981 Honda Accord", "url": "https://jalopnik.com/..."}
|
|
91
|
+
]
|
|
92
|
+
}
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## How it works
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
┌────────────┐ ┌────────────┐ ┌────────────┐
|
|
99
|
+
│ /posts │ ─▶ │ fetch │ ─▶ │ parse │ ─▶ JSON
|
|
100
|
+
│ (route) │ │ (cache) │ │ (Item) │
|
|
101
|
+
└────────────┘ └────────────┘ └────────────┘
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
1. **Route** — `@api.route("/posts", "/news")` maps your API path to a source URL.
|
|
105
|
+
2. **Fetch** — pages are fetched with `requests` (or a headless browser if you pass `browser=`) and cached in memory.
|
|
106
|
+
3. **Parse** — each `Item` extracts fields with CSS selectors via `htmlparsing`.
|
|
107
|
+
4. **Serve** — Flask returns the result as JSON; subsequent calls hit the cache.
|
|
108
|
+
|
|
109
|
+
## Features
|
|
110
|
+
|
|
111
|
+
- **Declarative** — describe data, not scraping logic.
|
|
112
|
+
- **Routes** — map clean API paths to messy source URLs with `{param}` placeholders.
|
|
113
|
+
- **Multi-site** — merge several websites behind one API.
|
|
114
|
+
- **Cleaning hooks** — define `clean_<field>` methods to post-process values.
|
|
115
|
+
- **Caching** — pages and parsed results are cached automatically.
|
|
116
|
+
- **Headless browser** — pass `Api(browser="/path/to/geckodriver")` for JS-heavy sites.
|
|
117
|
+
|
|
118
|
+
## Cleaning values
|
|
119
|
+
|
|
120
|
+
Add a `clean_<fieldname>` method on the Item to transform a value before it's
|
|
121
|
+
returned:
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
@api.site("https://news.ycombinator.com")
|
|
125
|
+
@api.route("/posts", "/news")
|
|
126
|
+
class Page(Item):
|
|
127
|
+
next_page = Attr(".morelink", "href")
|
|
128
|
+
|
|
129
|
+
def clean_next_page(self, value):
|
|
130
|
+
return f"/posts?{value.split('?', 1)[1]}"
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Development
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
git clone https://github.com/elliotgao2/toapi.git
|
|
137
|
+
cd toapi
|
|
138
|
+
uv sync # install deps into .venv
|
|
139
|
+
uv run pytest # run tests
|
|
140
|
+
uv run ruff check .
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
We use [uv](https://github.com/astral-sh/uv) for packaging and
|
|
144
|
+
[ruff](https://github.com/astral-sh/ruff) for lint + format. Pre-commit hooks
|
|
145
|
+
keep both clean:
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
uv run pre-commit install
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Contributing
|
|
152
|
+
|
|
153
|
+
Pull requests are welcome. For non-trivial changes, please open an issue first
|
|
154
|
+
to discuss what you'd like to change. Make sure `uv run pytest` and
|
|
155
|
+
`uv run ruff check .` pass before submitting.
|
|
156
|
+
|
|
157
|
+
## License
|
|
158
|
+
|
|
159
|
+
[MIT](LICENSE) © Elliot Gao
|
toapi-2.2.2/README.md
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# toapi
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/elliotgao2/toapi/actions/workflows/ci.yml/badge.svg)](https://github.com/elliotgao2/toapi/actions/workflows/ci.yml)
|
|
4
|
+
[![PyPI version](https://img.shields.io/pypi/v/toapi.svg)](https://pypi.org/project/toapi/)
|
|
5
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/toapi.svg)](https://pypi.org/project/toapi/)
|
|
6
|
+
[![License](https://img.shields.io/pypi/l/toapi.svg)](https://pypi.org/project/toapi/)
|
|
7
|
+
|
|
8
|
+
> Turn any website into a JSON API — declaratively.
|
|
9
|
+
|
|
10
|
+
`toapi` lets you point at a web page, declare the fields you want with CSS
|
|
11
|
+
selectors, and get back a clean JSON API. No crawler to babysit, no database to
|
|
12
|
+
maintain — pages are fetched and parsed on demand, with built-in caching.
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install toapi
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Requires Python 3.10+.
|
|
21
|
+
|
|
22
|
+
## Quickstart
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from htmlparsing import Attr, Text
|
|
26
|
+
from toapi import Api, Item
|
|
27
|
+
|
|
28
|
+
api = Api()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@api.site("https://news.ycombinator.com")
|
|
32
|
+
@api.list(".athing")
|
|
33
|
+
@api.route("/posts", "/news")
|
|
34
|
+
@api.route("/posts?page={page}", "/news?p={page}")
|
|
35
|
+
class Post(Item):
|
|
36
|
+
title = Text(".titleline > a")
|
|
37
|
+
url = Attr(".titleline > a", "href")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
api.run(host="127.0.0.1", port=5000)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Run it:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
python app.py
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Then visit <http://127.0.0.1:5000/posts> and you get:
|
|
50
|
+
|
|
51
|
+
```json
|
|
52
|
+
{
|
|
53
|
+
"Post": [
|
|
54
|
+
{"title": "Mathematicians Crack the Cursed Curve", "url": "https://www.quantamagazine.org/..."},
|
|
55
|
+
{"title": "Stuffing a Tesla Drivetrain into a 1981 Honda Accord", "url": "https://jalopnik.com/..."}
|
|
56
|
+
]
|
|
57
|
+
}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## How it works
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
┌────────────┐ ┌────────────┐ ┌────────────┐
|
|
64
|
+
│ /posts │ ─▶ │ fetch │ ─▶ │ parse │ ─▶ JSON
|
|
65
|
+
│ (route) │ │ (cache) │ │ (Item) │
|
|
66
|
+
└────────────┘ └────────────┘ └────────────┘
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
1. **Route** — `@api.route("/posts", "/news")` maps your API path to a source URL.
|
|
70
|
+
2. **Fetch** — pages are fetched with `requests` (or a headless browser if you pass `browser=`) and cached in memory.
|
|
71
|
+
3. **Parse** — each `Item` extracts fields with CSS selectors via `htmlparsing`.
|
|
72
|
+
4. **Serve** — Flask returns the result as JSON; subsequent calls hit the cache.
|
|
73
|
+
|
|
74
|
+
## Features
|
|
75
|
+
|
|
76
|
+
- **Declarative** — describe data, not scraping logic.
|
|
77
|
+
- **Routes** — map clean API paths to messy source URLs with `{param}` placeholders.
|
|
78
|
+
- **Multi-site** — merge several websites behind one API.
|
|
79
|
+
- **Cleaning hooks** — define `clean_<field>` methods to post-process values.
|
|
80
|
+
- **Caching** — pages and parsed results are cached automatically.
|
|
81
|
+
- **Headless browser** — pass `Api(browser="/path/to/geckodriver")` for JS-heavy sites.
|
|
82
|
+
|
|
83
|
+
## Cleaning values
|
|
84
|
+
|
|
85
|
+
Add a `clean_<fieldname>` method on the Item to transform a value before it's
|
|
86
|
+
returned:
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
@api.site("https://news.ycombinator.com")
|
|
90
|
+
@api.route("/posts", "/news")
|
|
91
|
+
class Page(Item):
|
|
92
|
+
next_page = Attr(".morelink", "href")
|
|
93
|
+
|
|
94
|
+
def clean_next_page(self, value):
|
|
95
|
+
return f"/posts?{value.split('?', 1)[1]}"
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Development
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
git clone https://github.com/elliotgao2/toapi.git
|
|
102
|
+
cd toapi
|
|
103
|
+
uv sync # install deps into .venv
|
|
104
|
+
uv run pytest # run tests
|
|
105
|
+
uv run ruff check .
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
We use [uv](https://github.com/astral-sh/uv) for packaging and
|
|
109
|
+
[ruff](https://github.com/astral-sh/ruff) for lint + format. Pre-commit hooks
|
|
110
|
+
keep both clean:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
uv run pre-commit install
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Contributing
|
|
117
|
+
|
|
118
|
+
Pull requests are welcome. For non-trivial changes, please open an issue first
|
|
119
|
+
to discuss what you'd like to change. Make sure `uv run pytest` and
|
|
120
|
+
`uv run ruff check .` pass before submitting.
|
|
121
|
+
|
|
122
|
+
## License
|
|
123
|
+
|
|
124
|
+
[MIT](LICENSE) © Elliot Gao
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
Thanks for your interest in improving `toapi`! Bug reports, feature ideas,
|
|
4
|
+
documentation tweaks, and pull requests are all welcome.
|
|
5
|
+
|
|
6
|
+
## Reporting an issue
|
|
7
|
+
|
|
8
|
+
Open an issue on [GitHub](https://github.com/elliotgao2/toapi/issues) with:
|
|
9
|
+
|
|
10
|
+
- What you tried
|
|
11
|
+
- What you expected to happen
|
|
12
|
+
- What actually happened (including the full error and traceback)
|
|
13
|
+
- Your Python version and `toapi` version
|
|
14
|
+
|
|
15
|
+
## Setting up a development environment
|
|
16
|
+
|
|
17
|
+
We use [uv](https://github.com/astral-sh/uv) for packaging and
|
|
18
|
+
[ruff](https://github.com/astral-sh/ruff) for lint and format.
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
git clone https://github.com/elliotgao2/toapi.git
|
|
22
|
+
cd toapi
|
|
23
|
+
uv sync
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Install the pre-commit hooks so ruff runs on every commit:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
uv run pre-commit install
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Running the checks
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
uv run pytest # tests
|
|
36
|
+
uv run ruff check . # lint
|
|
37
|
+
uv run ruff format --check . # format
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
CI runs the same checks on Python 3.10, 3.11, and 3.12.
|
|
41
|
+
|
|
42
|
+
## Submitting a pull request
|
|
43
|
+
|
|
44
|
+
1. Fork the repo and create a topic branch.
|
|
45
|
+
2. Make your change. Keep diffs focused — one concern per PR.
|
|
46
|
+
3. Add or update tests when the behavior changes.
|
|
47
|
+
4. Make sure `uv run pytest`, `uv run ruff check .`, and `uv run ruff format --check .` pass locally.
|
|
48
|
+
5. Open the PR with a short description of *what* changed and *why*.
|
|
49
|
+
|
|
50
|
+
For non-trivial changes, please open an issue first so we can discuss the
|
|
51
|
+
approach before you spend time on it.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Installation
|
|
2
|
+
|
|
3
|
+
## Requirements
|
|
4
|
+
|
|
5
|
+
- Python 3.10 or newer
|
|
6
|
+
- pip (or [uv](https://github.com/astral-sh/uv),
|
|
7
|
+
[pipx](https://pipx.pypa.io/), [Poetry](https://python-poetry.org/) — any
|
|
8
|
+
modern installer)
|
|
9
|
+
|
|
10
|
+
Check your Python version:
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
python --version
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Install from PyPI
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install toapi
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Or with uv:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
uv add toapi
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Verify
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
python -c "import toapi; print(toapi.__version__)"
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Upgrade
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install -U toapi
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Install from source
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
git clone https://github.com/elliotgao2/toapi.git
|
|
44
|
+
cd toapi
|
|
45
|
+
uv sync
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
This drops you in a working development environment with all dependencies
|
|
49
|
+
and dev tools.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# License
|
|
2
|
+
|
|
3
|
+
`toapi` is released under the MIT License.
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2021 Elliot Gao
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to
|
|
12
|
+
deal in the Software without restriction, including without limitation the
|
|
13
|
+
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
14
|
+
sell copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in
|
|
18
|
+
all copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
25
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
26
|
+
IN THE SOFTWARE.
|
|
27
|
+
```
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Release Notes
|
|
2
|
+
|
|
3
|
+
## Upgrading
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install -U toapi
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
Or with uv:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
uv sync --upgrade-package toapi
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Changelog
|
|
16
|
+
|
|
17
|
+
### 2.2.0 (2026-05-22)
|
|
18
|
+
|
|
19
|
+
- Switched packaging from Poetry to [uv](https://github.com/astral-sh/uv)
|
|
20
|
+
(PEP 621 + hatchling).
|
|
21
|
+
- Raised the minimum Python version to 3.10.
|
|
22
|
+
- Replaced the abandoned `cchardet` dependency with `charset-normalizer`.
|
|
23
|
+
- Bumped Flask 2 → 3, plus `requests`, `click`, `colorama`, and `cssselect`
|
|
24
|
+
to current majors.
|
|
25
|
+
- Replaced black + isort + flake8 + `pytest-pep8` with a single
|
|
26
|
+
[ruff](https://github.com/astral-sh/ruff) toolchain.
|
|
27
|
+
- Replaced Travis CI with GitHub Actions on a 3.10 / 3.11 / 3.12 matrix.
|
|
28
|
+
- Replaced the `ItemType` metaclass with `__init_subclass__` — same
|
|
29
|
+
behavior, half the code.
|
|
30
|
+
- `__version__` is now sourced from package metadata, fixing an import
|
|
31
|
+
error in `toapi.cli`.
|
|
32
|
+
|
|
33
|
+
### 2.1.x
|
|
34
|
+
|
|
35
|
+
- Maintenance releases on the old Poetry / Python 3.8 stack.
|
|
36
|
+
|
|
37
|
+
### 1.0.0 (2017-12-26)
|
|
38
|
+
|
|
39
|
+
- Initial release.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# toapi
|
|
2
|
+
|
|
3
|
+
> Turn any website into a JSON API — declaratively.
|
|
4
|
+
|
|
5
|
+
`toapi` lets you point at a web page, declare the fields you want with CSS
|
|
6
|
+
selectors, and get a clean JSON API back. No crawler to babysit, no database
|
|
7
|
+
to maintain — pages are fetched and parsed on demand, with built-in caching.
|
|
8
|
+
|
|
9
|
+
## A 10-line example
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from htmlparsing import Attr, Text
|
|
13
|
+
from toapi import Api, Item
|
|
14
|
+
|
|
15
|
+
api = Api()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@api.site("https://news.ycombinator.com")
|
|
19
|
+
@api.list(".athing")
|
|
20
|
+
@api.route("/posts", "/news")
|
|
21
|
+
class Post(Item):
|
|
22
|
+
title = Text(".titleline > a")
|
|
23
|
+
url = Attr(".titleline > a", "href")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
api.run(host="127.0.0.1", port=5000)
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Visit `http://127.0.0.1:5000/posts` and you get a JSON list of every story
|
|
30
|
+
on the front page.
|
|
31
|
+
|
|
32
|
+
## How it works
|
|
33
|
+
|
|
34
|
+
1. **Route** — `@api.route("/posts", "/news")` maps your API path to a source
|
|
35
|
+
URL.
|
|
36
|
+
2. **Fetch** — pages are fetched with `requests` (or a headless browser if
|
|
37
|
+
you pass `browser=`) and cached in memory.
|
|
38
|
+
3. **Parse** — each `Item` extracts fields with CSS selectors via
|
|
39
|
+
`htmlparsing`.
|
|
40
|
+
4. **Serve** — Flask returns the result as JSON; subsequent calls hit the
|
|
41
|
+
cache.
|
|
42
|
+
|
|
43
|
+
## Next steps
|
|
44
|
+
|
|
45
|
+
- [Quickstart](quickstart.md) — a complete walk-through with two routes and a
|
|
46
|
+
clean method.
|
|
47
|
+
- [Api](topics/api.md) — the `Api` class and its decorators.
|
|
48
|
+
- [Item](topics/item.md) — how to declare data shapes.
|
|
49
|
+
- [Selectors](topics/selector.md) — picking values out of HTML.
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Quickstart
|
|
2
|
+
|
|
3
|
+
Build a small API in front of Hacker News. By the end you'll have two routes,
|
|
4
|
+
a list of posts, and a cleaned `next_page` URL that loops back into your own
|
|
5
|
+
API.
|
|
6
|
+
|
|
7
|
+
## 1. Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install toapi
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Requires Python 3.10+.
|
|
14
|
+
|
|
15
|
+
## 2. Write `app.py`
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
from flask import request
|
|
19
|
+
from htmlparsing import Attr, Text
|
|
20
|
+
from toapi import Api, Item
|
|
21
|
+
|
|
22
|
+
api = Api()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@api.site("https://news.ycombinator.com")
|
|
26
|
+
@api.list(".athing")
|
|
27
|
+
@api.route("/posts", "/news")
|
|
28
|
+
@api.route("/posts?page={page}", "/news?p={page}")
|
|
29
|
+
class Post(Item):
|
|
30
|
+
title = Text(".titleline > a")
|
|
31
|
+
url = Attr(".titleline > a", "href")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@api.site("https://news.ycombinator.com")
|
|
35
|
+
@api.route("/posts", "/news")
|
|
36
|
+
@api.route("/posts?page={page}", "/news?p={page}")
|
|
37
|
+
class Page(Item):
|
|
38
|
+
next_page = Attr(".morelink", "href")
|
|
39
|
+
|
|
40
|
+
def clean_next_page(self, value):
|
|
41
|
+
return api.convert_string(
|
|
42
|
+
"/" + value,
|
|
43
|
+
"/news?p={page}",
|
|
44
|
+
request.host_url.strip("/") + "/posts?page={page}",
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
api.run(host="127.0.0.1", port=5000)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## 3. Run
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
python app.py
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Then open <http://127.0.0.1:5000/posts>:
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{
|
|
61
|
+
"Post": [
|
|
62
|
+
{"title": "Mathematicians Crack the Cursed Curve", "url": "https://..."},
|
|
63
|
+
{"title": "Stuffing a Tesla Drivetrain into a 1981 Honda Accord", "url": "https://..."}
|
|
64
|
+
],
|
|
65
|
+
"Page": {
|
|
66
|
+
"next_page": "http://127.0.0.1:5000/posts?page=2"
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## What just happened?
|
|
72
|
+
|
|
73
|
+
- `@api.site(...)` told the item which website to scrape from.
|
|
74
|
+
- `@api.list(".athing")` said *this item repeats* — each `.athing` element on
|
|
75
|
+
the page becomes one entry.
|
|
76
|
+
- `@api.route(api_path, source_path)` mapped the path your users hit to the
|
|
77
|
+
path on the source site. `{page}` is a placeholder that is substituted in both
|
|
78
|
+
directions.
|
|
79
|
+
- `Text(...)` and `Attr(...)` are CSS selectors that pull a value out of each
|
|
80
|
+
matched element.
|
|
81
|
+
- `clean_next_page(self, value)` runs after parsing and rewrites the source
|
|
82
|
+
pagination link to point back at our own API.
|
|
83
|
+
|
|
84
|
+
That's the whole framework. See [Topics](topics/api.md) for the details.
|