tldextract 5.1.2__tar.gz → 5.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tldextract-5.1.2 → tldextract-5.2.0}/.github/workflows/ci.yml +5 -6
- {tldextract-5.1.2 → tldextract-5.2.0}/CHANGELOG.md +29 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/LICENSE +1 -1
- {tldextract-5.1.2 → tldextract-5.2.0}/PKG-INFO +43 -29
- {tldextract-5.1.2 → tldextract-5.2.0}/README.md +37 -24
- {tldextract-5.1.2 → tldextract-5.2.0}/pyproject.toml +7 -4
- {tldextract-5.1.2 → tldextract-5.2.0}/scripts/release.py +48 -42
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/__snapshots__/test_release.ambr +3 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/main_test.py +41 -3
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/test_cache.py +3 -3
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/test_parallel.py +1 -13
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/test_release.py +7 -6
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract/.tld_set_snapshot +5247 -3232
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract/__init__.py +4 -2
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract/_version.py +9 -4
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract/cache.py +5 -16
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract/remote.py +1 -1
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract/suffix_list.py +3 -1
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract/tldextract.py +99 -42
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract.egg-info/PKG-INFO +43 -29
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract.egg-info/SOURCES.txt +0 -1
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract.egg-info/requires.txt +1 -1
- {tldextract-5.1.2 → tldextract-5.2.0}/tox.ini +5 -5
- tldextract-5.1.2/tests/integration_test.py +0 -13
- {tldextract-5.1.2 → tldextract-5.2.0}/.github/FUNDING.yml +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/.gitignore +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/setup.cfg +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/__init__.py +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/cli_test.py +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/conftest.py +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/custom_suffix_test.py +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/fixtures/fake_suffix_list_fixture.dat +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tests/test_trie.py +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract/__main__.py +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract/cli.py +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract/py.typed +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract.egg-info/dependency_links.txt +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract.egg-info/entry_points.txt +0 -0
- {tldextract-5.1.2 → tldextract-5.2.0}/tldextract.egg-info/top_level.txt +0 -0
.github/workflows/ci.yml

@@ -14,22 +14,21 @@ jobs:
         os: [macos-latest, windows-latest, ubuntu-latest]
         language:
           [
-            {python-version: "3.8", toxenv: "py38"},
             {python-version: "3.9", toxenv: "py39"},
             {python-version: "3.10", toxenv: "py310"},
             {python-version: "3.11", toxenv: "py311"},
             {python-version: "3.12", toxenv: "py312"},
-            {python-version: "pypy3.8", toxenv: "pypy38"},
+            {python-version: "3.13", toxenv: "py313"},
             {python-version: "pypy3.9", toxenv: "pypy39"},
             {python-version: "pypy3.10", toxenv: "pypy310"},
           ]
         include:
           - os: ubuntu-latest
-            language: {python-version: "3.8", toxenv: "codestyle"}
+            language: {python-version: "3.9", toxenv: "codestyle"}
           - os: ubuntu-latest
-            language: {python-version: "3.8", toxenv: "lint"}
+            language: {python-version: "3.9", toxenv: "lint"}
           - os: ubuntu-latest
-            language: {python-version: "3.8", toxenv: "typecheck"}
+            language: {python-version: "3.9", toxenv: "typecheck"}
     runs-on: ${{ matrix.os }}
     steps:
       - name: Check out repository

@@ -41,7 +40,7 @@ jobs:
           check-latest: true
       - name: Install Python requirements
         run: |
-          pip install --upgrade tox
+          pip install --upgrade tox tox-uv
       - name: Test
         run: tox
         env:
CHANGELOG.md

@@ -3,6 +3,35 @@
 After upgrading, update your cache file by deleting it or via `tldextract
 --update`.

+## 5.2.0 (2025-04-07)
+
+* Features
+    * Add `reverse_domain_name` result property ([#342](https://github.com/john-kurkowski/tldextract/issues/342))
+* Bugfixes
+    * Extend exported public interface with `ExtractResult` and `update` ([`36ff658`](https://github.com/john-kurkowski/tldextract/commit/36ff658c53b510c5d56f8af235c8b08ce3c512f5))
+        * These were always meant to be public. Eases user import.
+* Docs
+    * Document result fields
+        * Note all return values
+        * Colocate usage in the usage section
+        * Link to private domain docs
+* Misc.
+    * Update bundled snapshot
+
+## 5.1.3 (2024-11-04)
+
+* Bugfixes
+    * Reduce logging errors ([`921a825`](https://github.com/john-kurkowski/tldextract/commit/921a82523c0e4403d21d50b2c3410d9af43520ac))
+    * Drop support for EOL Python 3.8 ([#340](https://github.com/john-kurkowski/tldextract/issues/340))
+    * Support Python 3.13 ([#341](https://github.com/john-kurkowski/tldextract/issues/341))
+    * Update bundled snapshot
+* Docs
+    * Clarify how to use your own definitions
+    * Clarify first-successful definitions vs. merged definitions
+* Misc.
+    * Switch from Black to Ruff ([#333](https://github.com/john-kurkowski/tldextract/issues/333))
+    * Switch from pip to uv, during tox ([#324](https://github.com/john-kurkowski/tldextract/issues/324))
+
 ## 5.1.2 (2024-03-18)

 * Bugfixes
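A quick sketch of the two 5.2.0 interface changes noted above, assuming tldextract ≥ 5.2.0 is installed; the URL is an arbitrary example:

```python
import tldextract
from tldextract import ExtractResult  # now re-exported at the package level

result: ExtractResult = tldextract.extract("https://forums.bbc.co.uk/path")

# New in 5.2.0: the result in dot-reversed notation, suffix first.
print(result.reverse_domain_name)  # co.uk.bbc.forums

# `update` is also exported at the package level; it refreshes the cached
# suffix list (network call, so left commented out here).
# tldextract.update()
```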
PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: tldextract
-Version: 5.1.2
+Version: 5.2.0
 Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
 Author-email: John Kurkowski <john.kurkowski@gmail.com>
 License: BSD-3-Clause

@@ -10,12 +10,12 @@ Classifier: Development Status :: 5 - Production/Stable
 Classifier: Topic :: Utilities
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Python: >=3.8
+Classifier: Programming Language :: Python :: 3.13
+Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: idna

@@ -26,7 +26,6 @@ Provides-Extra: release
 Requires-Dist: build; extra == "release"
 Requires-Dist: twine; extra == "release"
 Provides-Extra: testing
-Requires-Dist: black; extra == "testing"
 Requires-Dist: mypy; extra == "testing"
 Requires-Dist: pytest; extra == "testing"
 Requires-Dist: pytest-gitignore; extra == "testing"

@@ -35,8 +34,10 @@ Requires-Dist: responses; extra == "testing"
 Requires-Dist: ruff; extra == "testing"
 Requires-Dist: syrupy; extra == "testing"
 Requires-Dist: tox; extra == "testing"
+Requires-Dist: tox-uv; extra == "testing"
 Requires-Dist: types-filelock; extra == "testing"
 Requires-Dist: types-requests; extra == "testing"
+Dynamic: license-file

 # tldextract [](https://badge.fury.io/py/tldextract) [](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml)

@@ -95,8 +96,17 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
 'forums.bbc.co.uk'
 ```

+In addition to the Python interface, there is a command-line interface. Split
+the URL components by space:
+
+```zsh
+$ tldextract 'http://forums.bbc.co.uk'
+forums bbc co.uk
+```
+
 By default, this package supports the public ICANN TLDs and their exceptions.
-You can optionally support the Public Suffix List's private domains as well.
+You can optionally support the Public Suffix List's [private
+domains](#public-vs-private-domains) as well.

 This package started by implementing the chosen answer from [this StackOverflow question on
 getting the "domain name" from a URL](http://stackoverflow.com/questions/569137/how-to-get-domain-name-from-url/569219#569219).

@@ -118,19 +128,12 @@ Or the latest dev version:
 pip install -e 'git://github.com/john-kurkowski/tldextract.git#egg=tldextract'
 ```

-Command-line usage, splits the URL components by space:
-
-```zsh
-tldextract http://forums.bbc.co.uk
-# forums bbc co.uk
-```
-
 ## Note about caching

 Beware when first calling `tldextract`, it updates its TLD list with a live HTTP
 request. This updated TLD set is usually cached indefinitely in `$HOME/.cache/python-tldextract`.
-To control the cache's location, set TLDEXTRACT_CACHE environment variable or set the
-cache_dir path …
+To control the cache's location, set the `TLDEXTRACT_CACHE` environment variable or set the
+`cache_dir` path when constructing a `TLDExtract`.

 (Arguably runtime bootstrapping like that shouldn't be the default behavior,
 like for production systems. But I want you to have the latest TLDs, especially
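The cache controls described above, in one short sketch. The directory is an arbitrary example path, and the commented shell line shows the environment-variable alternative:

```python
import tldextract

# Scope the cache to a single extractor instance via cache_dir.
extractor = tldextract.TLDExtract(cache_dir="/tmp/tldextract-cache")  # example path
print(extractor("forums.bbc.co.uk").registered_domain)  # bbc.co.uk

# Or set the environment variable before Python starts, e.g. in the shell:
#   TLDEXTRACT_CACHE=/tmp/tldextract-cache python my_script.py
```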
@@ -188,15 +191,17 @@ ExtractResult(subdomain='waiterrant', domain='blogspot', suffix='com', is_privat
 ```

 The following overrides this.
+
 ```python
 >>> extract = tldextract.TLDExtract()
 >>> extract('waiterrant.blogspot.com', include_psl_private_domains=True)
 ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
 ```

- …
+To change the default for all extract calls:
+
 ```python
->>> extract = tldextract.TLDExtract( …
+>>> extract = tldextract.TLDExtract(include_psl_private_domains=True)
 >>> extract('waiterrant.blogspot.com')
 ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
 ```

@@ -219,10 +224,12 @@ extract = tldextract.TLDExtract(
     fallback_to_snapshot=False)
 ```

- …
- …
+If the cached version of public suffix definitions doesn't exist, such as on
+the first run, the above snippet will request the URLs you specified in order,
+and use the first successful response.

-If you want to use input data from your local filesystem, …
+If you want to use input data from your local filesystem, use the `file://`
+protocol with an absolute path:

 ```python
 extract = tldextract.TLDExtract(

@@ -231,17 +238,24 @@ extract = tldextract.TLDExtract(
     fallback_to_snapshot=False)
 ```

-
-`os.path` is your friend.
-
-The command line update command can be used with a URL or local file you specify:
+This also works via command line update:

 ```zsh
 tldextract --update --suffix_list_url "http://foo.bar.baz"
 ```

- …
-list on first use, or if you are behind a complex …
+Using your own URLs could be useful in production when you don't want the delay
+with updating the suffix list on first use, or if you are behind a complex
+firewall.
+
+You can also specify additional suffixes in the `extra_suffixes` param. These
+will be merged into whatever public suffix definitions are already in use by
+`tldextract`.
+
+```python
+extract = tldextract.TLDExtract(
+    extra_suffixes=["foo", "bar", "baz"])
+```

 ## FAQ

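Putting the two options just above together in one sketch — a local copy of the suffix list plus extra suffixes. The file path and the extra suffixes are placeholders; `pathlib.Path.as_uri()` is one convenient way to build the absolute `file://` URL:

```python
from pathlib import Path

import tldextract

# Placeholder path; point this at your own copy of the Public Suffix List.
psl_file = Path("/etc/my-app/public_suffix_list.dat")

extract = tldextract.TLDExtract(
    suffix_list_urls=[psl_file.resolve().as_uri()],  # e.g. file:///etc/my-app/public_suffix_list.dat
    extra_suffixes=["internal", "corp"],  # merged into the definitions in use
    fallback_to_snapshot=False,
)

print(extract("host.team.internal").suffix)  # internal
```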
@@ -250,9 +264,9 @@ list on first use, or if you are behind a complex firewall that prevents a simpl
 This project doesn't contain an actual list of public suffixes. That comes from
 [the Public Suffix List (PSL)](https://publicsuffix.org/). Submit amendments there.

- …
+In the meantime, you can tell tldextract about your exception by either
 forking the PSL and using your fork in the `suffix_list_urls` param, or adding
-your suffix piecemeal with the `extra_suffixes` param.
+your suffix piecemeal with the `extra_suffixes` param.

 ### I see my suffix in [the Public Suffix List (PSL)](https://publicsuffix.org/), but this library doesn't extract it.


@@ -309,5 +323,5 @@ tox -e py311
 Automatically format all code:

 ```zsh
-black .
+ruff format .
 ```
README.md

The README.md changes are verbatim the same as the README portion of the PKG-INFO diff above, at these offsets:

@@ -55,8 +55,17 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
@@ -78,19 +87,12 @@ Or the latest dev version:
@@ -148,15 +150,17 @@ ExtractResult(subdomain='waiterrant', domain='blogspot', suffix='com', is_privat
@@ -179,10 +183,12 @@ extract = tldextract.TLDExtract(
@@ -191,17 +197,24 @@ extract = tldextract.TLDExtract(
@@ -210,9 +223,9 @@ list on first use, or if you are behind a complex firewall that prevents a simpl
@@ -269,5 +282,5 @@ tox -e py311
pyproject.toml

@@ -23,13 +23,13 @@ classifiers = [
     "Topic :: Utilities",
     "License :: OSI Approved :: BSD License",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ]
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 dynamic = ["version"]
 readme = "README.md"

@@ -46,7 +46,6 @@ release = [
     "twine",
 ]
 testing = [
-    "black",
     "mypy",
     "pytest",
     "pytest-gitignore",

@@ -55,6 +54,7 @@ testing = [
     "ruff",
     "syrupy",
     "tox",
+    "tox-uv",
     "types-filelock",
     "types-requests",
 ]

@@ -90,6 +90,9 @@ strict = true
 [tool.pytest.ini_options]
 addopts = "--doctest-modules"

+[tool.ruff.format]
+docstring-code-format = true
+
 [tool.ruff.lint]
 select = [
     "A",

@@ -104,7 +107,7 @@ select = [
     "W",
 ]
 ignore = [
-    "E501", # line too long; if Black does its job, not worried about the rare long line
+    "E501", # line too long; if formatter does its job, not worried about the rare long line
 ]

 [tool.ruff.lint.pydocstyle]
scripts/release.py

@@ -13,28 +13,44 @@ It will:
 Prerequisites:
     - This must be run from the root of the repository.
     - The repo must have a clean git working tree.
-    - The user must have the GITHUB_TOKEN environment variable set to a
-      …
+    - The user must have the `GITHUB_TOKEN` environment variable set to a
+      GitHub personal access token with repository "Contents" read and write
+      permission. To generate, see
+      https://github.com/settings/personal-access-tokens
+    - The user will need an API token for the PyPI repository, which the user
+      will be prompted for during the upload step. The user will need to paste
+      the token manually from a password manager or similar. To generate, see
+      https://pypi.org/manage/account/
+    - The CHANGELOG.md file must already contain an entry for the version being
+      released.
+    - Install requirements with: `pip install --upgrade --editable
+      '.[release]'`

 """

 from __future__ import annotations

+import contextlib
 import os
 import re
 import subprocess
 import sys
+from collections.abc import Iterator
 from pathlib import Path

 import requests


-def add_git_tag_for_version(version: str) -> None:
+@contextlib.contextmanager
+def add_git_tag_for_version(version: str) -> Iterator[None]:
     """Add a git tag for the given version."""
     subprocess.run(["git", "tag", "-a", version, "-m", version], check=True)
     print(f"Version {version} tag added successfully.")
+    try:
+        yield
+    except:
+        subprocess.run(["git", "tag", "-d", version])
+        raise


 def remove_previous_dist() -> None:
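The context-manager change above makes the release tag self-cleaning: if anything inside the `with` block fails, the tag is deleted before the error propagates, so a failed release can be retried. A stripped-down sketch of the same rollback pattern, independent of this script (the tag name and the commented-out release step are illustrative):

```python
import contextlib
import subprocess
from collections.abc import Iterator


@contextlib.contextmanager
def temporary_git_tag(version: str) -> Iterator[None]:
    """Create an annotated tag; delete it again if the wrapped block raises."""
    subprocess.run(["git", "tag", "-a", version, "-m", version], check=True)
    try:
        yield
    except BaseException:
        # Roll back the tag so the failed attempt leaves no trace.
        subprocess.run(["git", "tag", "-d", version])
        raise


# Usage: every step inside the block runs with the tag in place; an exception
# anywhere in the block removes the tag before re-raising.
# with temporary_git_tag("5.2.0"):
#     build_and_upload()  # hypothetical release steps
```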
@@ -68,14 +84,16 @@ def verify_build(is_test: str) -> None:
     confirmation = input("Does the build look correct? (y/n): ")
     if confirmation == "y":
         print("Build verified successfully.")
-        upload_build_to_pypi(is_test)
-        push_git_tags()
     else:
         raise Exception("Could not verify. Build was not uploaded.")


 def generate_github_release_notes_body(token: str, version: str) -> str:
-    """Generate and grab release notes URL from Github."""
+    """Generate and grab release notes URL from Github.
+
+    Delete their first paragraph, because we track its contents in a tighter
+    form in CHANGELOG.md. See `get_changelog_release_notes`.
+    """
     response = requests.post(
         "https://api.github.com/repos/john-kurkowski/tldextract/releases/generate-notes",
         headers={

@@ -94,24 +112,13 @@ def generate_github_release_notes_body(token: str, version: str) -> str:
             file=sys.stderr,
         )
         return ""
-    return str(response.json()["body"])

+    body = str(response.json()["body"])
+    paragraphs = body.split("\n\n")
+    return "\n\n".join(paragraphs[1:])

-def get_release_notes_url(body: str) -> str:
-    """Parse the release notes content to get the changelog URL."""
-    url_pattern = re.compile(r"\*\*Full Changelog\*\*: (.*)$")
-    match = url_pattern.search(body)
-    if match:
-        return match.group(1)
-    else:
-        print(
-            "WARNING: Failed to parse release notes URL from GitHub response.",
-            file=sys.stderr,
-        )
-        return ""

-
-def get_changelog_release_notes(release_notes_url: str, version: str) -> str:
+def get_changelog_release_notes(version: str) -> str:
     """Get the changelog release notes.

     Uses a regex starting on a heading beginning with the version number

@@ -125,25 +132,15 @@ def get_changelog_release_notes(release_notes_url: str, version: str) -> str:
     if match:
         return str(match.group(1)).strip()
     else:
-        print(
-            f"WARNING: Failed to parse changelog release notes. Manually copy this version's notes from the CHANGELOG.md file to {release_notes_url}.",
-            file=sys.stderr,
-        )
         return ""


-def create_release_notes_body(token: str, version: str) -> str:
-    """Compile the release notes."""
-    github_release_body = generate_github_release_notes_body(token, version)
-    release_notes_url = get_release_notes_url(github_release_body)
-    changelog_notes = get_changelog_release_notes(release_notes_url, version)
-    full_release_notes = f"{changelog_notes}\n\n**Full Changelog**: {release_notes_url}"
-    return full_release_notes
-
-
 def create_github_release_draft(token: str, version: str) -> None:
     """Create a release on GitHub."""
-    …
+    github_release_body = generate_github_release_notes_body(token, version)
+    changelog_notes = get_changelog_release_notes(version)
+    release_body = f"{changelog_notes}\n\n{github_release_body}"
+
     response = requests.post(
         "https://api.github.com/repos/john-kurkowski/tldextract/releases",
         headers={

@@ -168,7 +165,14 @@ def create_github_release_draft(token: str, version: str) -> None:
             file=sys.stderr,
         )
         return
-    …
+
+    print(f"Release created successfully: {response.json()['html_url']}")
+
+    if not changelog_notes:
+        print(
+            "WARNING: Failed to parse changelog release notes. Manually copy this version's notes from the CHANGELOG.md file to the above URL.",
+            file=sys.stderr,
+        )


 def upload_build_to_pypi(is_test: str) -> None:

@@ -227,10 +231,12 @@ def main() -> None:
     is_test = get_is_test_response()
     version_number = input("Enter the version number: ")

-    add_git_tag_for_version(version_number)
-    remove_previous_dist()
-    create_build()
-    verify_build(is_test)
+    with add_git_tag_for_version(version_number):
+        remove_previous_dist()
+        create_build()
+        verify_build(is_test)
+        upload_build_to_pypi(is_test)
+        push_git_tags()
     create_github_release_draft(github_token, version_number)

tests/main_test.py

@@ -4,7 +4,6 @@ from __future__ import annotations

 import logging
 import os
-import sys
 import tempfile
 from collections.abc import Sequence
 from pathlib import Path

@@ -168,8 +167,7 @@ def test_looks_like_ipv6() -> None:
     assert looks_like_ipv6("aBcD:ef01:2345:6789:aBcD:ef01:aaaa:2288") is True
     assert looks_like_ipv6("aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.1") is True
     assert looks_like_ipv6("ZBcD:ef01:2345:6789:aBcD:ef01:127.0.0.1") is False
-    …
-        assert looks_like_ipv6("aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.01") is False
+    assert looks_like_ipv6("aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.01") is False
     assert looks_like_ipv6("aBcD:ef01:2345:6789:aBcD:") is False


@@ -417,6 +415,46 @@ def test_ipv4_lookalike() -> None:
     )


+def test_reverse_domain_name_notation() -> None:
+    """Test property `reverse_domain_name`."""
+    assert (
+        tldextract.extract("www.example.com").reverse_domain_name == "com.example.www"
+    )
+    assert (
+        tldextract.extract("www.theregister.co.uk").reverse_domain_name
+        == "co.uk.theregister.www"
+    )
+    assert tldextract.extract("example.com").reverse_domain_name == "com.example"
+    assert (
+        tldextract.extract("theregister.co.uk").reverse_domain_name
+        == "co.uk.theregister"
+    )
+    assert (
+        tldextract.extract("media.forums.theregister.co.uk").reverse_domain_name
+        == "co.uk.theregister.forums.media"
+    )
+    assert (
+        tldextract.extract(
+            "foo.uk.com", include_psl_private_domains=False
+        ).reverse_domain_name
+        == "com.uk.foo"
+    )
+    assert (
+        tldextract.extract(
+            "foo.uk.com", include_psl_private_domains=True
+        ).reverse_domain_name
+        == "uk.com.foo"
+    )
+
+
+def test_bad_kwargs_no_way_to_fetch() -> None:
+    """Test an impossible combination of kwargs that disable all ways to fetch data."""
+    with pytest.raises(ValueError, match="disable all ways"):
+        tldextract.TLDExtract(
+            cache_dir=None, suffix_list_urls=(), fallback_to_snapshot=False
+        )
+
+
 def test_cache_permission(
     mocker: pytest_mock.MockerFixture, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
 ) -> None: