tldextract 5.1.3__tar.gz → 5.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tldextract-5.1.3 → tldextract-5.2.0}/CHANGELOG.md +16 -1
- {tldextract-5.1.3 → tldextract-5.2.0}/LICENSE +1 -1
- {tldextract-5.1.3 → tldextract-5.2.0}/PKG-INFO +17 -12
- {tldextract-5.1.3 → tldextract-5.2.0}/README.md +14 -10
- {tldextract-5.1.3 → tldextract-5.2.0}/pyproject.toml +3 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/scripts/release.py +13 -5
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/main_test.py +40 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract/.tld_set_snapshot +984 -954
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract/__init__.py +4 -2
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract/_version.py +9 -4
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract/remote.py +1 -1
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract/tldextract.py +88 -30
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract.egg-info/PKG-INFO +17 -12
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract.egg-info/SOURCES.txt +0 -1
- tldextract-5.1.3/tests/integration_test.py +0 -13
- {tldextract-5.1.3 → tldextract-5.2.0}/.github/FUNDING.yml +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/.github/workflows/ci.yml +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/.gitignore +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/setup.cfg +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/__init__.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/__snapshots__/test_release.ambr +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/cli_test.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/conftest.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/custom_suffix_test.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/fixtures/fake_suffix_list_fixture.dat +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/test_cache.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/test_parallel.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/test_release.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tests/test_trie.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract/__main__.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract/cache.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract/cli.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract/py.typed +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract/suffix_list.py +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract.egg-info/dependency_links.txt +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract.egg-info/entry_points.txt +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract.egg-info/requires.txt +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tldextract.egg-info/top_level.txt +0 -0
- {tldextract-5.1.3 → tldextract-5.2.0}/tox.ini +0 -0
@@ -3,6 +3,21 @@
|
|
3
3
|
After upgrading, update your cache file by deleting it or via `tldextract
|
4
4
|
--update`.
|
5
5
|
|
6
|
+
## 5.2.0 (2025-04-07)
|
7
|
+
|
8
|
+
* Features
|
9
|
+
* Add `reverse_domain_name` result property ([#342](https://github.com/john-kurkowski/tldextract/issues/342))
|
10
|
+
* Bugfixes
|
11
|
+
* Extend exported public interface with `ExtractResult` and `update` ([`36ff658`](https://github.com/john-kurkowski/tldextract/commit/36ff658c53b510c5d56f8af235c8b08ce3c512f5))
|
12
|
+
* These were always meant to be public. Eases user import.
|
13
|
+
* Docs
|
14
|
+
* Document result fields
|
15
|
+
* Note all return values
|
16
|
+
* Colocate usage in the usage section
|
17
|
+
* Link to private domain docs
|
18
|
+
* Misc.
|
19
|
+
* Update bundled snapshot
|
20
|
+
|
6
21
|
## 5.1.3 (2024-11-04)
|
7
22
|
|
8
23
|
* Bugfixes
|
@@ -10,7 +25,7 @@ After upgrading, update your cache file by deleting it or via `tldextract
|
|
10
25
|
* Drop support for EOL Python 3.8 ([#340](https://github.com/john-kurkowski/tldextract/issues/340))
|
11
26
|
* Support Python 3.13 ([#341](https://github.com/john-kurkowski/tldextract/issues/341))
|
12
27
|
* Update bundled snapshot
|
13
|
-
*
|
28
|
+
* Docs
|
14
29
|
* Clarify how to use your own definitions
|
15
30
|
* Clarify first-successful definitions vs. merged definitions
|
16
31
|
* Misc.
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: tldextract
|
3
|
-
Version: 5.1.3
|
3
|
+
Version: 5.2.0
|
4
4
|
Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
|
5
5
|
Author-email: John Kurkowski <john.kurkowski@gmail.com>
|
6
6
|
License: BSD-3-Clause
|
@@ -37,6 +37,7 @@ Requires-Dist: tox; extra == "testing"
|
|
37
37
|
Requires-Dist: tox-uv; extra == "testing"
|
38
38
|
Requires-Dist: types-filelock; extra == "testing"
|
39
39
|
Requires-Dist: types-requests; extra == "testing"
|
40
|
+
Dynamic: license-file
|
40
41
|
|
41
42
|
# tldextract [PyPI version](https://badge.fury.io/py/tldextract) [Build Status](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml)
|
42
43
|
|
@@ -95,8 +96,17 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
|
|
95
96
|
'forums.bbc.co.uk'
|
96
97
|
```
|
97
98
|
|
99
|
+
In addition to the Python interface, there is a command-line interface. Split
|
100
|
+
the URL components by space:
|
101
|
+
|
102
|
+
```zsh
|
103
|
+
$ tldextract 'http://forums.bbc.co.uk'
|
104
|
+
forums bbc co.uk
|
105
|
+
```
|
106
|
+
|
98
107
|
By default, this package supports the public ICANN TLDs and their exceptions.
|
99
|
-
You can optionally support the Public Suffix List's private domains as well.
|
108
|
+
You can optionally support the Public Suffix List's [private
|
109
|
+
domains](#public-vs-private-domains) as well.
|
100
110
|
|
101
111
|
This package started by implementing the chosen answer from [this StackOverflow question on
|
102
112
|
getting the "domain name" from a URL](http://stackoverflow.com/questions/569137/how-to-get-domain-name-from-url/569219#569219).
|
@@ -118,13 +128,6 @@ Or the latest dev version:
|
|
118
128
|
pip install -e 'git://github.com/john-kurkowski/tldextract.git#egg=tldextract'
|
119
129
|
```
|
120
130
|
|
121
|
-
Command-line usage, splits the URL components by space:
|
122
|
-
|
123
|
-
```zsh
|
124
|
-
tldextract http://forums.bbc.co.uk
|
125
|
-
# forums bbc co.uk
|
126
|
-
```
|
127
|
-
|
128
131
|
## Note about caching
|
129
132
|
|
130
133
|
Beware when first calling `tldextract`, it updates its TLD list with a live HTTP
|
@@ -188,15 +191,17 @@ ExtractResult(subdomain='waiterrant', domain='blogspot', suffix='com', is_privat
|
|
188
191
|
```
|
189
192
|
|
190
193
|
The following overrides this.
|
194
|
+
|
191
195
|
```python
|
192
196
|
>>> extract = tldextract.TLDExtract()
|
193
197
|
>>> extract('waiterrant.blogspot.com', include_psl_private_domains=True)
|
194
198
|
ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
|
195
199
|
```
|
196
200
|
|
197
|
-
|
201
|
+
To change the default for all extract calls:
|
202
|
+
|
198
203
|
```python
|
199
|
-
>>> extract = tldextract.TLDExtract(
|
204
|
+
>>> extract = tldextract.TLDExtract(include_psl_private_domains=True)
|
200
205
|
>>> extract('waiterrant.blogspot.com')
|
201
206
|
ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
|
202
207
|
```
|
@@ -55,8 +55,17 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
|
|
55
55
|
'forums.bbc.co.uk'
|
56
56
|
```
|
57
57
|
|
58
|
+
In addition to the Python interface, there is a command-line interface. Split
|
59
|
+
the URL components by space:
|
60
|
+
|
61
|
+
```zsh
|
62
|
+
$ tldextract 'http://forums.bbc.co.uk'
|
63
|
+
forums bbc co.uk
|
64
|
+
```
|
65
|
+
|
58
66
|
By default, this package supports the public ICANN TLDs and their exceptions.
|
59
|
-
You can optionally support the Public Suffix List's private domains as well.
|
67
|
+
You can optionally support the Public Suffix List's [private
|
68
|
+
domains](#public-vs-private-domains) as well.
|
60
69
|
|
61
70
|
This package started by implementing the chosen answer from [this StackOverflow question on
|
62
71
|
getting the "domain name" from a URL](http://stackoverflow.com/questions/569137/how-to-get-domain-name-from-url/569219#569219).
|
@@ -78,13 +87,6 @@ Or the latest dev version:
|
|
78
87
|
pip install -e 'git://github.com/john-kurkowski/tldextract.git#egg=tldextract'
|
79
88
|
```
|
80
89
|
|
81
|
-
Command-line usage, splits the URL components by space:
|
82
|
-
|
83
|
-
```zsh
|
84
|
-
tldextract http://forums.bbc.co.uk
|
85
|
-
# forums bbc co.uk
|
86
|
-
```
|
87
|
-
|
88
90
|
## Note about caching
|
89
91
|
|
90
92
|
Beware when first calling `tldextract`, it updates its TLD list with a live HTTP
|
@@ -148,15 +150,17 @@ ExtractResult(subdomain='waiterrant', domain='blogspot', suffix='com', is_privat
|
|
148
150
|
```
|
149
151
|
|
150
152
|
The following overrides this.
|
153
|
+
|
151
154
|
```python
|
152
155
|
>>> extract = tldextract.TLDExtract()
|
153
156
|
>>> extract('waiterrant.blogspot.com', include_psl_private_domains=True)
|
154
157
|
ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
|
155
158
|
```
|
156
159
|
|
157
|
-
|
160
|
+
To change the default for all extract calls:
|
161
|
+
|
158
162
|
```python
|
159
|
-
>>> extract = tldextract.TLDExtract(
|
163
|
+
>>> extract = tldextract.TLDExtract(include_psl_private_domains=True)
|
160
164
|
>>> extract('waiterrant.blogspot.com')
|
161
165
|
ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
|
162
166
|
```
|
@@ -13,10 +13,18 @@ It will:
|
|
13
13
|
Prerequisites:
|
14
14
|
- This must be run from the root of the repository.
|
15
15
|
- The repo must have a clean git working tree.
|
16
|
-
- The user must have the GITHUB_TOKEN environment variable set to a
|
17
|
-
|
18
|
-
|
19
|
-
|
16
|
+
- The user must have the `GITHUB_TOKEN` environment variable set to a
|
17
|
+
GitHub personal access token with repository "Contents" read and write
|
18
|
+
permission. To generate, see
|
19
|
+
https://github.com/settings/personal-access-tokens
|
20
|
+
- The user will need an API token for the PyPI repository, which the user
|
21
|
+
will be prompted for during the upload step. The user will need to paste
|
22
|
+
the token manually from a password manager or similar. To generate, see
|
23
|
+
https://pypi.org/manage/account/
|
24
|
+
- The CHANGELOG.md file must already contain an entry for the version being
|
25
|
+
released.
|
26
|
+
- Install requirements with: `pip install --upgrade --editable
|
27
|
+
'.[release]'`
|
20
28
|
|
21
29
|
"""
|
22
30
|
|
@@ -158,7 +166,7 @@ def create_github_release_draft(token: str, version: str) -> None:
|
|
158
166
|
)
|
159
167
|
return
|
160
168
|
|
161
|
-
print(f
|
169
|
+
print(f"Release created successfully: {response.json()['html_url']}")
|
162
170
|
|
163
171
|
if not changelog_notes:
|
164
172
|
print(
|
@@ -415,6 +415,46 @@ def test_ipv4_lookalike() -> None:
|
|
415
415
|
)
|
416
416
|
|
417
417
|
|
418
|
+
def test_reverse_domain_name_notation() -> None:
|
419
|
+
"""Test property `reverse_domain_name`."""
|
420
|
+
assert (
|
421
|
+
tldextract.extract("www.example.com").reverse_domain_name == "com.example.www"
|
422
|
+
)
|
423
|
+
assert (
|
424
|
+
tldextract.extract("www.theregister.co.uk").reverse_domain_name
|
425
|
+
== "co.uk.theregister.www"
|
426
|
+
)
|
427
|
+
assert tldextract.extract("example.com").reverse_domain_name == "com.example"
|
428
|
+
assert (
|
429
|
+
tldextract.extract("theregister.co.uk").reverse_domain_name
|
430
|
+
== "co.uk.theregister"
|
431
|
+
)
|
432
|
+
assert (
|
433
|
+
tldextract.extract("media.forums.theregister.co.uk").reverse_domain_name
|
434
|
+
== "co.uk.theregister.forums.media"
|
435
|
+
)
|
436
|
+
assert (
|
437
|
+
tldextract.extract(
|
438
|
+
"foo.uk.com", include_psl_private_domains=False
|
439
|
+
).reverse_domain_name
|
440
|
+
== "com.uk.foo"
|
441
|
+
)
|
442
|
+
assert (
|
443
|
+
tldextract.extract(
|
444
|
+
"foo.uk.com", include_psl_private_domains=True
|
445
|
+
).reverse_domain_name
|
446
|
+
== "uk.com.foo"
|
447
|
+
)
|
448
|
+
|
449
|
+
|
450
|
+
def test_bad_kwargs_no_way_to_fetch() -> None:
|
451
|
+
"""Test an impossible combination of kwargs that disable all ways to fetch data."""
|
452
|
+
with pytest.raises(ValueError, match="disable all ways"):
|
453
|
+
tldextract.TLDExtract(
|
454
|
+
cache_dir=None, suffix_list_urls=(), fallback_to_snapshot=False
|
455
|
+
)
|
456
|
+
|
457
|
+
|
418
458
|
def test_cache_permission(
|
419
459
|
mocker: pytest_mock.MockerFixture, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
|
420
460
|
) -> None:
|