tldextract 5.1.3__tar.gz → 5.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tldextract-5.1.3 → tldextract-5.3.0}/CHANGELOG.md +33 -1
- {tldextract-5.1.3 → tldextract-5.3.0}/LICENSE +1 -1
- {tldextract-5.1.3 → tldextract-5.3.0}/PKG-INFO +19 -14
- {tldextract-5.1.3 → tldextract-5.3.0}/README.md +16 -12
- {tldextract-5.1.3 → tldextract-5.3.0}/pyproject.toml +6 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/scripts/release.py +13 -5
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/cli_test.py +6 -2
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/custom_suffix_test.py +12 -5
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/main_test.py +138 -10
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/.tld_set_snapshot +984 -954
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/__init__.py +4 -2
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/_version.py +9 -4
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/cli.py +9 -1
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/remote.py +1 -1
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/tldextract.py +254 -62
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/PKG-INFO +19 -14
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/SOURCES.txt +0 -1
- tldextract-5.1.3/tests/integration_test.py +0 -13
- {tldextract-5.1.3 → tldextract-5.3.0}/.github/FUNDING.yml +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/.github/workflows/ci.yml +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/.gitignore +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/setup.cfg +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/__init__.py +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/__snapshots__/test_release.ambr +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/conftest.py +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/fixtures/fake_suffix_list_fixture.dat +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/test_cache.py +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/test_parallel.py +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/test_release.py +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tests/test_trie.py +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/__main__.py +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/cache.py +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/py.typed +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/suffix_list.py +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/dependency_links.txt +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/entry_points.txt +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/requires.txt +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/top_level.txt +0 -0
- {tldextract-5.1.3 → tldextract-5.3.0}/tox.ini +0 -0
@@ -3,6 +3,38 @@
|
|
3
3
|
After upgrading, update your cache file by deleting it or via `tldextract
|
4
4
|
--update`.
|
5
5
|
|
6
|
+
## 5.3.0 (2025-04-21)
|
7
|
+
|
8
|
+
* Features
|
9
|
+
* Add result field `registry_suffix` ([#344](https://github.com/john-kurkowski/tldextract/issues/344))
|
10
|
+
* To complement the existing public suffix field `suffix`
|
11
|
+
* Add result property `top_domain_under_public_suffix` ([#344](https://github.com/john-kurkowski/tldextract/issues/344))
|
12
|
+
* Add result property `top_domain_under_registry_suffix` ([#344](https://github.com/john-kurkowski/tldextract/issues/344))
|
13
|
+
* Deprecate `registered_domain` property
|
14
|
+
* Use `top_domain_under_public_suffix` instead, which has the same behavior
|
15
|
+
but a more accurate name
|
16
|
+
* Bugfixes
|
17
|
+
* Fix missing `reverse_domain_name` property in CLI `--json` output ([`a545c67`](https://github.com/john-kurkowski/tldextract/commit/a545c67d87223616fc13e90692886b3ca9af18bb))
|
18
|
+
* Misc.
|
19
|
+
* Expand internal `suffix_index` return type to be richer than bools, and
|
20
|
+
include the registry suffix during trie traversal
|
21
|
+
([#344](https://github.com/john-kurkowski/tldextract/issues/344))
|
22
|
+
|
23
|
+
## 5.2.0 (2025-04-07)
|
24
|
+
|
25
|
+
* Features
|
26
|
+
* Add `reverse_domain_name` result property ([#342](https://github.com/john-kurkowski/tldextract/issues/342))
|
27
|
+
* Bugfixes
|
28
|
+
* Extend exported public interface with `ExtractResult` and `update` ([`36ff658`](https://github.com/john-kurkowski/tldextract/commit/36ff658c53b510c5d56f8af235c8b08ce3c512f5))
|
29
|
+
* These were always meant to be public. Eases user import.
|
30
|
+
* Docs
|
31
|
+
* Document result fields
|
32
|
+
* Note all return values
|
33
|
+
* Colocate usage in the usage section
|
34
|
+
* Link to private domain docs
|
35
|
+
* Misc.
|
36
|
+
* Update bundled snapshot
|
37
|
+
|
6
38
|
## 5.1.3 (2024-11-04)
|
7
39
|
|
8
40
|
* Bugfixes
|
@@ -10,7 +42,7 @@ After upgrading, update your cache file by deleting it or via `tldextract
|
|
10
42
|
* Drop support for EOL Python 3.8 ([#340](https://github.com/john-kurkowski/tldextract/issues/340))
|
11
43
|
* Support Python 3.13 ([#341](https://github.com/john-kurkowski/tldextract/issues/341))
|
12
44
|
* Update bundled snapshot
|
13
|
-
*
|
45
|
+
* Docs
|
14
46
|
* Clarify how to use your own definitions
|
15
47
|
* Clarify first-successful definitions vs. merged definitions
|
16
48
|
* Misc.
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: tldextract
|
3
|
-
Version: 5.1.3
|
3
|
+
Version: 5.3.0
|
4
4
|
Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
|
5
5
|
Author-email: John Kurkowski <john.kurkowski@gmail.com>
|
6
6
|
License: BSD-3-Clause
|
@@ -37,6 +37,7 @@ Requires-Dist: tox; extra == "testing"
|
|
37
37
|
Requires-Dist: tox-uv; extra == "testing"
|
38
38
|
Requires-Dist: types-filelock; extra == "testing"
|
39
39
|
Requires-Dist: types-requests; extra == "testing"
|
40
|
+
Dynamic: license-file
|
40
41
|
|
41
42
|
# tldextract [PyPI version](https://badge.fury.io/py/tldextract) [CI status](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml)
|
42
43
|
|
@@ -89,14 +90,23 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
|
|
89
90
|
|
90
91
|
```python
|
91
92
|
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
|
92
|
-
>>> ext.registered_domain
|
93
|
+
>>> ext.top_domain_under_public_suffix
|
93
94
|
'bbc.co.uk'
|
94
95
|
>>> ext.fqdn
|
95
96
|
'forums.bbc.co.uk'
|
96
97
|
```
|
97
98
|
|
99
|
+
In addition to the Python interface, there is a command-line interface. Split
|
100
|
+
the URL components by space:
|
101
|
+
|
102
|
+
```zsh
|
103
|
+
$ tldextract 'http://forums.bbc.co.uk'
|
104
|
+
forums bbc co.uk
|
105
|
+
```
|
106
|
+
|
98
107
|
By default, this package supports the public ICANN TLDs and their exceptions.
|
99
|
-
You can optionally support the Public Suffix List's private domains as well.
|
108
|
+
You can optionally support the Public Suffix List's [private
|
109
|
+
domains](#public-vs-private-domains) as well.
|
100
110
|
|
101
111
|
This package started by implementing the chosen answer from [this StackOverflow question on
|
102
112
|
getting the "domain name" from a URL](http://stackoverflow.com/questions/569137/how-to-get-domain-name-from-url/569219#569219).
|
@@ -118,13 +128,6 @@ Or the latest dev version:
|
|
118
128
|
pip install -e 'git://github.com/john-kurkowski/tldextract.git#egg=tldextract'
|
119
129
|
```
|
120
130
|
|
121
|
-
Command-line usage, splits the URL components by space:
|
122
|
-
|
123
|
-
```zsh
|
124
|
-
tldextract http://forums.bbc.co.uk
|
125
|
-
# forums bbc co.uk
|
126
|
-
```
|
127
|
-
|
128
131
|
## Note about caching
|
129
132
|
|
130
133
|
Beware when first calling `tldextract`, it updates its TLD list with a live HTTP
|
@@ -188,15 +191,17 @@ ExtractResult(subdomain='waiterrant', domain='blogspot', suffix='com', is_privat
|
|
188
191
|
```
|
189
192
|
|
190
193
|
The following overrides this.
|
194
|
+
|
191
195
|
```python
|
192
196
|
>>> extract = tldextract.TLDExtract()
|
193
197
|
>>> extract('waiterrant.blogspot.com', include_psl_private_domains=True)
|
194
198
|
ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
|
195
199
|
```
|
196
200
|
|
197
|
-
|
201
|
+
To change the default for all extract calls:
|
202
|
+
|
198
203
|
```python
|
199
|
-
>>> extract = tldextract.TLDExtract(
|
204
|
+
>>> extract = tldextract.TLDExtract(include_psl_private_domains=True)
|
200
205
|
>>> extract('waiterrant.blogspot.com')
|
201
206
|
ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
|
202
207
|
```
|
@@ -282,7 +287,7 @@ For example:
|
|
282
287
|
extractor = TLDExtract()
|
283
288
|
split_url = urllib.parse.urlsplit("https://foo.bar.com:8080")
|
284
289
|
split_suffix = extractor.extract_urllib(split_url)
|
285
|
-
url_to_crawl = f"{split_url.scheme}://{split_suffix.registered_domain}:{split_url.port}"
|
290
|
+
url_to_crawl = f"{split_url.scheme}://{split_suffix.top_domain_under_public_suffix}:{split_url.port}"
|
286
291
|
```
|
287
292
|
|
288
293
|
`tldextract`'s lenient string parsing stance lowers the learning curve of using
|
@@ -49,14 +49,23 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
|
|
49
49
|
|
50
50
|
```python
|
51
51
|
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
|
52
|
-
>>> ext.registered_domain
|
52
|
+
>>> ext.top_domain_under_public_suffix
|
53
53
|
'bbc.co.uk'
|
54
54
|
>>> ext.fqdn
|
55
55
|
'forums.bbc.co.uk'
|
56
56
|
```
|
57
57
|
|
58
|
+
In addition to the Python interface, there is a command-line interface. Split
|
59
|
+
the URL components by space:
|
60
|
+
|
61
|
+
```zsh
|
62
|
+
$ tldextract 'http://forums.bbc.co.uk'
|
63
|
+
forums bbc co.uk
|
64
|
+
```
|
65
|
+
|
58
66
|
By default, this package supports the public ICANN TLDs and their exceptions.
|
59
|
-
You can optionally support the Public Suffix List's private domains as well.
|
67
|
+
You can optionally support the Public Suffix List's [private
|
68
|
+
domains](#public-vs-private-domains) as well.
|
60
69
|
|
61
70
|
This package started by implementing the chosen answer from [this StackOverflow question on
|
62
71
|
getting the "domain name" from a URL](http://stackoverflow.com/questions/569137/how-to-get-domain-name-from-url/569219#569219).
|
@@ -78,13 +87,6 @@ Or the latest dev version:
|
|
78
87
|
pip install -e 'git://github.com/john-kurkowski/tldextract.git#egg=tldextract'
|
79
88
|
```
|
80
89
|
|
81
|
-
Command-line usage, splits the URL components by space:
|
82
|
-
|
83
|
-
```zsh
|
84
|
-
tldextract http://forums.bbc.co.uk
|
85
|
-
# forums bbc co.uk
|
86
|
-
```
|
87
|
-
|
88
90
|
## Note about caching
|
89
91
|
|
90
92
|
Beware when first calling `tldextract`, it updates its TLD list with a live HTTP
|
@@ -148,15 +150,17 @@ ExtractResult(subdomain='waiterrant', domain='blogspot', suffix='com', is_privat
|
|
148
150
|
```
|
149
151
|
|
150
152
|
The following overrides this.
|
153
|
+
|
151
154
|
```python
|
152
155
|
>>> extract = tldextract.TLDExtract()
|
153
156
|
>>> extract('waiterrant.blogspot.com', include_psl_private_domains=True)
|
154
157
|
ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
|
155
158
|
```
|
156
159
|
|
157
|
-
|
160
|
+
To change the default for all extract calls:
|
161
|
+
|
158
162
|
```python
|
159
|
-
>>> extract = tldextract.TLDExtract(
|
163
|
+
>>> extract = tldextract.TLDExtract(include_psl_private_domains=True)
|
160
164
|
>>> extract('waiterrant.blogspot.com')
|
161
165
|
ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
|
162
166
|
```
|
@@ -242,7 +246,7 @@ For example:
|
|
242
246
|
extractor = TLDExtract()
|
243
247
|
split_url = urllib.parse.urlsplit("https://foo.bar.com:8080")
|
244
248
|
split_suffix = extractor.extract_urllib(split_url)
|
245
|
-
url_to_crawl = f"{split_url.scheme}://{split_suffix.registered_domain}:{split_url.port}"
|
249
|
+
url_to_crawl = f"{split_url.scheme}://{split_suffix.top_domain_under_public_suffix}:{split_url.port}"
|
246
250
|
```
|
247
251
|
|
248
252
|
`tldextract`'s lenient string parsing stance lowers the learning curve of using
|
@@ -89,6 +89,12 @@ strict = true
|
|
89
89
|
|
90
90
|
[tool.pytest.ini_options]
|
91
91
|
addopts = "--doctest-modules"
|
92
|
+
filterwarnings = [
|
93
|
+
"ignore:The 'registered_domain' property is deprecated:DeprecationWarning:tldextract.*:"
|
94
|
+
]
|
95
|
+
|
96
|
+
[tool.ruff.format]
|
97
|
+
docstring-code-format = true
|
92
98
|
|
93
99
|
[tool.ruff.lint]
|
94
100
|
select = [
|
@@ -13,10 +13,18 @@ It will:
|
|
13
13
|
Prerequisites:
|
14
14
|
- This must be run from the root of the repository.
|
15
15
|
- The repo must have a clean git working tree.
|
16
|
-
- The user must have the GITHUB_TOKEN environment variable set to a
|
17
|
-
|
18
|
-
|
19
|
-
|
16
|
+
- The user must have the `GITHUB_TOKEN` environment variable set to a
|
17
|
+
GitHub personal access token with repository "Contents" read and write
|
18
|
+
permission. To generate, see
|
19
|
+
https://github.com/settings/personal-access-tokens
|
20
|
+
- The user will need an API token for the PyPI repository, which the user
|
21
|
+
will be prompted for during the upload step. The user will need to paste
|
22
|
+
the token manually from a password manager or similar. To generate, see
|
23
|
+
https://pypi.org/manage/account/
|
24
|
+
- The CHANGELOG.md file must already contain an entry for the version being
|
25
|
+
released.
|
26
|
+
- Install requirements with: `pip install --upgrade --editable
|
27
|
+
'.[release]'`
|
20
28
|
|
21
29
|
"""
|
22
30
|
|
@@ -158,7 +166,7 @@ def create_github_release_draft(token: str, version: str) -> None:
|
|
158
166
|
)
|
159
167
|
return
|
160
168
|
|
161
|
-
print(f
|
169
|
+
print(f"Release created successfully: {response.json()['html_url']}")
|
162
170
|
|
163
171
|
if not changelog_notes:
|
164
172
|
print(
|
@@ -77,12 +77,16 @@ def test_cli_json_output(
|
|
77
77
|
stdout, stderr = capsys.readouterr()
|
78
78
|
assert not stderr
|
79
79
|
assert json.loads(stdout) == {
|
80
|
-
"subdomain": "www",
|
81
80
|
"domain": "bbc",
|
82
|
-
"suffix": "co.uk",
|
83
81
|
"fqdn": "www.bbc.co.uk",
|
84
82
|
"ipv4": "",
|
85
83
|
"ipv6": "",
|
86
84
|
"is_private": False,
|
87
85
|
"registered_domain": "bbc.co.uk",
|
86
|
+
"registry_suffix": "co.uk",
|
87
|
+
"reverse_domain_name": "co.uk.bbc.www",
|
88
|
+
"subdomain": "www",
|
89
|
+
"suffix": "co.uk",
|
90
|
+
"top_domain_under_public_suffix": "bbc.co.uk",
|
91
|
+
"top_domain_under_registry_suffix": "bbc.co.uk",
|
88
92
|
}
|
@@ -32,12 +32,19 @@ def test_private_extraction() -> None:
|
|
32
32
|
"""Test this library's uncached, offline, private domain extraction."""
|
33
33
|
tld = tldextract.TLDExtract(cache_dir=tempfile.mkdtemp(), suffix_list_urls=[])
|
34
34
|
|
35
|
-
assert tld("foo.blogspot.com") == ExtractResult(
|
35
|
+
assert tld("foo.blogspot.com") == ExtractResult(
|
36
|
+
subdomain="foo",
|
37
|
+
domain="blogspot",
|
38
|
+
suffix="com",
|
39
|
+
is_private=False,
|
40
|
+
registry_suffix="com",
|
41
|
+
)
|
36
42
|
assert tld("foo.blogspot.com", include_psl_private_domains=True) == ExtractResult(
|
37
|
-
"",
|
38
|
-
"foo",
|
39
|
-
"blogspot.com",
|
40
|
-
True,
|
43
|
+
subdomain="",
|
44
|
+
domain="foo",
|
45
|
+
suffix="blogspot.com",
|
46
|
+
is_private=True,
|
47
|
+
registry_suffix="com",
|
41
48
|
)
|
42
49
|
|
43
50
|
|
@@ -374,6 +374,42 @@ def test_dns_root_label() -> None:
|
|
374
374
|
)
|
375
375
|
|
376
376
|
|
377
|
+
def test_top_domain_under_public_suffix() -> None:
|
378
|
+
"""Test property `top_domain_under_public_suffix`."""
|
379
|
+
assert (
|
380
|
+
tldextract.extract(
|
381
|
+
"http://www.example.auth.us-east-1.amazoncognito.com",
|
382
|
+
include_psl_private_domains=False,
|
383
|
+
).top_domain_under_public_suffix
|
384
|
+
== "amazoncognito.com"
|
385
|
+
)
|
386
|
+
assert (
|
387
|
+
tldextract.extract(
|
388
|
+
"http://www.example.auth.us-east-1.amazoncognito.com",
|
389
|
+
include_psl_private_domains=True,
|
390
|
+
).top_domain_under_public_suffix
|
391
|
+
== "example.auth.us-east-1.amazoncognito.com"
|
392
|
+
)
|
393
|
+
|
394
|
+
|
395
|
+
def test_top_domain_under_registry_suffix() -> None:
|
396
|
+
"""Test property `top_domain_under_registry_suffix`."""
|
397
|
+
assert (
|
398
|
+
tldextract.extract(
|
399
|
+
"http://www.example.auth.us-east-1.amazoncognito.com",
|
400
|
+
include_psl_private_domains=False,
|
401
|
+
).top_domain_under_registry_suffix
|
402
|
+
== "amazoncognito.com"
|
403
|
+
)
|
404
|
+
assert (
|
405
|
+
tldextract.extract(
|
406
|
+
"http://www.example.auth.us-east-1.amazoncognito.com",
|
407
|
+
include_psl_private_domains=True,
|
408
|
+
).top_domain_under_registry_suffix
|
409
|
+
== "amazoncognito.com"
|
410
|
+
)
|
411
|
+
|
412
|
+
|
377
413
|
def test_ipv4() -> None:
|
378
414
|
"""Test IPv4 addresses."""
|
379
415
|
assert_extract(
|
@@ -415,6 +451,46 @@ def test_ipv4_lookalike() -> None:
|
|
415
451
|
)
|
416
452
|
|
417
453
|
|
454
|
+
def test_reverse_domain_name_notation() -> None:
|
455
|
+
"""Test property `reverse_domain_name`."""
|
456
|
+
assert (
|
457
|
+
tldextract.extract("www.example.com").reverse_domain_name == "com.example.www"
|
458
|
+
)
|
459
|
+
assert (
|
460
|
+
tldextract.extract("www.theregister.co.uk").reverse_domain_name
|
461
|
+
== "co.uk.theregister.www"
|
462
|
+
)
|
463
|
+
assert tldextract.extract("example.com").reverse_domain_name == "com.example"
|
464
|
+
assert (
|
465
|
+
tldextract.extract("theregister.co.uk").reverse_domain_name
|
466
|
+
== "co.uk.theregister"
|
467
|
+
)
|
468
|
+
assert (
|
469
|
+
tldextract.extract("media.forums.theregister.co.uk").reverse_domain_name
|
470
|
+
== "co.uk.theregister.forums.media"
|
471
|
+
)
|
472
|
+
assert (
|
473
|
+
tldextract.extract(
|
474
|
+
"foo.uk.com", include_psl_private_domains=False
|
475
|
+
).reverse_domain_name
|
476
|
+
== "com.uk.foo"
|
477
|
+
)
|
478
|
+
assert (
|
479
|
+
tldextract.extract(
|
480
|
+
"foo.uk.com", include_psl_private_domains=True
|
481
|
+
).reverse_domain_name
|
482
|
+
== "uk.com.foo"
|
483
|
+
)
|
484
|
+
|
485
|
+
|
486
|
+
def test_bad_kwargs_no_way_to_fetch() -> None:
|
487
|
+
"""Test an impossible combination of kwargs that disable all ways to fetch data."""
|
488
|
+
with pytest.raises(ValueError, match="disable all ways"):
|
489
|
+
tldextract.TLDExtract(
|
490
|
+
cache_dir=None, suffix_list_urls=(), fallback_to_snapshot=False
|
491
|
+
)
|
492
|
+
|
493
|
+
|
418
494
|
def test_cache_permission(
|
419
495
|
mocker: pytest_mock.MockerFixture, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
|
420
496
|
) -> None:
|
@@ -486,12 +562,22 @@ def test_include_psl_private_domain_attr() -> None:
|
|
486
562
|
extract_public1 = tldextract.TLDExtract()
|
487
563
|
extract_public2 = tldextract.TLDExtract(include_psl_private_domains=False)
|
488
564
|
assert extract_private("foo.uk.com") == ExtractResult(
|
489
|
-
subdomain="",
|
565
|
+
subdomain="",
|
566
|
+
domain="foo",
|
567
|
+
suffix="uk.com",
|
568
|
+
is_private=True,
|
569
|
+
registry_suffix="com",
|
490
570
|
)
|
491
571
|
assert (
|
492
572
|
extract_public1("foo.uk.com")
|
493
573
|
== extract_public2("foo.uk.com")
|
494
|
-
== ExtractResult(
|
574
|
+
== ExtractResult(
|
575
|
+
subdomain="foo",
|
576
|
+
domain="uk",
|
577
|
+
suffix="com",
|
578
|
+
is_private=False,
|
579
|
+
registry_suffix="com",
|
580
|
+
)
|
495
581
|
)
|
496
582
|
|
497
583
|
|
@@ -514,11 +600,21 @@ def test_global_extract() -> None:
|
|
514
600
|
"""
|
515
601
|
assert tldextract.extract(
|
516
602
|
"blogspot.com", include_psl_private_domains=True
|
517
|
-
) == ExtractResult(
|
603
|
+
) == ExtractResult(
|
604
|
+
subdomain="",
|
605
|
+
domain="",
|
606
|
+
suffix="blogspot.com",
|
607
|
+
is_private=True,
|
608
|
+
registry_suffix="com",
|
609
|
+
)
|
518
610
|
assert tldextract.extract(
|
519
611
|
"foo.blogspot.com", include_psl_private_domains=True
|
520
612
|
) == ExtractResult(
|
521
|
-
subdomain="",
|
613
|
+
subdomain="",
|
614
|
+
domain="foo",
|
615
|
+
suffix="blogspot.com",
|
616
|
+
is_private=True,
|
617
|
+
registry_suffix="com",
|
522
618
|
)
|
523
619
|
|
524
620
|
|
@@ -534,15 +630,26 @@ def test_private_domains_depth() -> None:
|
|
534
630
|
domain="amazonaws",
|
535
631
|
suffix="com",
|
536
632
|
is_private=False,
|
633
|
+
registry_suffix="com",
|
537
634
|
)
|
538
635
|
assert tldextract.extract(
|
539
636
|
"ap-south-1.amazonaws.com", include_psl_private_domains=True
|
540
637
|
) == ExtractResult(
|
541
|
-
subdomain="ap-south-1",
|
638
|
+
subdomain="ap-south-1",
|
639
|
+
domain="amazonaws",
|
640
|
+
suffix="com",
|
641
|
+
is_private=False,
|
642
|
+
registry_suffix="com",
|
542
643
|
)
|
543
644
|
assert tldextract.extract(
|
544
645
|
"amazonaws.com", include_psl_private_domains=True
|
545
|
-
) == ExtractResult(
|
646
|
+
) == ExtractResult(
|
647
|
+
subdomain="",
|
648
|
+
domain="amazonaws",
|
649
|
+
suffix="com",
|
650
|
+
is_private=False,
|
651
|
+
registry_suffix="com",
|
652
|
+
)
|
546
653
|
assert tldextract.extract(
|
547
654
|
"the-quick-brown-fox.cn-north-1.amazonaws.com.cn",
|
548
655
|
include_psl_private_domains=True,
|
@@ -551,16 +658,25 @@ def test_private_domains_depth() -> None:
|
|
551
658
|
domain="amazonaws",
|
552
659
|
suffix="com.cn",
|
553
660
|
is_private=False,
|
661
|
+
registry_suffix="com.cn",
|
554
662
|
)
|
555
663
|
assert tldextract.extract(
|
556
664
|
"cn-north-1.amazonaws.com.cn", include_psl_private_domains=True
|
557
665
|
) == ExtractResult(
|
558
|
-
subdomain="cn-north-1",
|
666
|
+
subdomain="cn-north-1",
|
667
|
+
domain="amazonaws",
|
668
|
+
suffix="com.cn",
|
669
|
+
is_private=False,
|
670
|
+
registry_suffix="com.cn",
|
559
671
|
)
|
560
672
|
assert tldextract.extract(
|
561
673
|
"amazonaws.com.cn", include_psl_private_domains=True
|
562
674
|
) == ExtractResult(
|
563
|
-
subdomain="",
|
675
|
+
subdomain="",
|
676
|
+
domain="amazonaws",
|
677
|
+
suffix="com.cn",
|
678
|
+
is_private=False,
|
679
|
+
registry_suffix="com.cn",
|
564
680
|
)
|
565
681
|
assert tldextract.extract(
|
566
682
|
"another.icann.compute.amazonaws.com", include_psl_private_domains=True
|
@@ -569,6 +685,7 @@ def test_private_domains_depth() -> None:
|
|
569
685
|
domain="another",
|
570
686
|
suffix="icann.compute.amazonaws.com",
|
571
687
|
is_private=True,
|
688
|
+
registry_suffix="com",
|
572
689
|
)
|
573
690
|
assert tldextract.extract(
|
574
691
|
"another.s3.dualstack.us-east-1.amazonaws.com", include_psl_private_domains=True
|
@@ -577,12 +694,17 @@ def test_private_domains_depth() -> None:
|
|
577
694
|
domain="another",
|
578
695
|
suffix="s3.dualstack.us-east-1.amazonaws.com",
|
579
696
|
is_private=True,
|
697
|
+
registry_suffix="com",
|
580
698
|
)
|
581
699
|
|
582
700
|
assert tldextract.extract(
|
583
701
|
"s3.ap-south-1.amazonaws.com", include_psl_private_domains=True
|
584
702
|
) == ExtractResult(
|
585
|
-
subdomain="",
|
703
|
+
subdomain="",
|
704
|
+
domain="",
|
705
|
+
suffix="s3.ap-south-1.amazonaws.com",
|
706
|
+
is_private=True,
|
707
|
+
registry_suffix="com",
|
586
708
|
)
|
587
709
|
assert tldextract.extract(
|
588
710
|
"s3.cn-north-1.amazonaws.com.cn", include_psl_private_domains=True
|
@@ -591,11 +713,16 @@ def test_private_domains_depth() -> None:
|
|
591
713
|
domain="",
|
592
714
|
suffix="s3.cn-north-1.amazonaws.com.cn",
|
593
715
|
is_private=True,
|
716
|
+
registry_suffix="com.cn",
|
594
717
|
)
|
595
718
|
assert tldextract.extract(
|
596
719
|
"icann.compute.amazonaws.com", include_psl_private_domains=True
|
597
720
|
) == ExtractResult(
|
598
|
-
subdomain="",
|
721
|
+
subdomain="",
|
722
|
+
domain="",
|
723
|
+
suffix="icann.compute.amazonaws.com",
|
724
|
+
is_private=True,
|
725
|
+
registry_suffix="com",
|
599
726
|
)
|
600
727
|
|
601
728
|
# Entire URL is private suffix which ends with another private suffix
|
@@ -607,4 +734,5 @@ def test_private_domains_depth() -> None:
|
|
607
734
|
domain="",
|
608
735
|
suffix="s3.dualstack.us-east-1.amazonaws.com",
|
609
736
|
is_private=True,
|
737
|
+
registry_suffix="com",
|
610
738
|
)
|