tldextract 5.1.3__tar.gz → 5.3.0__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {tldextract-5.1.3 → tldextract-5.3.0}/CHANGELOG.md +33 -1
  2. {tldextract-5.1.3 → tldextract-5.3.0}/LICENSE +1 -1
  3. {tldextract-5.1.3 → tldextract-5.3.0}/PKG-INFO +19 -14
  4. {tldextract-5.1.3 → tldextract-5.3.0}/README.md +16 -12
  5. {tldextract-5.1.3 → tldextract-5.3.0}/pyproject.toml +6 -0
  6. {tldextract-5.1.3 → tldextract-5.3.0}/scripts/release.py +13 -5
  7. {tldextract-5.1.3 → tldextract-5.3.0}/tests/cli_test.py +6 -2
  8. {tldextract-5.1.3 → tldextract-5.3.0}/tests/custom_suffix_test.py +12 -5
  9. {tldextract-5.1.3 → tldextract-5.3.0}/tests/main_test.py +138 -10
  10. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/.tld_set_snapshot +984 -954
  11. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/__init__.py +4 -2
  12. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/_version.py +9 -4
  13. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/cli.py +9 -1
  14. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/remote.py +1 -1
  15. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/tldextract.py +254 -62
  16. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/PKG-INFO +19 -14
  17. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/SOURCES.txt +0 -1
  18. tldextract-5.1.3/tests/integration_test.py +0 -13
  19. {tldextract-5.1.3 → tldextract-5.3.0}/.github/FUNDING.yml +0 -0
  20. {tldextract-5.1.3 → tldextract-5.3.0}/.github/workflows/ci.yml +0 -0
  21. {tldextract-5.1.3 → tldextract-5.3.0}/.gitignore +0 -0
  22. {tldextract-5.1.3 → tldextract-5.3.0}/setup.cfg +0 -0
  23. {tldextract-5.1.3 → tldextract-5.3.0}/tests/__init__.py +0 -0
  24. {tldextract-5.1.3 → tldextract-5.3.0}/tests/__snapshots__/test_release.ambr +0 -0
  25. {tldextract-5.1.3 → tldextract-5.3.0}/tests/conftest.py +0 -0
  26. {tldextract-5.1.3 → tldextract-5.3.0}/tests/fixtures/fake_suffix_list_fixture.dat +0 -0
  27. {tldextract-5.1.3 → tldextract-5.3.0}/tests/test_cache.py +0 -0
  28. {tldextract-5.1.3 → tldextract-5.3.0}/tests/test_parallel.py +0 -0
  29. {tldextract-5.1.3 → tldextract-5.3.0}/tests/test_release.py +0 -0
  30. {tldextract-5.1.3 → tldextract-5.3.0}/tests/test_trie.py +0 -0
  31. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/__main__.py +0 -0
  32. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/cache.py +0 -0
  33. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/py.typed +0 -0
  34. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract/suffix_list.py +0 -0
  35. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/dependency_links.txt +0 -0
  36. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/entry_points.txt +0 -0
  37. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/requires.txt +0 -0
  38. {tldextract-5.1.3 → tldextract-5.3.0}/tldextract.egg-info/top_level.txt +0 -0
  39. {tldextract-5.1.3 → tldextract-5.3.0}/tox.ini +0 -0
@@ -3,6 +3,38 @@
3
3
  After upgrading, update your cache file by deleting it or via `tldextract
4
4
  --update`.
5
5
 
6
+ ## 5.3.0 (2025-04-21)
7
+
8
+ * Features
9
+ * Add result field `registry_suffix` ([#344](https://github.com/john-kurkowski/tldextract/issues/344))
10
+ * To complement the existing public suffix field `suffix`
11
+ * Add result property `top_domain_under_public_suffix` ([#344](https://github.com/john-kurkowski/tldextract/issues/344))
12
+ * Add result property `top_domain_under_registry_suffix` ([#344](https://github.com/john-kurkowski/tldextract/issues/344))
13
+ * Deprecate `registered_domain` property
14
+ * Use `top_domain_under_public_suffix` instead, which has the same behavior
15
+ but a more accurate name
16
+ * Bugfixes
17
+ * Fix missing `reverse_domain_name` property in CLI `--json` output ([`a545c67`](https://github.com/john-kurkowski/tldextract/commit/a545c67d87223616fc13e90692886b3ca9af18bb))
18
+ * Misc.
19
+ * Expand internal `suffix_index` return type to be richer than bools, and
20
+ include the registry suffix during trie traversal
21
+ ([#344](https://github.com/john-kurkowski/tldextract/issues/344))
22
+
23
+ ## 5.2.0 (2025-04-07)
24
+
25
+ * Features
26
+ * Add `reverse_domain_name` result property ([#342](https://github.com/john-kurkowski/tldextract/issues/342))
27
+ * Bugfixes
28
+ * Extend exported public interface with `ExtractResult` and `update` ([`36ff658`](https://github.com/john-kurkowski/tldextract/commit/36ff658c53b510c5d56f8af235c8b08ce3c512f5))
29
+ * These were always meant to be public. Eases user import.
30
+ * Docs
31
+ * Document result fields
32
+ * Note all return values
33
+ * Colocate usage in the usage section
34
+ * Link to private domain docs
35
+ * Misc.
36
+ * Update bundled snapshot
37
+
6
38
  ## 5.1.3 (2024-11-04)
7
39
 
8
40
  * Bugfixes
@@ -10,7 +42,7 @@ After upgrading, update your cache file by deleting it or via `tldextract
10
42
  * Drop support for EOL Python 3.8 ([#340](https://github.com/john-kurkowski/tldextract/issues/340))
11
43
  * Support Python 3.13 ([#341](https://github.com/john-kurkowski/tldextract/issues/341))
12
44
  * Update bundled snapshot
13
- * Documentation
45
+ * Docs
14
46
  * Clarify how to use your own definitions
15
47
  * Clarify first-successful definitions vs. merged definitions
16
48
  * Misc.
@@ -1,6 +1,6 @@
1
1
  BSD 3-Clause License
2
2
 
3
- Copyright (c) 2013-2024, John Kurkowski
3
+ Copyright (c) 2013-2025, John Kurkowski
4
4
  All rights reserved.
5
5
 
6
6
  Redistribution and use in source and binary forms, with or without
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: tldextract
3
- Version: 5.1.3
3
+ Version: 5.3.0
4
4
  Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
5
5
  Author-email: John Kurkowski <john.kurkowski@gmail.com>
6
6
  License: BSD-3-Clause
@@ -37,6 +37,7 @@ Requires-Dist: tox; extra == "testing"
37
37
  Requires-Dist: tox-uv; extra == "testing"
38
38
  Requires-Dist: types-filelock; extra == "testing"
39
39
  Requires-Dist: types-requests; extra == "testing"
40
+ Dynamic: license-file
40
41
 
41
42
  # tldextract [![PyPI version](https://badge.fury.io/py/tldextract.svg)](https://badge.fury.io/py/tldextract) [![Build Status](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml/badge.svg)](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml)
42
43
 
@@ -89,14 +90,23 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
89
90
 
90
91
  ```python
91
92
  >>> ext = tldextract.extract('http://forums.bbc.co.uk')
92
- >>> ext.registered_domain
93
+ >>> ext.top_domain_under_public_suffix
93
94
  'bbc.co.uk'
94
95
  >>> ext.fqdn
95
96
  'forums.bbc.co.uk'
96
97
  ```
97
98
 
99
+ In addition to the Python interface, there is a command-line interface. Split
100
+ the URL components by space:
101
+
102
+ ```zsh
103
+ $ tldextract 'http://forums.bbc.co.uk'
104
+ forums bbc co.uk
105
+ ```
106
+
98
107
  By default, this package supports the public ICANN TLDs and their exceptions.
99
- You can optionally support the Public Suffix List's private domains as well.
108
+ You can optionally support the Public Suffix List's [private
109
+ domains](#public-vs-private-domains) as well.
100
110
 
101
111
  This package started by implementing the chosen answer from [this StackOverflow question on
102
112
  getting the "domain name" from a URL](http://stackoverflow.com/questions/569137/how-to-get-domain-name-from-url/569219#569219).
@@ -118,13 +128,6 @@ Or the latest dev version:
118
128
  pip install -e 'git://github.com/john-kurkowski/tldextract.git#egg=tldextract'
119
129
  ```
120
130
 
121
- Command-line usage, splits the URL components by space:
122
-
123
- ```zsh
124
- tldextract http://forums.bbc.co.uk
125
- # forums bbc co.uk
126
- ```
127
-
128
131
  ## Note about caching
129
132
 
130
133
  Beware when first calling `tldextract`, it updates its TLD list with a live HTTP
@@ -188,15 +191,17 @@ ExtractResult(subdomain='waiterrant', domain='blogspot', suffix='com', is_privat
188
191
  ```
189
192
 
190
193
  The following overrides this.
194
+
191
195
  ```python
192
196
  >>> extract = tldextract.TLDExtract()
193
197
  >>> extract('waiterrant.blogspot.com', include_psl_private_domains=True)
194
198
  ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
195
199
  ```
196
200
 
197
- or to change the default for all extract calls,
201
+ To change the default for all extract calls:
202
+
198
203
  ```python
199
- >>> extract = tldextract.TLDExtract( include_psl_private_domains=True)
204
+ >>> extract = tldextract.TLDExtract(include_psl_private_domains=True)
200
205
  >>> extract('waiterrant.blogspot.com')
201
206
  ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
202
207
  ```
@@ -282,7 +287,7 @@ For example:
282
287
  extractor = TLDExtract()
283
288
  split_url = urllib.parse.urlsplit("https://foo.bar.com:8080")
284
289
  split_suffix = extractor.extract_urllib(split_url)
285
- url_to_crawl = f"{split_url.scheme}://{split_suffix.registered_domain}:{split_url.port}"
290
+ url_to_crawl = f"{split_url.scheme}://{split_suffix.top_domain_under_public_suffix}:{split_url.port}"
286
291
  ```
287
292
 
288
293
  `tldextract`'s lenient string parsing stance lowers the learning curve of using
@@ -49,14 +49,23 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
49
49
 
50
50
  ```python
51
51
  >>> ext = tldextract.extract('http://forums.bbc.co.uk')
52
- >>> ext.registered_domain
52
+ >>> ext.top_domain_under_public_suffix
53
53
  'bbc.co.uk'
54
54
  >>> ext.fqdn
55
55
  'forums.bbc.co.uk'
56
56
  ```
57
57
 
58
+ In addition to the Python interface, there is a command-line interface. Split
59
+ the URL components by space:
60
+
61
+ ```zsh
62
+ $ tldextract 'http://forums.bbc.co.uk'
63
+ forums bbc co.uk
64
+ ```
65
+
58
66
  By default, this package supports the public ICANN TLDs and their exceptions.
59
- You can optionally support the Public Suffix List's private domains as well.
67
+ You can optionally support the Public Suffix List's [private
68
+ domains](#public-vs-private-domains) as well.
60
69
 
61
70
  This package started by implementing the chosen answer from [this StackOverflow question on
62
71
  getting the "domain name" from a URL](http://stackoverflow.com/questions/569137/how-to-get-domain-name-from-url/569219#569219).
@@ -78,13 +87,6 @@ Or the latest dev version:
78
87
  pip install -e 'git://github.com/john-kurkowski/tldextract.git#egg=tldextract'
79
88
  ```
80
89
 
81
- Command-line usage, splits the URL components by space:
82
-
83
- ```zsh
84
- tldextract http://forums.bbc.co.uk
85
- # forums bbc co.uk
86
- ```
87
-
88
90
  ## Note about caching
89
91
 
90
92
  Beware when first calling `tldextract`, it updates its TLD list with a live HTTP
@@ -148,15 +150,17 @@ ExtractResult(subdomain='waiterrant', domain='blogspot', suffix='com', is_privat
148
150
  ```
149
151
 
150
152
  The following overrides this.
153
+
151
154
  ```python
152
155
  >>> extract = tldextract.TLDExtract()
153
156
  >>> extract('waiterrant.blogspot.com', include_psl_private_domains=True)
154
157
  ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
155
158
  ```
156
159
 
157
- or to change the default for all extract calls,
160
+ To change the default for all extract calls:
161
+
158
162
  ```python
159
- >>> extract = tldextract.TLDExtract( include_psl_private_domains=True)
163
+ >>> extract = tldextract.TLDExtract(include_psl_private_domains=True)
160
164
  >>> extract('waiterrant.blogspot.com')
161
165
  ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
162
166
  ```
@@ -242,7 +246,7 @@ For example:
242
246
  extractor = TLDExtract()
243
247
  split_url = urllib.parse.urlsplit("https://foo.bar.com:8080")
244
248
  split_suffix = extractor.extract_urllib(split_url)
245
- url_to_crawl = f"{split_url.scheme}://{split_suffix.registered_domain}:{split_url.port}"
249
+ url_to_crawl = f"{split_url.scheme}://{split_suffix.top_domain_under_public_suffix}:{split_url.port}"
246
250
  ```
247
251
 
248
252
  `tldextract`'s lenient string parsing stance lowers the learning curve of using
@@ -89,6 +89,12 @@ strict = true
89
89
 
90
90
  [tool.pytest.ini_options]
91
91
  addopts = "--doctest-modules"
92
+ filterwarnings = [
93
+ "ignore:The 'registered_domain' property is deprecated:DeprecationWarning:tldextract.*:"
94
+ ]
95
+
96
+ [tool.ruff.format]
97
+ docstring-code-format = true
92
98
 
93
99
  [tool.ruff.lint]
94
100
  select = [
@@ -13,10 +13,18 @@ It will:
13
13
  Prerequisites:
14
14
  - This must be run from the root of the repository.
15
15
  - The repo must have a clean git working tree.
16
- - The user must have the GITHUB_TOKEN environment variable set to a GitHub personal access token with repository "Contents" read and write permission.
17
- - The user will need credentials for the PyPI repository, which the user will be prompted for during the upload step. The user will need to paste the token manually from a password manager or similar.
18
- - The CHANGELOG.md file must already contain an entry for the version being released.
19
- - Install requirements with: pip install --upgrade --editable '.[release]'
16
+ - The user must have the `GITHUB_TOKEN` environment variable set to a
17
+ GitHub personal access token with repository "Contents" read and write
18
+ permission. To generate, see
19
+ https://github.com/settings/personal-access-tokens
20
+ - The user will need an API token for the PyPI repository, which the user
21
+ will be prompted for during the upload step. The user will need to paste
22
+ the token manually from a password manager or similar. To generate, see
23
+ https://pypi.org/manage/account/
24
+ - The CHANGELOG.md file must already contain an entry for the version being
25
+ released.
26
+ - Install requirements with: `pip install --upgrade --editable
27
+ '.[release]'`
20
28
 
21
29
  """
22
30
 
@@ -158,7 +166,7 @@ def create_github_release_draft(token: str, version: str) -> None:
158
166
  )
159
167
  return
160
168
 
161
- print(f'Release created successfully: {response.json()["html_url"]}')
169
+ print(f"Release created successfully: {response.json()['html_url']}")
162
170
 
163
171
  if not changelog_notes:
164
172
  print(
@@ -77,12 +77,16 @@ def test_cli_json_output(
77
77
  stdout, stderr = capsys.readouterr()
78
78
  assert not stderr
79
79
  assert json.loads(stdout) == {
80
- "subdomain": "www",
81
80
  "domain": "bbc",
82
- "suffix": "co.uk",
83
81
  "fqdn": "www.bbc.co.uk",
84
82
  "ipv4": "",
85
83
  "ipv6": "",
86
84
  "is_private": False,
87
85
  "registered_domain": "bbc.co.uk",
86
+ "registry_suffix": "co.uk",
87
+ "reverse_domain_name": "co.uk.bbc.www",
88
+ "subdomain": "www",
89
+ "suffix": "co.uk",
90
+ "top_domain_under_public_suffix": "bbc.co.uk",
91
+ "top_domain_under_registry_suffix": "bbc.co.uk",
88
92
  }
@@ -32,12 +32,19 @@ def test_private_extraction() -> None:
32
32
  """Test this library's uncached, offline, private domain extraction."""
33
33
  tld = tldextract.TLDExtract(cache_dir=tempfile.mkdtemp(), suffix_list_urls=[])
34
34
 
35
- assert tld("foo.blogspot.com") == ExtractResult("foo", "blogspot", "com", False)
35
+ assert tld("foo.blogspot.com") == ExtractResult(
36
+ subdomain="foo",
37
+ domain="blogspot",
38
+ suffix="com",
39
+ is_private=False,
40
+ registry_suffix="com",
41
+ )
36
42
  assert tld("foo.blogspot.com", include_psl_private_domains=True) == ExtractResult(
37
- "",
38
- "foo",
39
- "blogspot.com",
40
- True,
43
+ subdomain="",
44
+ domain="foo",
45
+ suffix="blogspot.com",
46
+ is_private=True,
47
+ registry_suffix="com",
41
48
  )
42
49
 
43
50
 
@@ -374,6 +374,42 @@ def test_dns_root_label() -> None:
374
374
  )
375
375
 
376
376
 
377
+ def test_top_domain_under_public_suffix() -> None:
378
+ """Test property `top_domain_under_public_suffix`."""
379
+ assert (
380
+ tldextract.extract(
381
+ "http://www.example.auth.us-east-1.amazoncognito.com",
382
+ include_psl_private_domains=False,
383
+ ).top_domain_under_public_suffix
384
+ == "amazoncognito.com"
385
+ )
386
+ assert (
387
+ tldextract.extract(
388
+ "http://www.example.auth.us-east-1.amazoncognito.com",
389
+ include_psl_private_domains=True,
390
+ ).top_domain_under_public_suffix
391
+ == "example.auth.us-east-1.amazoncognito.com"
392
+ )
393
+
394
+
395
+ def test_top_domain_under_registry_suffix() -> None:
396
+ """Test property `top_domain_under_registry_suffix`."""
397
+ assert (
398
+ tldextract.extract(
399
+ "http://www.example.auth.us-east-1.amazoncognito.com",
400
+ include_psl_private_domains=False,
401
+ ).top_domain_under_registry_suffix
402
+ == "amazoncognito.com"
403
+ )
404
+ assert (
405
+ tldextract.extract(
406
+ "http://www.example.auth.us-east-1.amazoncognito.com",
407
+ include_psl_private_domains=True,
408
+ ).top_domain_under_registry_suffix
409
+ == "amazoncognito.com"
410
+ )
411
+
412
+
377
413
  def test_ipv4() -> None:
378
414
  """Test IPv4 addresses."""
379
415
  assert_extract(
@@ -415,6 +451,46 @@ def test_ipv4_lookalike() -> None:
415
451
  )
416
452
 
417
453
 
454
+ def test_reverse_domain_name_notation() -> None:
455
+ """Test property `reverse_domain_name`."""
456
+ assert (
457
+ tldextract.extract("www.example.com").reverse_domain_name == "com.example.www"
458
+ )
459
+ assert (
460
+ tldextract.extract("www.theregister.co.uk").reverse_domain_name
461
+ == "co.uk.theregister.www"
462
+ )
463
+ assert tldextract.extract("example.com").reverse_domain_name == "com.example"
464
+ assert (
465
+ tldextract.extract("theregister.co.uk").reverse_domain_name
466
+ == "co.uk.theregister"
467
+ )
468
+ assert (
469
+ tldextract.extract("media.forums.theregister.co.uk").reverse_domain_name
470
+ == "co.uk.theregister.forums.media"
471
+ )
472
+ assert (
473
+ tldextract.extract(
474
+ "foo.uk.com", include_psl_private_domains=False
475
+ ).reverse_domain_name
476
+ == "com.uk.foo"
477
+ )
478
+ assert (
479
+ tldextract.extract(
480
+ "foo.uk.com", include_psl_private_domains=True
481
+ ).reverse_domain_name
482
+ == "uk.com.foo"
483
+ )
484
+
485
+
486
+ def test_bad_kwargs_no_way_to_fetch() -> None:
487
+ """Test an impossible combination of kwargs that disable all ways to fetch data."""
488
+ with pytest.raises(ValueError, match="disable all ways"):
489
+ tldextract.TLDExtract(
490
+ cache_dir=None, suffix_list_urls=(), fallback_to_snapshot=False
491
+ )
492
+
493
+
418
494
  def test_cache_permission(
419
495
  mocker: pytest_mock.MockerFixture, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
420
496
  ) -> None:
@@ -486,12 +562,22 @@ def test_include_psl_private_domain_attr() -> None:
486
562
  extract_public1 = tldextract.TLDExtract()
487
563
  extract_public2 = tldextract.TLDExtract(include_psl_private_domains=False)
488
564
  assert extract_private("foo.uk.com") == ExtractResult(
489
- subdomain="", domain="foo", suffix="uk.com", is_private=True
565
+ subdomain="",
566
+ domain="foo",
567
+ suffix="uk.com",
568
+ is_private=True,
569
+ registry_suffix="com",
490
570
  )
491
571
  assert (
492
572
  extract_public1("foo.uk.com")
493
573
  == extract_public2("foo.uk.com")
494
- == ExtractResult(subdomain="foo", domain="uk", suffix="com", is_private=False)
574
+ == ExtractResult(
575
+ subdomain="foo",
576
+ domain="uk",
577
+ suffix="com",
578
+ is_private=False,
579
+ registry_suffix="com",
580
+ )
495
581
  )
496
582
 
497
583
 
@@ -514,11 +600,21 @@ def test_global_extract() -> None:
514
600
  """
515
601
  assert tldextract.extract(
516
602
  "blogspot.com", include_psl_private_domains=True
517
- ) == ExtractResult(subdomain="", domain="", suffix="blogspot.com", is_private=True)
603
+ ) == ExtractResult(
604
+ subdomain="",
605
+ domain="",
606
+ suffix="blogspot.com",
607
+ is_private=True,
608
+ registry_suffix="com",
609
+ )
518
610
  assert tldextract.extract(
519
611
  "foo.blogspot.com", include_psl_private_domains=True
520
612
  ) == ExtractResult(
521
- subdomain="", domain="foo", suffix="blogspot.com", is_private=True
613
+ subdomain="",
614
+ domain="foo",
615
+ suffix="blogspot.com",
616
+ is_private=True,
617
+ registry_suffix="com",
522
618
  )
523
619
 
524
620
 
@@ -534,15 +630,26 @@ def test_private_domains_depth() -> None:
534
630
  domain="amazonaws",
535
631
  suffix="com",
536
632
  is_private=False,
633
+ registry_suffix="com",
537
634
  )
538
635
  assert tldextract.extract(
539
636
  "ap-south-1.amazonaws.com", include_psl_private_domains=True
540
637
  ) == ExtractResult(
541
- subdomain="ap-south-1", domain="amazonaws", suffix="com", is_private=False
638
+ subdomain="ap-south-1",
639
+ domain="amazonaws",
640
+ suffix="com",
641
+ is_private=False,
642
+ registry_suffix="com",
542
643
  )
543
644
  assert tldextract.extract(
544
645
  "amazonaws.com", include_psl_private_domains=True
545
- ) == ExtractResult(subdomain="", domain="amazonaws", suffix="com", is_private=False)
646
+ ) == ExtractResult(
647
+ subdomain="",
648
+ domain="amazonaws",
649
+ suffix="com",
650
+ is_private=False,
651
+ registry_suffix="com",
652
+ )
546
653
  assert tldextract.extract(
547
654
  "the-quick-brown-fox.cn-north-1.amazonaws.com.cn",
548
655
  include_psl_private_domains=True,
@@ -551,16 +658,25 @@ def test_private_domains_depth() -> None:
551
658
  domain="amazonaws",
552
659
  suffix="com.cn",
553
660
  is_private=False,
661
+ registry_suffix="com.cn",
554
662
  )
555
663
  assert tldextract.extract(
556
664
  "cn-north-1.amazonaws.com.cn", include_psl_private_domains=True
557
665
  ) == ExtractResult(
558
- subdomain="cn-north-1", domain="amazonaws", suffix="com.cn", is_private=False
666
+ subdomain="cn-north-1",
667
+ domain="amazonaws",
668
+ suffix="com.cn",
669
+ is_private=False,
670
+ registry_suffix="com.cn",
559
671
  )
560
672
  assert tldextract.extract(
561
673
  "amazonaws.com.cn", include_psl_private_domains=True
562
674
  ) == ExtractResult(
563
- subdomain="", domain="amazonaws", suffix="com.cn", is_private=False
675
+ subdomain="",
676
+ domain="amazonaws",
677
+ suffix="com.cn",
678
+ is_private=False,
679
+ registry_suffix="com.cn",
564
680
  )
565
681
  assert tldextract.extract(
566
682
  "another.icann.compute.amazonaws.com", include_psl_private_domains=True
@@ -569,6 +685,7 @@ def test_private_domains_depth() -> None:
569
685
  domain="another",
570
686
  suffix="icann.compute.amazonaws.com",
571
687
  is_private=True,
688
+ registry_suffix="com",
572
689
  )
573
690
  assert tldextract.extract(
574
691
  "another.s3.dualstack.us-east-1.amazonaws.com", include_psl_private_domains=True
@@ -577,12 +694,17 @@ def test_private_domains_depth() -> None:
577
694
  domain="another",
578
695
  suffix="s3.dualstack.us-east-1.amazonaws.com",
579
696
  is_private=True,
697
+ registry_suffix="com",
580
698
  )
581
699
 
582
700
  assert tldextract.extract(
583
701
  "s3.ap-south-1.amazonaws.com", include_psl_private_domains=True
584
702
  ) == ExtractResult(
585
- subdomain="", domain="", suffix="s3.ap-south-1.amazonaws.com", is_private=True
703
+ subdomain="",
704
+ domain="",
705
+ suffix="s3.ap-south-1.amazonaws.com",
706
+ is_private=True,
707
+ registry_suffix="com",
586
708
  )
587
709
  assert tldextract.extract(
588
710
  "s3.cn-north-1.amazonaws.com.cn", include_psl_private_domains=True
@@ -591,11 +713,16 @@ def test_private_domains_depth() -> None:
591
713
  domain="",
592
714
  suffix="s3.cn-north-1.amazonaws.com.cn",
593
715
  is_private=True,
716
+ registry_suffix="com.cn",
594
717
  )
595
718
  assert tldextract.extract(
596
719
  "icann.compute.amazonaws.com", include_psl_private_domains=True
597
720
  ) == ExtractResult(
598
- subdomain="", domain="", suffix="icann.compute.amazonaws.com", is_private=True
721
+ subdomain="",
722
+ domain="",
723
+ suffix="icann.compute.amazonaws.com",
724
+ is_private=True,
725
+ registry_suffix="com",
599
726
  )
600
727
 
601
728
  # Entire URL is private suffix which ends with another private suffix
@@ -607,4 +734,5 @@ def test_private_domains_depth() -> None:
607
734
  domain="",
608
735
  suffix="s3.dualstack.us-east-1.amazonaws.com",
609
736
  is_private=True,
737
+ registry_suffix="com",
610
738
  )