tldextract 4.0.0__tar.gz → 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tldextract-4.0.0 → tldextract-5.0.0}/.travis.yml +1 -3
- {tldextract-4.0.0 → tldextract-5.0.0}/CHANGELOG.md +24 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/PKG-INFO +9 -27
- {tldextract-4.0.0 → tldextract-5.0.0}/README.md +7 -24
- {tldextract-4.0.0 → tldextract-5.0.0}/pyproject.toml +23 -8
- tldextract-5.0.0/setup.cfg +4 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tests/cli_test.py +12 -4
- {tldextract-4.0.0 → tldextract-5.0.0}/tests/conftest.py +2 -4
- {tldextract-4.0.0 → tldextract-5.0.0}/tests/custom_suffix_test.py +13 -7
- tldextract-5.0.0/tests/integration_test.py +13 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tests/main_test.py +99 -94
- {tldextract-4.0.0 → tldextract-5.0.0}/tests/test_cache.py +15 -6
- {tldextract-4.0.0 → tldextract-5.0.0}/tests/test_parallel.py +11 -7
- {tldextract-4.0.0 → tldextract-5.0.0}/tests/test_trie.py +1 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/_version.py +2 -2
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/cache.py +13 -21
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/cli.py +2 -2
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/suffix_list.py +1 -1
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/tldextract.py +21 -38
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/PKG-INFO +9 -27
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/SOURCES.txt +0 -4
- tldextract-5.0.0/tox.ini +35 -0
- tldextract-4.0.0/MANIFEST.in +0 -4
- tldextract-4.0.0/conftest.py +0 -3
- tldextract-4.0.0/pytest.ini +0 -2
- tldextract-4.0.0/setup.cfg +0 -7
- tldextract-4.0.0/tests/integration_test.py +0 -12
- tldextract-4.0.0/tox.ini +0 -43
- {tldextract-4.0.0 → tldextract-5.0.0}/.github/FUNDING.yml +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/.gitignore +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/LICENSE +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tests/__init__.py +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tests/fixtures/fake_suffix_list_fixture.dat +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/.tld_set_snapshot +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/__init__.py +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/__main__.py +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/py.typed +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/remote.py +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/dependency_links.txt +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/entry_points.txt +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/requires.txt +0 -0
- {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/top_level.txt +0 -0
@@ -2,8 +2,6 @@ dist: focal
|
|
2
2
|
language: python
|
3
3
|
matrix:
|
4
4
|
include:
|
5
|
-
- python: "3.7"
|
6
|
-
env: TOXENV=py37
|
7
5
|
- python: "3.8"
|
8
6
|
env: TOXENV=py38
|
9
7
|
- python: "3.9"
|
@@ -12,7 +10,7 @@ matrix:
|
|
12
10
|
env: TOXENV=py310
|
13
11
|
- python: "3.11"
|
14
12
|
env: TOXENV=py311
|
15
|
-
- python: pypy3.
|
13
|
+
- python: pypy3.8-7.3.9
|
16
14
|
dist: xenial
|
17
15
|
env: TOXENV=pypy3
|
18
16
|
- env: TOXENV=codestyle
|
@@ -3,6 +3,30 @@
|
|
3
3
|
After upgrading, update your cache file by deleting it or via `tldextract
|
4
4
|
--update`.
|
5
5
|
|
6
|
+
## 5.0.0 (2023-10-11)
|
7
|
+
|
8
|
+
* Breaking Changes
|
9
|
+
* Migrate `ExtractResult` from `namedtuple` to `dataclass` ([#306](https://github.com/john-kurkowski/tldextract/issues/306))
|
10
|
+
* This means no more iterating/indexing/slicing/unpacking the result
|
11
|
+
object returned by this library. You must directly reference the
|
12
|
+
fields you're interested in. For example, instead of
|
13
|
+
```python
|
14
|
+
tldextract.extract("example.com")[1:3]
|
15
|
+
```
|
16
|
+
you must use
|
17
|
+
```python
|
18
|
+
ext = tldextract.extract("example.com")
|
19
|
+
(ext.domain, ext.suffix)
|
20
|
+
```
|
21
|
+
* Bugfixes
|
22
|
+
* Drop support for EOL Python 3.7
|
23
|
+
* Misc.
|
24
|
+
* Switch from pycodestyle and Pylint to Ruff ([#304](https://github.com/john-kurkowski/tldextract/issues/304))
|
25
|
+
* Consolidate config files
|
26
|
+
* Type tests
|
27
|
+
* Require docstrings in tests
|
28
|
+
* Remove obsolete tests
|
29
|
+
|
6
30
|
## 4.0.0 (2023-10-11)
|
7
31
|
|
8
32
|
* **Breaking** bugfixes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: tldextract
|
3
|
-
Version: 4.0.0
|
3
|
+
Version: 5.0.0
|
4
4
|
Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
|
5
5
|
Author-email: John Kurkowski <john.kurkowski@gmail.com>
|
6
6
|
License: BSD-3-Clause
|
@@ -10,12 +10,11 @@ Classifier: Development Status :: 5 - Production/Stable
|
|
10
10
|
Classifier: Topic :: Utilities
|
11
11
|
Classifier: License :: OSI Approved :: BSD License
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
13
|
-
Classifier: Programming Language :: Python :: 3.7
|
14
13
|
Classifier: Programming Language :: Python :: 3.8
|
15
14
|
Classifier: Programming Language :: Python :: 3.9
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
18
|
-
Requires-Python: >=3.7
|
17
|
+
Requires-Python: >=3.8
|
19
18
|
Description-Content-Type: text/markdown
|
20
19
|
License-File: LICENSE
|
21
20
|
Requires-Dist: idna
|
@@ -56,20 +55,6 @@ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False
|
|
56
55
|
ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
|
57
56
|
```
|
58
57
|
|
59
|
-
`ExtractResult` is a namedtuple, so it's simple to access the parts you want.
|
60
|
-
|
61
|
-
```python
|
62
|
-
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
|
63
|
-
>>> (ext.subdomain, ext.domain, ext.suffix)
|
64
|
-
('forums', 'bbc', 'co.uk')
|
65
|
-
>>> # rejoin subdomain and domain
|
66
|
-
>>> '.'.join(ext[:2])
|
67
|
-
'forums.bbc'
|
68
|
-
>>> # a common alias
|
69
|
-
>>> ext.registered_domain
|
70
|
-
'bbc.co.uk'
|
71
|
-
```
|
72
|
-
|
73
58
|
Note subdomain and suffix are _optional_. Not all URL-like inputs have a
|
74
59
|
subdomain or a valid suffix.
|
75
60
|
|
@@ -84,17 +69,14 @@ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_privat
|
|
84
69
|
ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
|
85
70
|
```
|
86
71
|
|
87
|
-
|
88
|
-
or suffix were found:
|
72
|
+
To rejoin the original hostname, if it was indeed a valid, registered hostname:
|
89
73
|
|
90
74
|
```python
|
91
|
-
>>> ext = tldextract.extract('http://
|
92
|
-
>>>
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
>>> '.'.join(part for part in ext[:3] if part)
|
97
|
-
'127.0.0.1'
|
75
|
+
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
|
76
|
+
>>> ext.registered_domain
|
77
|
+
'bbc.co.uk'
|
78
|
+
>>> ext.fqdn
|
79
|
+
'forums.bbc.co.uk'
|
98
80
|
```
|
99
81
|
|
100
82
|
By default, this package supports the public ICANN TLDs and their exceptions.
|
@@ -303,7 +285,7 @@ Run all tests against a specific Python environment configuration:
|
|
303
285
|
|
304
286
|
```zsh
|
305
287
|
tox -l
|
306
|
-
tox -e py37
|
288
|
+
tox -e py311
|
307
289
|
```
|
308
290
|
|
309
291
|
### Code Style
|
@@ -31,20 +31,6 @@ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False
|
|
31
31
|
ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
|
32
32
|
```
|
33
33
|
|
34
|
-
`ExtractResult` is a namedtuple, so it's simple to access the parts you want.
|
35
|
-
|
36
|
-
```python
|
37
|
-
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
|
38
|
-
>>> (ext.subdomain, ext.domain, ext.suffix)
|
39
|
-
('forums', 'bbc', 'co.uk')
|
40
|
-
>>> # rejoin subdomain and domain
|
41
|
-
>>> '.'.join(ext[:2])
|
42
|
-
'forums.bbc'
|
43
|
-
>>> # a common alias
|
44
|
-
>>> ext.registered_domain
|
45
|
-
'bbc.co.uk'
|
46
|
-
```
|
47
|
-
|
48
34
|
Note subdomain and suffix are _optional_. Not all URL-like inputs have a
|
49
35
|
subdomain or a valid suffix.
|
50
36
|
|
@@ -59,17 +45,14 @@ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_privat
|
|
59
45
|
ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
|
60
46
|
```
|
61
47
|
|
62
|
-
|
63
|
-
or suffix were found:
|
48
|
+
To rejoin the original hostname, if it was indeed a valid, registered hostname:
|
64
49
|
|
65
50
|
```python
|
66
|
-
>>> ext = tldextract.extract('http://
|
67
|
-
>>>
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
>>> '.'.join(part for part in ext[:3] if part)
|
72
|
-
'127.0.0.1'
|
51
|
+
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
|
52
|
+
>>> ext.registered_domain
|
53
|
+
'bbc.co.uk'
|
54
|
+
>>> ext.fqdn
|
55
|
+
'forums.bbc.co.uk'
|
73
56
|
```
|
74
57
|
|
75
58
|
By default, this package supports the public ICANN TLDs and their exceptions.
|
@@ -278,7 +261,7 @@ Run all tests against a specific Python environment configuration:
|
|
278
261
|
|
279
262
|
```zsh
|
280
263
|
tox -l
|
281
|
-
tox -e py37
|
264
|
+
tox -e py311
|
282
265
|
```
|
283
266
|
|
284
267
|
### Code Style
|
@@ -23,13 +23,12 @@ classifiers = [
|
|
23
23
|
"Topic :: Utilities",
|
24
24
|
"License :: OSI Approved :: BSD License",
|
25
25
|
"Programming Language :: Python :: 3",
|
26
|
-
"Programming Language :: Python :: 3.7",
|
27
26
|
"Programming Language :: Python :: 3.8",
|
28
27
|
"Programming Language :: Python :: 3.9",
|
29
28
|
"Programming Language :: Python :: 3.10",
|
30
29
|
"Programming Language :: Python :: 3.11",
|
31
30
|
]
|
32
|
-
requires-python = ">=3.7"
|
31
|
+
requires-python = ">=3.8"
|
33
32
|
dependencies = [
|
34
33
|
"idna",
|
35
34
|
"requests>=2.1.0",
|
@@ -67,11 +66,27 @@ version = {attr = "setuptools_scm.get_version"}
|
|
67
66
|
check_untyped_defs = true
|
68
67
|
disallow_incomplete_defs = true
|
69
68
|
disallow_untyped_calls = true
|
70
|
-
|
71
|
-
[[tool.mypy.overrides]]
|
72
|
-
module = ["tldextract.*"]
|
73
69
|
disallow_untyped_defs = true
|
74
70
|
|
75
|
-
[tool.
|
76
|
-
|
77
|
-
|
71
|
+
[tool.pytest.ini_options]
|
72
|
+
addopts = "--doctest-modules"
|
73
|
+
|
74
|
+
[tool.ruff]
|
75
|
+
select = [
|
76
|
+
"A",
|
77
|
+
"B",
|
78
|
+
"C",
|
79
|
+
"D",
|
80
|
+
"E",
|
81
|
+
"F",
|
82
|
+
"I",
|
83
|
+
"N",
|
84
|
+
"UP",
|
85
|
+
"W",
|
86
|
+
]
|
87
|
+
ignore = [
|
88
|
+
"E501", # line too long; if Black does its job, not worried about the rare long line
|
89
|
+
]
|
90
|
+
|
91
|
+
[tool.ruff.pydocstyle]
|
92
|
+
convention = "pep257"
|
@@ -8,7 +8,8 @@ from tldextract.cli import main
|
|
8
8
|
from tldextract.tldextract import PUBLIC_SUFFIX_LIST_URLS
|
9
9
|
|
10
10
|
|
11
|
-
def test_cli_no_input(monkeypatch):
|
11
|
+
def test_cli_no_input(monkeypatch: pytest.MonkeyPatch) -> None:
|
12
|
+
"""Test CLI without args."""
|
12
13
|
monkeypatch.setattr(sys, "argv", ["tldextract"])
|
13
14
|
with pytest.raises(SystemExit) as ex:
|
14
15
|
main()
|
@@ -16,7 +17,8 @@ def test_cli_no_input(monkeypatch):
|
|
16
17
|
assert ex.value.code == 1
|
17
18
|
|
18
19
|
|
19
|
-
def test_cli_parses_args(monkeypatch):
|
20
|
+
def test_cli_parses_args(monkeypatch: pytest.MonkeyPatch) -> None:
|
21
|
+
"""Test CLI with nonsense args."""
|
20
22
|
monkeypatch.setattr(sys, "argv", ["tldextract", "--some", "nonsense"])
|
21
23
|
with pytest.raises(SystemExit) as ex:
|
22
24
|
main()
|
@@ -24,7 +26,10 @@ def test_cli_parses_args(monkeypatch):
|
|
24
26
|
assert ex.value.code == 2
|
25
27
|
|
26
28
|
|
27
|
-
def test_cli_posargs(capsys, monkeypatch):
|
29
|
+
def test_cli_posargs(
|
30
|
+
capsys: pytest.CaptureFixture, monkeypatch: pytest.MonkeyPatch
|
31
|
+
) -> None:
|
32
|
+
"""Test CLI with basic, positional args."""
|
28
33
|
monkeypatch.setattr(
|
29
34
|
sys, "argv", ["tldextract", "example.com", "bbc.co.uk", "forums.bbc.co.uk"]
|
30
35
|
)
|
@@ -36,7 +41,10 @@ def test_cli_posargs(capsys, monkeypatch):
|
|
36
41
|
assert stdout == " example com\n bbc co.uk\nforums bbc co.uk\n"
|
37
42
|
|
38
43
|
|
39
|
-
def test_cli_namedargs(capsys, monkeypatch):
|
44
|
+
def test_cli_namedargs(
|
45
|
+
capsys: pytest.CaptureFixture, monkeypatch: pytest.MonkeyPatch
|
46
|
+
) -> None:
|
47
|
+
"""Test CLI with basic, positional args, and that it parses an optional argument (though it doesn't change output)."""
|
40
48
|
monkeypatch.setattr(
|
41
49
|
sys,
|
42
50
|
"argv",
|
@@ -8,12 +8,10 @@ import tldextract.cache
|
|
8
8
|
|
9
9
|
|
10
10
|
@pytest.fixture(autouse=True)
|
11
|
-
def reset_log_level():
|
11
|
+
def reset_log_level() -> None:
|
12
12
|
"""Automatically reset log level verbosity between tests.
|
13
13
|
|
14
14
|
Generally want test output the Unix way: silence is golden.
|
15
15
|
"""
|
16
|
-
tldextract.cache._DID_LOG_UNABLE_TO_CACHE =
|
17
|
-
False
|
18
|
-
)
|
16
|
+
tldextract.cache._DID_LOG_UNABLE_TO_CACHE = False
|
19
17
|
logging.getLogger().setLevel(logging.WARN)
|
@@ -4,6 +4,7 @@ import os
|
|
4
4
|
import tempfile
|
5
5
|
|
6
6
|
import tldextract
|
7
|
+
from tldextract.tldextract import ExtractResult
|
7
8
|
|
8
9
|
FAKE_SUFFIX_LIST_URL = "file://" + os.path.join(
|
9
10
|
os.path.dirname(os.path.abspath(__file__)), "fixtures/fake_suffix_list_fixture.dat"
|
@@ -23,11 +24,12 @@ extract_using_extra_suffixes = tldextract.TLDExtract(
|
|
23
24
|
)
|
24
25
|
|
25
26
|
|
26
|
-
def test_private_extraction():
|
27
|
+
def test_private_extraction() -> None:
|
28
|
+
"""Test this library's uncached, offline, private domain extraction."""
|
27
29
|
tld = tldextract.TLDExtract(cache_dir=tempfile.mkdtemp(), suffix_list_urls=[])
|
28
30
|
|
29
|
-
assert tld("foo.blogspot.com") == ("foo", "blogspot", "com", False)
|
30
|
-
assert tld("foo.blogspot.com", include_psl_private_domains=True) == (
|
31
|
+
assert tld("foo.blogspot.com") == ExtractResult("foo", "blogspot", "com", False)
|
32
|
+
assert tld("foo.blogspot.com", include_psl_private_domains=True) == ExtractResult(
|
31
33
|
"",
|
32
34
|
"foo",
|
33
35
|
"blogspot.com",
|
@@ -35,7 +37,8 @@ def test_private_extraction():
|
|
35
37
|
)
|
36
38
|
|
37
39
|
|
38
|
-
def test_suffix_which_is_not_in_custom_list():
|
40
|
+
def test_suffix_which_is_not_in_custom_list() -> None:
|
41
|
+
"""Test a custom suffix list without .com."""
|
39
42
|
for fun in (
|
40
43
|
extract_using_fake_suffix_list,
|
41
44
|
extract_using_fake_suffix_list_no_cache,
|
@@ -44,7 +47,8 @@ def test_suffix_which_is_not_in_custom_list():
|
|
44
47
|
assert result.suffix == ""
|
45
48
|
|
46
49
|
|
47
|
-
def test_custom_suffixes():
|
50
|
+
def test_custom_suffixes() -> None:
|
51
|
+
"""Test a custom suffix list with common, metasyntactic suffixes."""
|
48
52
|
for fun in (
|
49
53
|
extract_using_fake_suffix_list,
|
50
54
|
extract_using_fake_suffix_list_no_cache,
|
@@ -54,12 +58,14 @@ def test_custom_suffixes():
|
|
54
58
|
assert result.suffix == custom_suffix
|
55
59
|
|
56
60
|
|
57
|
-
def test_suffix_which_is_not_in_extra_list():
|
61
|
+
def test_suffix_which_is_not_in_extra_list() -> None:
|
62
|
+
"""Test a custom suffix list and extra suffixes without .com."""
|
58
63
|
result = extract_using_extra_suffixes("www.google.com")
|
59
64
|
assert result.suffix == ""
|
60
65
|
|
61
66
|
|
62
|
-
def test_extra_suffixes():
|
67
|
+
def test_extra_suffixes() -> None:
|
68
|
+
"""Test extra suffixes."""
|
63
69
|
for custom_suffix in EXTRA_SUFFIXES:
|
64
70
|
netloc = "www.foo.bar.baz.quux" + "." + custom_suffix
|
65
71
|
result = extract_using_extra_suffixes(netloc)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
"""tldextract integration tests."""
|
2
|
+
|
3
|
+
import pytest
|
4
|
+
|
5
|
+
import tldextract
|
6
|
+
|
7
|
+
|
8
|
+
def test_bad_kwargs_no_way_to_fetch() -> None:
|
9
|
+
"""Test an impossible combination of kwargs that disable all ways to fetch data."""
|
10
|
+
with pytest.raises(ValueError, match="disable all ways"):
|
11
|
+
tldextract.TLDExtract(
|
12
|
+
cache_dir=None, suffix_list_urls=(), fallback_to_snapshot=False
|
13
|
+
)
|