tldextract 4.0.0__tar.gz → 5.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {tldextract-4.0.0 → tldextract-5.0.0}/.travis.yml +1 -3
  2. {tldextract-4.0.0 → tldextract-5.0.0}/CHANGELOG.md +24 -0
  3. {tldextract-4.0.0 → tldextract-5.0.0}/PKG-INFO +9 -27
  4. {tldextract-4.0.0 → tldextract-5.0.0}/README.md +7 -24
  5. {tldextract-4.0.0 → tldextract-5.0.0}/pyproject.toml +23 -8
  6. tldextract-5.0.0/setup.cfg +4 -0
  7. {tldextract-4.0.0 → tldextract-5.0.0}/tests/cli_test.py +12 -4
  8. {tldextract-4.0.0 → tldextract-5.0.0}/tests/conftest.py +2 -4
  9. {tldextract-4.0.0 → tldextract-5.0.0}/tests/custom_suffix_test.py +13 -7
  10. tldextract-5.0.0/tests/integration_test.py +13 -0
  11. {tldextract-4.0.0 → tldextract-5.0.0}/tests/main_test.py +99 -94
  12. {tldextract-4.0.0 → tldextract-5.0.0}/tests/test_cache.py +15 -6
  13. {tldextract-4.0.0 → tldextract-5.0.0}/tests/test_parallel.py +11 -7
  14. {tldextract-4.0.0 → tldextract-5.0.0}/tests/test_trie.py +1 -0
  15. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/_version.py +2 -2
  16. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/cache.py +13 -21
  17. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/cli.py +2 -2
  18. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/suffix_list.py +1 -1
  19. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/tldextract.py +21 -38
  20. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/PKG-INFO +9 -27
  21. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/SOURCES.txt +0 -4
  22. tldextract-5.0.0/tox.ini +35 -0
  23. tldextract-4.0.0/MANIFEST.in +0 -4
  24. tldextract-4.0.0/conftest.py +0 -3
  25. tldextract-4.0.0/pytest.ini +0 -2
  26. tldextract-4.0.0/setup.cfg +0 -7
  27. tldextract-4.0.0/tests/integration_test.py +0 -12
  28. tldextract-4.0.0/tox.ini +0 -43
  29. {tldextract-4.0.0 → tldextract-5.0.0}/.github/FUNDING.yml +0 -0
  30. {tldextract-4.0.0 → tldextract-5.0.0}/.gitignore +0 -0
  31. {tldextract-4.0.0 → tldextract-5.0.0}/LICENSE +0 -0
  32. {tldextract-4.0.0 → tldextract-5.0.0}/tests/__init__.py +0 -0
  33. {tldextract-4.0.0 → tldextract-5.0.0}/tests/fixtures/fake_suffix_list_fixture.dat +0 -0
  34. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/.tld_set_snapshot +0 -0
  35. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/__init__.py +0 -0
  36. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/__main__.py +0 -0
  37. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/py.typed +0 -0
  38. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract/remote.py +0 -0
  39. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/dependency_links.txt +0 -0
  40. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/entry_points.txt +0 -0
  41. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/requires.txt +0 -0
  42. {tldextract-4.0.0 → tldextract-5.0.0}/tldextract.egg-info/top_level.txt +0 -0
@@ -2,8 +2,6 @@ dist: focal
2
2
  language: python
3
3
  matrix:
4
4
  include:
5
- - python: "3.7"
6
- env: TOXENV=py37
7
5
  - python: "3.8"
8
6
  env: TOXENV=py38
9
7
  - python: "3.9"
@@ -12,7 +10,7 @@ matrix:
12
10
  env: TOXENV=py310
13
11
  - python: "3.11"
14
12
  env: TOXENV=py311
15
- - python: pypy3.7-7.3.9
13
+ - python: pypy3.8-7.3.9
16
14
  dist: xenial
17
15
  env: TOXENV=pypy3
18
16
  - env: TOXENV=codestyle
@@ -3,6 +3,30 @@
3
3
  After upgrading, update your cache file by deleting it or via `tldextract
4
4
  --update`.
5
5
 
6
+ ## 5.0.0 (2023-10-11)
7
+
8
+ * Breaking Changes
9
+ * Migrate `ExtractResult` from `namedtuple` to `dataclass` ([#306](https://github.com/john-kurkowski/tldextract/issues/306))
10
+ * This means no more iterating/indexing/slicing/unpacking the result
11
+ object returned by this library. You must directly reference the
12
+ fields you're interested in. For example, instead of
13
+ ```python
14
+ tldextract.extract("example.com")[1:3]
15
+ ```
16
+ you must use
17
+ ```python
18
+ ext = tldextract.extract("example.com")
19
+ (ext.domain, ext.suffix)
20
+ ```
21
+ * Bugfixes
22
+ * Drop support for EOL Python 3.7
23
+ * Misc.
24
+ * Switch from pycodestyle and Pylint to Ruff ([#304](https://github.com/john-kurkowski/tldextract/issues/304))
25
+ * Consolidate config files
26
+ * Type tests
27
+ * Require docstrings in tests
28
+ * Remove obsolete tests
29
+
6
30
  ## 4.0.0 (2023-10-11)
7
31
 
8
32
  * **Breaking** bugfixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tldextract
3
- Version: 4.0.0
3
+ Version: 5.0.0
4
4
  Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
5
5
  Author-email: John Kurkowski <john.kurkowski@gmail.com>
6
6
  License: BSD-3-Clause
@@ -10,12 +10,11 @@ Classifier: Development Status :: 5 - Production/Stable
10
10
  Classifier: Topic :: Utilities
11
11
  Classifier: License :: OSI Approved :: BSD License
12
12
  Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.7
14
13
  Classifier: Programming Language :: Python :: 3.8
15
14
  Classifier: Programming Language :: Python :: 3.9
16
15
  Classifier: Programming Language :: Python :: 3.10
17
16
  Classifier: Programming Language :: Python :: 3.11
18
- Requires-Python: >=3.7
17
+ Requires-Python: >=3.8
19
18
  Description-Content-Type: text/markdown
20
19
  License-File: LICENSE
21
20
  Requires-Dist: idna
@@ -56,20 +55,6 @@ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False
56
55
  ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
57
56
  ```
58
57
 
59
- `ExtractResult` is a namedtuple, so it's simple to access the parts you want.
60
-
61
- ```python
62
- >>> ext = tldextract.extract('http://forums.bbc.co.uk')
63
- >>> (ext.subdomain, ext.domain, ext.suffix)
64
- ('forums', 'bbc', 'co.uk')
65
- >>> # rejoin subdomain and domain
66
- >>> '.'.join(ext[:2])
67
- 'forums.bbc'
68
- >>> # a common alias
69
- >>> ext.registered_domain
70
- 'bbc.co.uk'
71
- ```
72
-
73
58
  Note subdomain and suffix are _optional_. Not all URL-like inputs have a
74
59
  subdomain or a valid suffix.
75
60
 
@@ -84,17 +69,14 @@ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_privat
84
69
  ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
85
70
  ```
86
71
 
87
- If you want to rejoin the whole namedtuple, regardless of whether a subdomain
88
- or suffix were found:
72
+ To rejoin the original hostname, if it was indeed a valid, registered hostname:
89
73
 
90
74
  ```python
91
- >>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
92
- >>> # this has unwanted dots
93
- >>> '.'.join(ext[:3])
94
- '.127.0.0.1.'
95
- >>> # join each part only if it's truthy
96
- >>> '.'.join(part for part in ext[:3] if part)
97
- '127.0.0.1'
75
+ >>> ext = tldextract.extract('http://forums.bbc.co.uk')
76
+ >>> ext.registered_domain
77
+ 'bbc.co.uk'
78
+ >>> ext.fqdn
79
+ 'forums.bbc.co.uk'
98
80
  ```
99
81
 
100
82
  By default, this package supports the public ICANN TLDs and their exceptions.
@@ -303,7 +285,7 @@ Run all tests against a specific Python environment configuration:
303
285
 
304
286
  ```zsh
305
287
  tox -l
306
- tox -e py37
288
+ tox -e py311
307
289
  ```
308
290
 
309
291
  ### Code Style
@@ -31,20 +31,6 @@ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False
31
31
  ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
32
32
  ```
33
33
 
34
- `ExtractResult` is a namedtuple, so it's simple to access the parts you want.
35
-
36
- ```python
37
- >>> ext = tldextract.extract('http://forums.bbc.co.uk')
38
- >>> (ext.subdomain, ext.domain, ext.suffix)
39
- ('forums', 'bbc', 'co.uk')
40
- >>> # rejoin subdomain and domain
41
- >>> '.'.join(ext[:2])
42
- 'forums.bbc'
43
- >>> # a common alias
44
- >>> ext.registered_domain
45
- 'bbc.co.uk'
46
- ```
47
-
48
34
  Note subdomain and suffix are _optional_. Not all URL-like inputs have a
49
35
  subdomain or a valid suffix.
50
36
 
@@ -59,17 +45,14 @@ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_privat
59
45
  ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
60
46
  ```
61
47
 
62
- If you want to rejoin the whole namedtuple, regardless of whether a subdomain
63
- or suffix were found:
48
+ To rejoin the original hostname, if it was indeed a valid, registered hostname:
64
49
 
65
50
  ```python
66
- >>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
67
- >>> # this has unwanted dots
68
- >>> '.'.join(ext[:3])
69
- '.127.0.0.1.'
70
- >>> # join each part only if it's truthy
71
- >>> '.'.join(part for part in ext[:3] if part)
72
- '127.0.0.1'
51
+ >>> ext = tldextract.extract('http://forums.bbc.co.uk')
52
+ >>> ext.registered_domain
53
+ 'bbc.co.uk'
54
+ >>> ext.fqdn
55
+ 'forums.bbc.co.uk'
73
56
  ```
74
57
 
75
58
  By default, this package supports the public ICANN TLDs and their exceptions.
@@ -278,7 +261,7 @@ Run all tests against a specific Python environment configuration:
278
261
 
279
262
  ```zsh
280
263
  tox -l
281
- tox -e py37
264
+ tox -e py311
282
265
  ```
283
266
 
284
267
  ### Code Style
@@ -23,13 +23,12 @@ classifiers = [
23
23
  "Topic :: Utilities",
24
24
  "License :: OSI Approved :: BSD License",
25
25
  "Programming Language :: Python :: 3",
26
- "Programming Language :: Python :: 3.7",
27
26
  "Programming Language :: Python :: 3.8",
28
27
  "Programming Language :: Python :: 3.9",
29
28
  "Programming Language :: Python :: 3.10",
30
29
  "Programming Language :: Python :: 3.11",
31
30
  ]
32
- requires-python = ">=3.7"
31
+ requires-python = ">=3.8"
33
32
  dependencies = [
34
33
  "idna",
35
34
  "requests>=2.1.0",
@@ -67,11 +66,27 @@ version = {attr = "setuptools_scm.get_version"}
67
66
  check_untyped_defs = true
68
67
  disallow_incomplete_defs = true
69
68
  disallow_untyped_calls = true
70
-
71
- [[tool.mypy.overrides]]
72
- module = ["tldextract.*"]
73
69
  disallow_untyped_defs = true
74
70
 
75
- [tool.pylint.master]
76
- disable = "fixme"
77
- no-docstring-rgx = "(^_|test_.*)"
71
+ [tool.pytest.ini_options]
72
+ addopts = "--doctest-modules"
73
+
74
+ [tool.ruff]
75
+ select = [
76
+ "A",
77
+ "B",
78
+ "C",
79
+ "D",
80
+ "E",
81
+ "F",
82
+ "I",
83
+ "N",
84
+ "UP",
85
+ "W",
86
+ ]
87
+ ignore = [
88
+ "E501", # line too long; if Black does its job, not worried about the rare long line
89
+ ]
90
+
91
+ [tool.ruff.pydocstyle]
92
+ convention = "pep257"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -8,7 +8,8 @@ from tldextract.cli import main
8
8
  from tldextract.tldextract import PUBLIC_SUFFIX_LIST_URLS
9
9
 
10
10
 
11
- def test_cli_no_input(monkeypatch):
11
+ def test_cli_no_input(monkeypatch: pytest.MonkeyPatch) -> None:
12
+ """Test CLI without args."""
12
13
  monkeypatch.setattr(sys, "argv", ["tldextract"])
13
14
  with pytest.raises(SystemExit) as ex:
14
15
  main()
@@ -16,7 +17,8 @@ def test_cli_no_input(monkeypatch):
16
17
  assert ex.value.code == 1
17
18
 
18
19
 
19
- def test_cli_parses_args(monkeypatch):
20
+ def test_cli_parses_args(monkeypatch: pytest.MonkeyPatch) -> None:
21
+ """Test CLI with nonsense args."""
20
22
  monkeypatch.setattr(sys, "argv", ["tldextract", "--some", "nonsense"])
21
23
  with pytest.raises(SystemExit) as ex:
22
24
  main()
@@ -24,7 +26,10 @@ def test_cli_parses_args(monkeypatch):
24
26
  assert ex.value.code == 2
25
27
 
26
28
 
27
- def test_cli_posargs(capsys, monkeypatch):
29
+ def test_cli_posargs(
30
+ capsys: pytest.CaptureFixture, monkeypatch: pytest.MonkeyPatch
31
+ ) -> None:
32
+ """Test CLI with basic, positional args."""
28
33
  monkeypatch.setattr(
29
34
  sys, "argv", ["tldextract", "example.com", "bbc.co.uk", "forums.bbc.co.uk"]
30
35
  )
@@ -36,7 +41,10 @@ def test_cli_posargs(capsys, monkeypatch):
36
41
  assert stdout == " example com\n bbc co.uk\nforums bbc co.uk\n"
37
42
 
38
43
 
39
- def test_cli_namedargs(capsys, monkeypatch):
44
+ def test_cli_namedargs(
45
+ capsys: pytest.CaptureFixture, monkeypatch: pytest.MonkeyPatch
46
+ ) -> None:
47
+ """Test CLI with basic, positional args, and that it parses an optional argument (though it doesn't change output)."""
40
48
  monkeypatch.setattr(
41
49
  sys,
42
50
  "argv",
@@ -8,12 +8,10 @@ import tldextract.cache
8
8
 
9
9
 
10
10
  @pytest.fixture(autouse=True)
11
- def reset_log_level():
11
+ def reset_log_level() -> None:
12
12
  """Automatically reset log level verbosity between tests.
13
13
 
14
14
  Generally want test output the Unix way: silence is golden.
15
15
  """
16
- tldextract.cache._DID_LOG_UNABLE_TO_CACHE = ( # pylint: disable=protected-access
17
- False
18
- )
16
+ tldextract.cache._DID_LOG_UNABLE_TO_CACHE = False
19
17
  logging.getLogger().setLevel(logging.WARN)
@@ -4,6 +4,7 @@ import os
4
4
  import tempfile
5
5
 
6
6
  import tldextract
7
+ from tldextract.tldextract import ExtractResult
7
8
 
8
9
  FAKE_SUFFIX_LIST_URL = "file://" + os.path.join(
9
10
  os.path.dirname(os.path.abspath(__file__)), "fixtures/fake_suffix_list_fixture.dat"
@@ -23,11 +24,12 @@ extract_using_extra_suffixes = tldextract.TLDExtract(
23
24
  )
24
25
 
25
26
 
26
- def test_private_extraction():
27
+ def test_private_extraction() -> None:
28
+ """Test this library's uncached, offline, private domain extraction."""
27
29
  tld = tldextract.TLDExtract(cache_dir=tempfile.mkdtemp(), suffix_list_urls=[])
28
30
 
29
- assert tld("foo.blogspot.com") == ("foo", "blogspot", "com", False)
30
- assert tld("foo.blogspot.com", include_psl_private_domains=True) == (
31
+ assert tld("foo.blogspot.com") == ExtractResult("foo", "blogspot", "com", False)
32
+ assert tld("foo.blogspot.com", include_psl_private_domains=True) == ExtractResult(
31
33
  "",
32
34
  "foo",
33
35
  "blogspot.com",
@@ -35,7 +37,8 @@ def test_private_extraction():
35
37
  )
36
38
 
37
39
 
38
- def test_suffix_which_is_not_in_custom_list():
40
+ def test_suffix_which_is_not_in_custom_list() -> None:
41
+ """Test a custom suffix list without .com."""
39
42
  for fun in (
40
43
  extract_using_fake_suffix_list,
41
44
  extract_using_fake_suffix_list_no_cache,
@@ -44,7 +47,8 @@ def test_suffix_which_is_not_in_custom_list():
44
47
  assert result.suffix == ""
45
48
 
46
49
 
47
- def test_custom_suffixes():
50
+ def test_custom_suffixes() -> None:
51
+ """Test a custom suffix list with common, metasyntactic suffixes."""
48
52
  for fun in (
49
53
  extract_using_fake_suffix_list,
50
54
  extract_using_fake_suffix_list_no_cache,
@@ -54,12 +58,14 @@ def test_custom_suffixes():
54
58
  assert result.suffix == custom_suffix
55
59
 
56
60
 
57
- def test_suffix_which_is_not_in_extra_list():
61
+ def test_suffix_which_is_not_in_extra_list() -> None:
62
+ """Test a custom suffix list and extra suffixes without .com."""
58
63
  result = extract_using_extra_suffixes("www.google.com")
59
64
  assert result.suffix == ""
60
65
 
61
66
 
62
- def test_extra_suffixes():
67
+ def test_extra_suffixes() -> None:
68
+ """Test extra suffixes."""
63
69
  for custom_suffix in EXTRA_SUFFIXES:
64
70
  netloc = "www.foo.bar.baz.quux" + "." + custom_suffix
65
71
  result = extract_using_extra_suffixes(netloc)
@@ -0,0 +1,13 @@
1
+ """tldextract integration tests."""
2
+
3
+ import pytest
4
+
5
+ import tldextract
6
+
7
+
8
+ def test_bad_kwargs_no_way_to_fetch() -> None:
9
+ """Test an impossible combination of kwargs that disable all ways to fetch data."""
10
+ with pytest.raises(ValueError, match="disable all ways"):
11
+ tldextract.TLDExtract(
12
+ cache_dir=None, suffix_list_urls=(), fallback_to_snapshot=False
13
+ )