tldextract 5.0.1__tar.gz → 5.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. tldextract-5.1.1/.github/workflows/ci.yml +40 -0
  2. {tldextract-5.0.1 → tldextract-5.1.1}/CHANGELOG.md +17 -0
  3. {tldextract-5.0.1 → tldextract-5.1.1}/PKG-INFO +17 -6
  4. {tldextract-5.0.1 → tldextract-5.1.1}/README.md +3 -4
  5. {tldextract-5.0.1 → tldextract-5.1.1}/pyproject.toml +18 -2
  6. {tldextract-5.0.1 → tldextract-5.1.1}/tests/cli_test.py +23 -0
  7. {tldextract-5.0.1 → tldextract-5.1.1}/tests/custom_suffix_test.py +7 -3
  8. {tldextract-5.0.1 → tldextract-5.1.1}/tests/main_test.py +29 -0
  9. {tldextract-5.0.1 → tldextract-5.1.1}/tests/test_cache.py +5 -6
  10. {tldextract-5.0.1 → tldextract-5.1.1}/tests/test_parallel.py +25 -8
  11. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract/_version.py +2 -2
  12. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract/cache.py +8 -8
  13. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract/cli.py +21 -2
  14. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract/suffix_list.py +17 -2
  15. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract/tldextract.py +51 -16
  16. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract.egg-info/PKG-INFO +17 -6
  17. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract.egg-info/SOURCES.txt +1 -1
  18. tldextract-5.1.1/tldextract.egg-info/requires.txt +16 -0
  19. {tldextract-5.0.1 → tldextract-5.1.1}/tox.ini +5 -18
  20. tldextract-5.0.1/.travis.yml +0 -21
  21. tldextract-5.0.1/tldextract.egg-info/requires.txt +0 -4
  22. {tldextract-5.0.1 → tldextract-5.1.1}/.github/FUNDING.yml +0 -0
  23. {tldextract-5.0.1 → tldextract-5.1.1}/.gitignore +0 -0
  24. {tldextract-5.0.1 → tldextract-5.1.1}/LICENSE +0 -0
  25. {tldextract-5.0.1 → tldextract-5.1.1}/setup.cfg +0 -0
  26. {tldextract-5.0.1 → tldextract-5.1.1}/tests/__init__.py +0 -0
  27. {tldextract-5.0.1 → tldextract-5.1.1}/tests/conftest.py +0 -0
  28. {tldextract-5.0.1 → tldextract-5.1.1}/tests/fixtures/fake_suffix_list_fixture.dat +0 -0
  29. {tldextract-5.0.1 → tldextract-5.1.1}/tests/integration_test.py +0 -0
  30. {tldextract-5.0.1 → tldextract-5.1.1}/tests/test_trie.py +0 -0
  31. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract/.tld_set_snapshot +0 -0
  32. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract/__init__.py +0 -0
  33. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract/__main__.py +0 -0
  34. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract/py.typed +0 -0
  35. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract/remote.py +0 -0
  36. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract.egg-info/dependency_links.txt +0 -0
  37. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract.egg-info/entry_points.txt +0 -0
  38. {tldextract-5.0.1 → tldextract-5.1.1}/tldextract.egg-info/top_level.txt +0 -0
@@ -0,0 +1,40 @@
1
+ name: build
2
+ on: [push, pull_request]
3
+ jobs:
4
+ test:
5
+ strategy:
6
+ fail-fast: false
7
+ matrix:
8
+ os: [macos-latest, windows-latest, ubuntu-latest]
9
+ language:
10
+ [
11
+ {python-version: "3.8", toxenv: "py38"},
12
+ {python-version: "3.9", toxenv: "py39"},
13
+ {python-version: "3.10", toxenv: "py310"},
14
+ {python-version: "3.11", toxenv: "py311"},
15
+ {python-version: "3.12", toxenv: "py312"},
16
+ {python-version: "pypy3.8", toxenv: "pypy38"},
17
+ ]
18
+ include:
19
+ - os: ubuntu-latest
20
+ language: {python-version: "3.8", toxenv: "codestyle"}
21
+ - os: ubuntu-latest
22
+ language: {python-version: "3.8", toxenv: "lint"}
23
+ - os: ubuntu-latest
24
+ language: {python-version: "3.8", toxenv: "typecheck"}
25
+ runs-on: ${{ matrix.os }}
26
+ steps:
27
+ - name: Check out repository
28
+ uses: actions/checkout@v4
29
+ - name: Setup Python
30
+ uses: actions/setup-python@v4
31
+ with:
32
+ python-version: ${{ matrix.language.python-version }}
33
+ - name: Install Python requirements
34
+ run: |
35
+ pip install --upgrade pip
36
+ pip install --upgrade --editable '.[testing]'
37
+ - name: Test
38
+ run: tox
39
+ env:
40
+ TOXENV: ${{ matrix.language.toxenv }}
@@ -3,6 +3,23 @@
3
3
  After upgrading, update your cache file by deleting it or via `tldextract
4
4
  --update`.
5
5
 
6
+ ## 5.1.1 (2023-11-16)
7
+
8
+ * Bugfixes
9
+ * Fix path join on Windows ([#314](https://github.com/john-kurkowski/tldextract/issues/314))
10
+ * Support Python 3.12
11
+
12
+ ## 5.1.0 (2023-11-05)
13
+
14
+ * Features
15
+ * Allow passing in `requests.Session` ([#311](https://github.com/john-kurkowski/tldextract/issues/311))
16
+ * Add "-j, --json" option to support output in json format ([#313](https://github.com/john-kurkowski/tldextract/issues/313))
17
+ * Docs
18
+ * Improve clarity of absolute path ([#312](https://github.com/john-kurkowski/tldextract/issues/312))
19
+ * Misc.
20
+ * Extract all testing deps from tox.ini to pyproject.toml extras ([#310](https://github.com/john-kurkowski/tldextract/issues/310))
21
+ * Work around responses type union error, in tests
22
+
6
23
  ## 5.0.1 (2023-10-17)
7
24
 
8
25
  * Bugfixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tldextract
3
- Version: 5.0.1
3
+ Version: 5.1.1
4
4
  Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
5
5
  Author-email: John Kurkowski <john.kurkowski@gmail.com>
6
6
  License: BSD-3-Clause
@@ -14,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.8
14
14
  Classifier: Programming Language :: Python :: 3.9
15
15
  Classifier: Programming Language :: Python :: 3.10
16
16
  Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
17
18
  Requires-Python: >=3.8
18
19
  Description-Content-Type: text/markdown
19
20
  License-File: LICENSE
@@ -21,8 +22,19 @@ Requires-Dist: idna
21
22
  Requires-Dist: requests>=2.1.0
22
23
  Requires-Dist: requests-file>=1.4
23
24
  Requires-Dist: filelock>=3.0.8
24
-
25
- # tldextract [![PyPI version](https://badge.fury.io/py/tldextract.svg)](https://badge.fury.io/py/tldextract) [![Build Status](https://travis-ci.com/john-kurkowski/tldextract.svg?branch=master)](https://app.travis-ci.com/github/john-kurkowski/tldextract)
25
+ Provides-Extra: testing
26
+ Requires-Dist: black; extra == "testing"
27
+ Requires-Dist: mypy; extra == "testing"
28
+ Requires-Dist: pytest; extra == "testing"
29
+ Requires-Dist: pytest-gitignore; extra == "testing"
30
+ Requires-Dist: pytest-mock; extra == "testing"
31
+ Requires-Dist: responses; extra == "testing"
32
+ Requires-Dist: ruff; extra == "testing"
33
+ Requires-Dist: tox; extra == "testing"
34
+ Requires-Dist: types-filelock; extra == "testing"
35
+ Requires-Dist: types-requests; extra == "testing"
36
+
37
+ # tldextract [![PyPI version](https://badge.fury.io/py/tldextract.svg)](https://badge.fury.io/py/tldextract) [![Build Status](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml/badge.svg)](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml)
26
38
 
27
39
  `tldextract` accurately separates a URL's subdomain, domain, and public suffix,
28
40
  using [the Public Suffix List (PSL)](https://publicsuffix.org).
@@ -210,7 +222,7 @@ If you want to use input data from your local filesystem, just use the `file://`
210
222
 
211
223
  ```python
212
224
  extract = tldextract.TLDExtract(
213
- suffix_list_urls=["file://absolute/path/to/your/local/suffix/list/file"],
225
+ suffix_list_urls=["file://" + "/absolute/path/to/your/local/suffix/list/file"],
214
226
  cache_dir='/path/to/your/cache/',
215
227
  fallback_to_snapshot=False)
216
228
  ```
@@ -271,7 +283,7 @@ receiving exceptions or error metadata on results.
271
283
 
272
284
  1. `git clone` this repository.
273
285
  2. Change into the new directory.
274
- 3. `pip install tox`
286
+ 3. `pip install --upgrade --editable '.[testing]'`
275
287
 
276
288
  ### Running the test suite
277
289
 
@@ -293,6 +305,5 @@ tox -e py311
293
305
  Automatically format all code:
294
306
 
295
307
  ```zsh
296
- pip install black
297
308
  black .
298
309
  ```
@@ -1,4 +1,4 @@
1
- # tldextract [![PyPI version](https://badge.fury.io/py/tldextract.svg)](https://badge.fury.io/py/tldextract) [![Build Status](https://travis-ci.com/john-kurkowski/tldextract.svg?branch=master)](https://app.travis-ci.com/github/john-kurkowski/tldextract)
1
+ # tldextract [![PyPI version](https://badge.fury.io/py/tldextract.svg)](https://badge.fury.io/py/tldextract) [![Build Status](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml/badge.svg)](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml)
2
2
 
3
3
  `tldextract` accurately separates a URL's subdomain, domain, and public suffix,
4
4
  using [the Public Suffix List (PSL)](https://publicsuffix.org).
@@ -186,7 +186,7 @@ If you want to use input data from your local filesystem, just use the `file://`
186
186
 
187
187
  ```python
188
188
  extract = tldextract.TLDExtract(
189
- suffix_list_urls=["file://absolute/path/to/your/local/suffix/list/file"],
189
+ suffix_list_urls=["file://" + "/absolute/path/to/your/local/suffix/list/file"],
190
190
  cache_dir='/path/to/your/cache/',
191
191
  fallback_to_snapshot=False)
192
192
  ```
@@ -247,7 +247,7 @@ receiving exceptions or error metadata on results.
247
247
 
248
248
  1. `git clone` this repository.
249
249
  2. Change into the new directory.
250
- 3. `pip install tox`
250
+ 3. `pip install --upgrade --editable '.[testing]'`
251
251
 
252
252
  ### Running the test suite
253
253
 
@@ -269,6 +269,5 @@ tox -e py311
269
269
  Automatically format all code:
270
270
 
271
271
  ```zsh
272
- pip install black
273
272
  black .
274
273
  ```
@@ -27,16 +27,32 @@ classifiers = [
27
27
  "Programming Language :: Python :: 3.9",
28
28
  "Programming Language :: Python :: 3.10",
29
29
  "Programming Language :: Python :: 3.11",
30
+ "Programming Language :: Python :: 3.12",
30
31
  ]
31
32
  requires-python = ">=3.8"
33
+ dynamic = ["version"]
34
+ readme = "README.md"
35
+
32
36
  dependencies = [
33
37
  "idna",
34
38
  "requests>=2.1.0",
35
39
  "requests-file>=1.4",
36
40
  "filelock>=3.0.8",
37
41
  ]
38
- dynamic = ["version"]
39
- readme = "README.md"
42
+
43
+ [project.optional-dependencies]
44
+ testing = [
45
+ "black",
46
+ "mypy",
47
+ "pytest",
48
+ "pytest-gitignore",
49
+ "pytest-mock",
50
+ "responses",
51
+ "ruff",
52
+ "tox",
53
+ "types-filelock",
54
+ "types-requests",
55
+ ]
40
56
 
41
57
  [project.urls]
42
58
  Homepage = "https://github.com/john-kurkowski/tldextract"
@@ -1,5 +1,6 @@
1
1
  """tldextract integration tests."""
2
2
 
3
+ import json
3
4
  import sys
4
5
 
5
6
  import pytest
@@ -63,3 +64,25 @@ def test_cli_namedargs(
63
64
  stdout, stderr = capsys.readouterr()
64
65
  assert not stderr
65
66
  assert stdout == " example com\n bbc co.uk\nforums bbc co.uk\n"
67
+
68
+
69
+ def test_cli_json_output(
70
+ capsys: pytest.CaptureFixture[str], monkeypatch: pytest.MonkeyPatch
71
+ ) -> None:
72
+ """Test CLI with --json option."""
73
+ monkeypatch.setattr(sys, "argv", ["tldextract", "--json", "www.bbc.co.uk"])
74
+
75
+ main()
76
+
77
+ stdout, stderr = capsys.readouterr()
78
+ assert not stderr
79
+ assert json.loads(stdout) == {
80
+ "subdomain": "www",
81
+ "domain": "bbc",
82
+ "suffix": "co.uk",
83
+ "fqdn": "www.bbc.co.uk",
84
+ "ipv4": "",
85
+ "ipv6": "",
86
+ "is_private": False,
87
+ "registered_domain": "bbc.co.uk",
88
+ }
@@ -2,13 +2,17 @@
2
2
 
3
3
  import os
4
4
  import tempfile
5
+ from pathlib import Path
5
6
 
6
7
  import tldextract
7
8
  from tldextract.tldextract import ExtractResult
8
9
 
9
- FAKE_SUFFIX_LIST_URL = "file://" + os.path.join(
10
- os.path.dirname(os.path.abspath(__file__)), "fixtures/fake_suffix_list_fixture.dat"
11
- )
10
+ FAKE_SUFFIX_LIST_URL = Path(
11
+ os.path.dirname(os.path.abspath(__file__)),
12
+ "fixtures",
13
+ "fake_suffix_list_fixture.dat",
14
+ ).as_uri()
15
+
12
16
  EXTRA_SUFFIXES = ["foo1", "bar1", "baz1"]
13
17
 
14
18
  extract_using_fake_suffix_list = tldextract.TLDExtract(
@@ -8,6 +8,7 @@ import tempfile
8
8
  from collections.abc import Sequence
9
9
  from pathlib import Path
10
10
  from typing import Any
11
+ from unittest.mock import Mock
11
12
 
12
13
  import pytest
13
14
  import pytest_mock
@@ -449,6 +450,34 @@ def test_cache_timeouts(tmp_path: Path) -> None:
449
450
  tldextract.suffix_list.find_first_response(cache, [server], 5)
450
451
 
451
452
 
453
+ @responses.activate
454
+ def test_find_first_response_without_session(tmp_path: Path) -> None:
455
+ """Test it is able to find first response without session passed in."""
456
+ server = "http://some-server.com"
457
+ response_text = "server response"
458
+ responses.add(responses.GET, server, status=200, body=response_text)
459
+ cache = DiskCache(str(tmp_path))
460
+
461
+ result = tldextract.suffix_list.find_first_response(cache, [server], 5)
462
+ assert result == response_text
463
+
464
+
465
+ def test_find_first_response_with_session(tmp_path: Path) -> None:
466
+ """Test it is able to find first response with passed in session."""
467
+ server = "http://some-server.com"
468
+ response_text = "server response"
469
+ cache = DiskCache(str(tmp_path))
470
+ mock_session = Mock()
471
+ mock_session.get.return_value.text = response_text
472
+
473
+ result = tldextract.suffix_list.find_first_response(
474
+ cache, [server], 5, mock_session
475
+ )
476
+ assert result == response_text
477
+ mock_session.get.assert_called_once_with(server, timeout=5)
478
+ mock_session.close.assert_not_called()
479
+
480
+
452
481
  def test_include_psl_private_domain_attr() -> None:
453
482
  """Test private domains, which default to not being treated differently."""
454
483
  extract_private = tldextract.TLDExtract(include_psl_private_domains=True)
@@ -1,7 +1,6 @@
1
1
  """Test the caching functionality."""
2
2
  from __future__ import annotations
3
3
 
4
- import os.path
5
4
  import sys
6
5
  import types
7
6
  from collections.abc import Hashable
@@ -56,14 +55,14 @@ def test_get_cache_dir(monkeypatch: pytest.MonkeyPatch) -> None:
56
55
  monkeypatch.delenv("HOME", raising=False)
57
56
  monkeypatch.delenv("XDG_CACHE_HOME", raising=False)
58
57
  monkeypatch.delenv("TLDEXTRACT_CACHE", raising=False)
59
- assert get_cache_dir().endswith("tldextract/.suffix_cache/")
58
+ assert get_cache_dir().endswith(str(Path("tldextract", ".suffix_cache")))
60
59
 
61
60
  # with home set, but not anything else specified, use XDG_CACHE_HOME default
62
61
  monkeypatch.setenv("HOME", "/home/john")
63
62
  monkeypatch.delenv("XDG_CACHE_HOME", raising=False)
64
63
  monkeypatch.delenv("TLDEXTRACT_CACHE", raising=False)
65
- assert get_cache_dir() == os.path.join(
66
- "/home/john", ".cache/python-tldextract", pkg_identifier
64
+ assert get_cache_dir() == str(
65
+ Path("/home/john", ".cache/python-tldextract", pkg_identifier)
67
66
  )
68
67
 
69
68
  # if XDG_CACHE_HOME is set, use it
@@ -71,8 +70,8 @@ def test_get_cache_dir(monkeypatch: pytest.MonkeyPatch) -> None:
71
70
  monkeypatch.setenv("XDG_CACHE_HOME", "/my/alt/cache")
72
71
  monkeypatch.delenv("TLDEXTRACT_CACHE", raising=False)
73
72
 
74
- assert get_cache_dir() == os.path.join(
75
- "/my/alt/cache/python-tldextract", pkg_identifier
73
+ assert get_cache_dir() == str(
74
+ Path("/my/alt/cache/python-tldextract", pkg_identifier)
76
75
  )
77
76
 
78
77
  # if TLDEXTRACT_CACHE is set, use it
@@ -1,6 +1,8 @@
1
1
  """Test ability to run in parallel with shared cache."""
2
+
3
+ from __future__ import annotations
4
+
2
5
  import os
3
- import os.path
4
6
  from multiprocessing import Pool
5
7
  from pathlib import Path
6
8
 
@@ -19,14 +21,15 @@ def test_multiprocessing_makes_one_request(tmp_path: Path) -> None:
19
21
  assert sum(http_request_counts) == 1
20
22
 
21
23
 
22
- @responses.activate
23
24
  def _run_extractor(cache_dir: Path) -> int:
24
25
  """Run the extractor."""
25
- responses.add(responses.GET, PUBLIC_SUFFIX_LIST_URLS[0], status=208, body="uk.co")
26
- extract = TLDExtract(cache_dir=str(cache_dir))
26
+ with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
27
+ rsps.add(responses.GET, PUBLIC_SUFFIX_LIST_URLS[0], status=208, body="uk.co")
28
+ extract = TLDExtract(cache_dir=str(cache_dir))
27
29
 
28
- extract("bar.uk.com", include_psl_private_domains=True)
29
- return len(responses.calls)
30
+ extract("bar.uk.com", include_psl_private_domains=True)
31
+ num_calls = len(rsps.calls)
32
+ return num_calls
30
33
 
31
34
 
32
35
  @responses.activate
@@ -41,9 +44,23 @@ def test_cache_cleared_by_other_process(
41
44
  extract("google.com")
42
45
  orig_unlink = os.unlink
43
46
 
44
- def evil_unlink(filename: str) -> None:
47
+ def is_relative_to(path: Path, other_path: str | Path) -> bool:
48
+ """Return True if path is relative to other_path or False.
49
+
50
+ Taken from the Python 3.9 standard library.
51
+ Reference: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.is_relative_to
52
+ """
53
+ try:
54
+ path.relative_to(other_path)
55
+ return True
56
+ except ValueError:
57
+ return False
58
+
59
+ def evil_unlink(filename: str | Path) -> None:
45
60
  """Simulate someone deletes the file right before we try to."""
46
- if filename.startswith(cache_dir):
61
+ if (isinstance(filename, str) and filename.startswith(cache_dir)) or (
62
+ isinstance(filename, Path) and is_relative_to(filename, cache_dir)
63
+ ):
47
64
  orig_unlink(filename)
48
65
  orig_unlink(filename)
49
66
 
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '5.0.1'
16
- __version_tuple__ = version_tuple = (5, 0, 1)
15
+ __version__ = version = '5.1.1'
16
+ __version_tuple__ = version_tuple = (5, 1, 1)
@@ -6,9 +6,9 @@ import hashlib
6
6
  import json
7
7
  import logging
8
8
  import os
9
- import os.path
10
9
  import sys
11
10
  from collections.abc import Callable, Hashable, Iterable
11
+ from pathlib import Path
12
12
  from typing import (
13
13
  TypeVar,
14
14
  cast,
@@ -79,15 +79,15 @@ def get_cache_dir() -> str:
79
79
  if xdg_cache_home is None:
80
80
  user_home = os.getenv("HOME", None)
81
81
  if user_home:
82
- xdg_cache_home = os.path.join(user_home, ".cache")
82
+ xdg_cache_home = str(Path(user_home, ".cache"))
83
83
 
84
84
  if xdg_cache_home is not None:
85
- return os.path.join(
86
- xdg_cache_home, "python-tldextract", get_pkg_unique_identifier()
85
+ return str(
86
+ Path(xdg_cache_home, "python-tldextract", get_pkg_unique_identifier())
87
87
  )
88
88
 
89
89
  # fallback to trying to use package directory itself
90
- return os.path.join(os.path.dirname(__file__), ".suffix_cache/")
90
+ return str(Path(os.path.dirname(__file__), ".suffix_cache"))
91
91
 
92
92
 
93
93
  class DiskCache:
@@ -153,7 +153,7 @@ class DiskCache:
153
153
  self.file_ext + ".lock"
154
154
  ):
155
155
  try:
156
- os.unlink(os.path.join(root, filename))
156
+ os.unlink(str(Path(root, filename)))
157
157
  except FileNotFoundError:
158
158
  pass
159
159
  except OSError as exc:
@@ -165,10 +165,10 @@ class DiskCache:
165
165
  def _key_to_cachefile_path(
166
166
  self, namespace: str, key: str | dict[str, Hashable]
167
167
  ) -> str:
168
- namespace_path = os.path.join(self.cache_dir, namespace)
168
+ namespace_path = str(Path(self.cache_dir, namespace))
169
169
  hashed_key = _make_cache_key(key)
170
170
 
171
- cache_path = os.path.join(namespace_path, hashed_key + self.file_ext)
171
+ cache_path = str(Path(namespace_path, hashed_key + self.file_ext))
172
172
 
173
173
  return cache_path
174
174
 
@@ -1,7 +1,8 @@
1
1
  """tldextract CLI."""
2
2
 
3
-
4
3
  import argparse
4
+ import dataclasses
5
+ import json
5
6
  import logging
6
7
  import os.path
7
8
  import pathlib
@@ -22,6 +23,13 @@ def main() -> None:
22
23
  parser.add_argument(
23
24
  "--version", action="version", version="%(prog)s " + __version__
24
25
  )
26
+ parser.add_argument(
27
+ "-j",
28
+ "--json",
29
+ default=False,
30
+ action="store_true",
31
+ help="output in json format",
32
+ )
25
33
  parser.add_argument(
26
34
  "input", metavar="fqdn|url", type=str, nargs="*", help="fqdn or url"
27
35
  )
@@ -89,4 +97,15 @@ def main() -> None:
89
97
 
90
98
  for i in args.input:
91
99
  ext = tld_extract(i)
92
- print(f"{ext.subdomain} {ext.domain} {ext.suffix}")
100
+ if args.json:
101
+ properties = ("fqdn", "ipv4", "ipv6", "registered_domain")
102
+ print(
103
+ json.dumps(
104
+ {
105
+ **dataclasses.asdict(ext),
106
+ **{prop: getattr(ext, prop) for prop in properties},
107
+ }
108
+ )
109
+ )
110
+ else:
111
+ print(f"{ext.subdomain} {ext.domain} {ext.suffix}")
@@ -31,11 +31,16 @@ def find_first_response(
31
31
  cache: DiskCache,
32
32
  urls: Sequence[str],
33
33
  cache_fetch_timeout: float | int | None = None,
34
+ session: requests.Session | None = None,
34
35
  ) -> str:
35
36
  """Decode the first successfully fetched URL, from UTF-8 encoding to Python unicode."""
36
- with requests.Session() as session:
37
+ session_created = False
38
+ if session is None:
39
+ session = requests.Session()
37
40
  session.mount("file://", FileAdapter())
41
+ session_created = True
38
42
 
43
+ try:
39
44
  for url in urls:
40
45
  try:
41
46
  return cache.cached_fetch_url(
@@ -43,6 +48,11 @@ def find_first_response(
43
48
  )
44
49
  except requests.exceptions.RequestException:
45
50
  LOG.exception("Exception reading Public Suffix List url %s", url)
51
+ finally:
52
+ # Ensure the session is always closed if it's constructed in the method
53
+ if session_created:
54
+ session.close()
55
+
46
56
  raise SuffixListNotFound(
47
57
  "No remote Public Suffix List found. Consider using a mirror, or avoid this"
48
58
  " fetch by constructing your TLDExtract with `suffix_list_urls=()`."
@@ -65,6 +75,7 @@ def get_suffix_lists(
65
75
  urls: Sequence[str],
66
76
  cache_fetch_timeout: float | int | None,
67
77
  fallback_to_snapshot: bool,
78
+ session: requests.Session | None = None,
68
79
  ) -> tuple[list[str], list[str]]:
69
80
  """Fetch, parse, and cache the suffix lists."""
70
81
  return cache.run_and_cache(
@@ -75,6 +86,7 @@ def get_suffix_lists(
75
86
  "urls": urls,
76
87
  "cache_fetch_timeout": cache_fetch_timeout,
77
88
  "fallback_to_snapshot": fallback_to_snapshot,
89
+ "session": session,
78
90
  },
79
91
  hashed_argnames=["urls", "fallback_to_snapshot"],
80
92
  )
@@ -85,10 +97,13 @@ def _get_suffix_lists(
85
97
  urls: Sequence[str],
86
98
  cache_fetch_timeout: float | int | None,
87
99
  fallback_to_snapshot: bool,
100
+ session: requests.Session | None = None,
88
101
  ) -> tuple[list[str], list[str]]:
89
102
  """Fetch, parse, and cache the suffix lists."""
90
103
  try:
91
- text = find_first_response(cache, urls, cache_fetch_timeout=cache_fetch_timeout)
104
+ text = find_first_response(
105
+ cache, urls, cache_fetch_timeout=cache_fetch_timeout, session=session
106
+ )
92
107
  except SuffixListNotFound as exc:
93
108
  if fallback_to_snapshot:
94
109
  maybe_pkg_data = pkgutil.get_data("tldextract", ".tld_set_snapshot")
@@ -44,6 +44,7 @@ from dataclasses import dataclass
44
44
  from functools import wraps
45
45
 
46
46
  import idna
47
+ import requests
47
48
 
48
49
  from .cache import DiskCache, get_cache_dir
49
50
  from .remote import lenient_netloc, looks_like_ip, looks_like_ipv6
@@ -221,13 +222,19 @@ class TLDExtract:
221
222
  self._cache = DiskCache(cache_dir)
222
223
 
223
224
  def __call__(
224
- self, url: str, include_psl_private_domains: bool | None = None
225
+ self,
226
+ url: str,
227
+ include_psl_private_domains: bool | None = None,
228
+ session: requests.Session | None = None,
225
229
  ) -> ExtractResult:
226
230
  """Alias for `extract_str`."""
227
- return self.extract_str(url, include_psl_private_domains)
231
+ return self.extract_str(url, include_psl_private_domains, session=session)
228
232
 
229
233
  def extract_str(
230
- self, url: str, include_psl_private_domains: bool | None = None
234
+ self,
235
+ url: str,
236
+ include_psl_private_domains: bool | None = None,
237
+ session: requests.Session | None = None,
231
238
  ) -> ExtractResult:
232
239
  """Take a string URL and splits it into its subdomain, domain, and suffix components.
233
240
 
@@ -238,13 +245,27 @@ class TLDExtract:
238
245
  ExtractResult(subdomain='forums.news', domain='cnn', suffix='com', is_private=False)
239
246
  >>> extractor.extract_str('http://forums.bbc.co.uk/')
240
247
  ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False)
248
+
249
+ Allows configuring the HTTP request via the optional `session`
250
+ parameter. For example, if you need to use a HTTP proxy. See also
251
+ `requests.Session`.
252
+
253
+ >>> import requests
254
+ >>> session = requests.Session()
255
+ >>> # customize your session here
256
+ >>> with session:
257
+ ... extractor.extract_str("http://forums.news.cnn.com/", session=session)
258
+ ExtractResult(subdomain='forums.news', domain='cnn', suffix='com', is_private=False)
241
259
  """
242
- return self._extract_netloc(lenient_netloc(url), include_psl_private_domains)
260
+ return self._extract_netloc(
261
+ lenient_netloc(url), include_psl_private_domains, session=session
262
+ )
243
263
 
244
264
  def extract_urllib(
245
265
  self,
246
266
  url: urllib.parse.ParseResult | urllib.parse.SplitResult,
247
267
  include_psl_private_domains: bool | None = None,
268
+ session: requests.Session | None = None,
248
269
  ) -> ExtractResult:
249
270
  """Take the output of urllib.parse URL parsing methods and further splits the parsed URL.
250
271
 
@@ -260,10 +281,15 @@ class TLDExtract:
260
281
  >>> extractor.extract_urllib(urllib.parse.urlsplit('http://forums.bbc.co.uk/'))
261
282
  ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False)
262
283
  """
263
- return self._extract_netloc(url.netloc, include_psl_private_domains)
284
+ return self._extract_netloc(
285
+ url.netloc, include_psl_private_domains, session=session
286
+ )
264
287
 
265
288
  def _extract_netloc(
266
- self, netloc: str, include_psl_private_domains: bool | None
289
+ self,
290
+ netloc: str,
291
+ include_psl_private_domains: bool | None,
292
+ session: requests.Session | None = None,
267
293
  ) -> ExtractResult:
268
294
  netloc_with_ascii_dots = (
269
295
  netloc.replace("\u3002", "\u002e")
@@ -282,9 +308,9 @@ class TLDExtract:
282
308
 
283
309
  labels = netloc_with_ascii_dots.split(".")
284
310
 
285
- suffix_index, is_private = self._get_tld_extractor().suffix_index(
286
- labels, include_psl_private_domains=include_psl_private_domains
287
- )
311
+ suffix_index, is_private = self._get_tld_extractor(
312
+ session=session
313
+ ).suffix_index(labels, include_psl_private_domains=include_psl_private_domains)
288
314
 
289
315
  num_ipv4_labels = 4
290
316
  if suffix_index == len(labels) == num_ipv4_labels and looks_like_ip(
@@ -297,23 +323,27 @@ class TLDExtract:
297
323
  domain = labels[suffix_index - 1] if suffix_index else ""
298
324
  return ExtractResult(subdomain, domain, suffix, is_private)
299
325
 
300
- def update(self, fetch_now: bool = False) -> None:
326
+ def update(
327
+ self, fetch_now: bool = False, session: requests.Session | None = None
328
+ ) -> None:
301
329
  """Force fetch the latest suffix list definitions."""
302
330
  self._extractor = None
303
331
  self._cache.clear()
304
332
  if fetch_now:
305
- self._get_tld_extractor()
333
+ self._get_tld_extractor(session=session)
306
334
 
307
335
  @property
308
- def tlds(self) -> list[str]:
336
+ def tlds(self, session: requests.Session | None = None) -> list[str]:
309
337
  """
310
338
  Returns the list of tld's used by default.
311
339
 
312
340
  This will vary based on `include_psl_private_domains` and `extra_suffixes`
313
341
  """
314
- return list(self._get_tld_extractor().tlds())
342
+ return list(self._get_tld_extractor(session=session).tlds())
315
343
 
316
- def _get_tld_extractor(self) -> _PublicSuffixListTLDExtractor:
344
+ def _get_tld_extractor(
345
+ self, session: requests.Session | None = None
346
+ ) -> _PublicSuffixListTLDExtractor:
317
347
  """Get or compute this object's TLDExtractor.
318
348
 
319
349
  Looks up the TLDExtractor in roughly the following order, based on the
@@ -332,6 +362,7 @@ class TLDExtract:
332
362
  urls=self.suffix_list_urls,
333
363
  cache_fetch_timeout=self.cache_fetch_timeout,
334
364
  fallback_to_snapshot=self.fallback_to_snapshot,
365
+ session=session,
335
366
  )
336
367
 
337
368
  if not any([public_tlds, private_tlds, self.extra_suffixes]):
@@ -400,9 +431,13 @@ class Trie:
400
431
 
401
432
  @wraps(TLD_EXTRACTOR.__call__)
402
433
  def extract( # noqa: D103
403
- url: str, include_psl_private_domains: bool | None = False
434
+ url: str,
435
+ include_psl_private_domains: bool | None = False,
436
+ session: requests.Session | None = None,
404
437
  ) -> ExtractResult:
405
- return TLD_EXTRACTOR(url, include_psl_private_domains=include_psl_private_domains)
438
+ return TLD_EXTRACTOR(
439
+ url, include_psl_private_domains=include_psl_private_domains, session=session
440
+ )
406
441
 
407
442
 
408
443
  @wraps(TLD_EXTRACTOR.update)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tldextract
3
- Version: 5.0.1
3
+ Version: 5.1.1
4
4
  Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
5
5
  Author-email: John Kurkowski <john.kurkowski@gmail.com>
6
6
  License: BSD-3-Clause
@@ -14,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.8
14
14
  Classifier: Programming Language :: Python :: 3.9
15
15
  Classifier: Programming Language :: Python :: 3.10
16
16
  Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
17
18
  Requires-Python: >=3.8
18
19
  Description-Content-Type: text/markdown
19
20
  License-File: LICENSE
@@ -21,8 +22,19 @@ Requires-Dist: idna
21
22
  Requires-Dist: requests>=2.1.0
22
23
  Requires-Dist: requests-file>=1.4
23
24
  Requires-Dist: filelock>=3.0.8
24
-
25
- # tldextract [![PyPI version](https://badge.fury.io/py/tldextract.svg)](https://badge.fury.io/py/tldextract) [![Build Status](https://travis-ci.com/john-kurkowski/tldextract.svg?branch=master)](https://app.travis-ci.com/github/john-kurkowski/tldextract)
25
+ Provides-Extra: testing
26
+ Requires-Dist: black; extra == "testing"
27
+ Requires-Dist: mypy; extra == "testing"
28
+ Requires-Dist: pytest; extra == "testing"
29
+ Requires-Dist: pytest-gitignore; extra == "testing"
30
+ Requires-Dist: pytest-mock; extra == "testing"
31
+ Requires-Dist: responses; extra == "testing"
32
+ Requires-Dist: ruff; extra == "testing"
33
+ Requires-Dist: tox; extra == "testing"
34
+ Requires-Dist: types-filelock; extra == "testing"
35
+ Requires-Dist: types-requests; extra == "testing"
36
+
37
+ # tldextract [![PyPI version](https://badge.fury.io/py/tldextract.svg)](https://badge.fury.io/py/tldextract) [![Build Status](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml/badge.svg)](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml)
26
38
 
27
39
  `tldextract` accurately separates a URL's subdomain, domain, and public suffix,
28
40
  using [the Public Suffix List (PSL)](https://publicsuffix.org).
@@ -210,7 +222,7 @@ If you want to use input data from your local filesystem, just use the `file://`
210
222
 
211
223
  ```python
212
224
  extract = tldextract.TLDExtract(
213
- suffix_list_urls=["file://absolute/path/to/your/local/suffix/list/file"],
225
+ suffix_list_urls=["file://" + "/absolute/path/to/your/local/suffix/list/file"],
214
226
  cache_dir='/path/to/your/cache/',
215
227
  fallback_to_snapshot=False)
216
228
  ```
@@ -271,7 +283,7 @@ receiving exceptions or error metadata on results.
271
283
 
272
284
  1. `git clone` this repository.
273
285
  2. Change into the new directory.
274
- 3. `pip install tox`
286
+ 3. `pip install --upgrade --editable '.[testing]'`
275
287
 
276
288
  ### Running the test suite
277
289
 
@@ -293,6 +305,5 @@ tox -e py311
293
305
  Automatically format all code:
294
306
 
295
307
  ```zsh
296
- pip install black
297
308
  black .
298
309
  ```
@@ -1,11 +1,11 @@
1
1
  .gitignore
2
- .travis.yml
3
2
  CHANGELOG.md
4
3
  LICENSE
5
4
  README.md
6
5
  pyproject.toml
7
6
  tox.ini
8
7
  .github/FUNDING.yml
8
+ .github/workflows/ci.yml
9
9
  tests/__init__.py
10
10
  tests/cli_test.py
11
11
  tests/conftest.py
@@ -0,0 +1,16 @@
1
+ idna
2
+ requests>=2.1.0
3
+ requests-file>=1.4
4
+ filelock>=3.0.8
5
+
6
+ [testing]
7
+ black
8
+ mypy
9
+ pytest
10
+ pytest-gitignore
11
+ pytest-mock
12
+ responses
13
+ ruff
14
+ tox
15
+ types-filelock
16
+ types-requests
@@ -1,35 +1,22 @@
1
1
  [tox]
2
- envlist = py{38,39,310,311,py3},codestyle,lint,typecheck
2
+ envlist = py{38,39,310,311,312,py38},codestyle,lint,typecheck
3
3
 
4
4
  [testenv]
5
- deps =
6
- pytest
7
- pytest-gitignore
8
- pytest-mock
9
- responses
10
5
  commands = pytest {posargs}
6
+ extras = testing
11
7
 
12
8
  [testenv:codestyle]
13
9
  basepython = python3.8
14
- deps =
15
- black
16
10
  commands =
17
11
  black --check {posargs:.}
12
+ extras = testing
18
13
 
19
14
  [testenv:lint]
20
15
  basepython = python3.8
21
- deps =
22
- ruff
23
16
  commands = ruff check {posargs:.}
17
+ extras = testing
24
18
 
25
19
  [testenv:typecheck]
26
20
  basepython = python3.8
27
- deps =
28
- mypy
29
- pytest
30
- pytest-gitignore
31
- pytest-mock
32
- responses
33
- types-filelock
34
- types-requests
35
21
  commands = mypy --show-error-codes tldextract tests
22
+ extras = testing
@@ -1,21 +0,0 @@
1
- dist: focal
2
- language: python
3
- matrix:
4
- include:
5
- - python: "3.8"
6
- env: TOXENV=py38
7
- - python: "3.9"
8
- env: TOXENV=py39
9
- - python: "3.10"
10
- env: TOXENV=py310
11
- - python: "3.11"
12
- env: TOXENV=py311
13
- - python: pypy3.8-7.3.9
14
- dist: xenial
15
- env: TOXENV=pypy3
16
- - env: TOXENV=codestyle
17
- - env: TOXENV=lint
18
- - env: TOXENV=typecheck
19
- python: "3.10"
20
- install: pip install tox
21
- script: tox
@@ -1,4 +0,0 @@
1
- idna
2
- requests>=2.1.0
3
- requests-file>=1.4
4
- filelock>=3.0.8
File without changes
File without changes
File without changes
File without changes
File without changes