tldextract 5.1.2__py3-none-any.whl → 5.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tldextract/.tld_set_snapshot +4390 -2405
- tldextract/_version.py +2 -2
- tldextract/cache.py +5 -16
- tldextract/suffix_list.py +3 -1
- tldextract/tldextract.py +11 -12
- {tldextract-5.1.2.dist-info → tldextract-5.1.3.dist-info}/METADATA +27 -18
- tldextract-5.1.3.dist-info/RECORD +16 -0
- {tldextract-5.1.2.dist-info → tldextract-5.1.3.dist-info}/WHEEL +1 -1
- tldextract-5.1.2.dist-info/RECORD +0 -16
- {tldextract-5.1.2.dist-info → tldextract-5.1.3.dist-info}/LICENSE +0 -0
- {tldextract-5.1.2.dist-info → tldextract-5.1.3.dist-info}/entry_points.txt +0 -0
- {tldextract-5.1.2.dist-info → tldextract-5.1.3.dist-info}/top_level.txt +0 -0
tldextract/_version.py
CHANGED
tldextract/cache.py
CHANGED
@@ -24,18 +24,6 @@ _DID_LOG_UNABLE_TO_CACHE = False
|
|
24
24
|
|
25
25
|
T = TypeVar("T")
|
26
26
|
|
27
|
-
if sys.version_info >= (3, 9):
|
28
|
-
|
29
|
-
def md5(*args: bytes) -> hashlib._Hash:
|
30
|
-
"""Use argument only available in newer Python.
|
31
|
-
|
32
|
-
In this file, MD5 is only used for cache location, not security.
|
33
|
-
"""
|
34
|
-
return hashlib.md5(*args, usedforsecurity=False)
|
35
|
-
|
36
|
-
else:
|
37
|
-
md5 = hashlib.md5
|
38
|
-
|
39
27
|
|
40
28
|
def get_pkg_unique_identifier() -> str:
|
41
29
|
"""Generate an identifier unique to the python version, tldextract version, and python instance.
|
@@ -51,7 +39,9 @@ def get_pkg_unique_identifier() -> str:
|
|
51
39
|
tldextract_version = "tldextract-" + version
|
52
40
|
python_env_name = os.path.basename(sys.prefix)
|
53
41
|
# just to handle the edge case of two identically named python environments
|
54
|
-
python_binary_path_short_hash = md5(
|
42
|
+
python_binary_path_short_hash = hashlib.md5(
|
43
|
+
sys.prefix.encode("utf-8"), usedforsecurity=False
|
44
|
+
).hexdigest()[:6]
|
55
45
|
python_version = ".".join([str(v) for v in sys.version_info[:-1]])
|
56
46
|
identifier_parts = [
|
57
47
|
python_version,
|
@@ -113,8 +103,7 @@ class DiskCache:
|
|
113
103
|
with open(cache_filepath) as cache_file:
|
114
104
|
return json.load(cache_file)
|
115
105
|
except (OSError, ValueError) as exc:
|
116
|
-
|
117
|
-
raise KeyError("namespace: " + namespace + " key: " + repr(key)) from None
|
106
|
+
raise KeyError("namespace: " + namespace + " key: " + repr(key)) from exc
|
118
107
|
|
119
108
|
def set( # noqa: A003
|
120
109
|
self, namespace: str, key: str | dict[str, Hashable], value: object
|
@@ -238,7 +227,7 @@ def _fetch_url(session: requests.Session, url: str, timeout: int | None) -> str:
|
|
238
227
|
|
239
228
|
def _make_cache_key(inputs: str | dict[str, Hashable]) -> str:
|
240
229
|
key = repr(inputs)
|
241
|
-
return md5(key.encode("utf8")).hexdigest()
|
230
|
+
return hashlib.md5(key.encode("utf8"), usedforsecurity=False).hexdigest()
|
242
231
|
|
243
232
|
|
244
233
|
def _make_dir(filename: str) -> None:
|
tldextract/suffix_list.py
CHANGED
@@ -47,7 +47,9 @@ def find_first_response(
|
|
47
47
|
session=session, url=url, timeout=cache_fetch_timeout
|
48
48
|
)
|
49
49
|
except requests.exceptions.RequestException:
|
50
|
-
LOG.exception("Exception reading Public Suffix List url %s", url)
|
50
|
+
LOG.warning(
|
51
|
+
"Exception reading Public Suffix List url %s", url, exc_info=True
|
52
|
+
)
|
51
53
|
finally:
|
52
54
|
# Ensure the session is always closed if it's constructed in the method
|
53
55
|
if session_created:
|
tldextract/tldextract.py
CHANGED
@@ -36,7 +36,6 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
|
|
36
36
|
|
37
37
|
from __future__ import annotations
|
38
38
|
|
39
|
-
import logging
|
40
39
|
import os
|
41
40
|
import urllib.parse
|
42
41
|
from collections.abc import Collection, Sequence
|
@@ -50,9 +49,6 @@ from .cache import DiskCache, get_cache_dir
|
|
50
49
|
from .remote import lenient_netloc, looks_like_ip, looks_like_ipv6
|
51
50
|
from .suffix_list import get_suffix_lists
|
52
51
|
|
53
|
-
LOG = logging.getLogger("tldextract")
|
54
|
-
|
55
|
-
|
56
52
|
CACHE_TIMEOUT = os.environ.get("TLDEXTRACT_CACHE_TIMEOUT")
|
57
53
|
|
58
54
|
PUBLIC_SUFFIX_LIST_URLS = (
|
@@ -161,13 +157,14 @@ class TLDExtract:
|
|
161
157
|
the `cache_dir` will live in the tldextract directory. You can disable
|
162
158
|
the caching functionality of this module by setting `cache_dir` to `None`.
|
163
159
|
|
164
|
-
If the cached version does not exist
|
165
|
-
`suffix_list_urls` in order,
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
160
|
+
If the cached version does not exist, such as on the first run, HTTP
|
161
|
+
request the URLs in `suffix_list_urls` in order, and use the first
|
162
|
+
successful response for public suffix definitions. Subsequent, untried
|
163
|
+
URLs are ignored. The default URLs are the latest version of the
|
164
|
+
Mozilla Public Suffix List and its mirror, but any similar document URL
|
165
|
+
could be specified. Local files can be specified by using the `file://`
|
166
|
+
protocol (see `urllib2` documentation). To disable HTTP requests, set
|
167
|
+
this to an empty sequence.
|
171
168
|
|
172
169
|
If there is no cached version loaded and no data is found from the `suffix_list_urls`,
|
173
170
|
the module will fall back to the included TLD set snapshot. If you do not want
|
@@ -179,7 +176,9 @@ class TLDExtract:
|
|
179
176
|
suffix, so these domains are excluded by default. If you'd like them
|
180
177
|
included instead, set `include_psl_private_domains` to True.
|
181
178
|
|
182
|
-
You can
|
179
|
+
You can specify additional suffixes in the `extra_suffixes` argument.
|
180
|
+
These will be merged into whatever public suffix definitions are
|
181
|
+
already in use by `tldextract`, above.
|
183
182
|
|
184
183
|
cache_fetch_timeout is passed unmodified to the underlying request object
|
185
184
|
per the requests documentation here:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: tldextract
|
3
|
-
Version: 5.1.2
|
3
|
+
Version: 5.1.3
|
4
4
|
Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
|
5
5
|
Author-email: John Kurkowski <john.kurkowski@gmail.com>
|
6
6
|
License: BSD-3-Clause
|
@@ -10,12 +10,12 @@ Classifier: Development Status :: 5 - Production/Stable
|
|
10
10
|
Classifier: Topic :: Utilities
|
11
11
|
Classifier: License :: OSI Approved :: BSD License
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
13
|
-
Classifier: Programming Language :: Python :: 3.8
|
14
13
|
Classifier: Programming Language :: Python :: 3.9
|
15
14
|
Classifier: Programming Language :: Python :: 3.10
|
16
15
|
Classifier: Programming Language :: Python :: 3.11
|
17
16
|
Classifier: Programming Language :: Python :: 3.12
|
18
|
-
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
18
|
+
Requires-Python: >=3.9
|
19
19
|
Description-Content-Type: text/markdown
|
20
20
|
License-File: LICENSE
|
21
21
|
Requires-Dist: idna
|
@@ -26,7 +26,6 @@ Provides-Extra: release
|
|
26
26
|
Requires-Dist: build ; extra == 'release'
|
27
27
|
Requires-Dist: twine ; extra == 'release'
|
28
28
|
Provides-Extra: testing
|
29
|
-
Requires-Dist: black ; extra == 'testing'
|
30
29
|
Requires-Dist: mypy ; extra == 'testing'
|
31
30
|
Requires-Dist: pytest ; extra == 'testing'
|
32
31
|
Requires-Dist: pytest-gitignore ; extra == 'testing'
|
@@ -35,6 +34,7 @@ Requires-Dist: responses ; extra == 'testing'
|
|
35
34
|
Requires-Dist: ruff ; extra == 'testing'
|
36
35
|
Requires-Dist: syrupy ; extra == 'testing'
|
37
36
|
Requires-Dist: tox ; extra == 'testing'
|
37
|
+
Requires-Dist: tox-uv ; extra == 'testing'
|
38
38
|
Requires-Dist: types-filelock ; extra == 'testing'
|
39
39
|
Requires-Dist: types-requests ; extra == 'testing'
|
40
40
|
|
@@ -129,8 +129,8 @@ tldextract http://forums.bbc.co.uk
|
|
129
129
|
|
130
130
|
Beware when first calling `tldextract`, it updates its TLD list with a live HTTP
|
131
131
|
request. This updated TLD set is usually cached indefinitely in `$HOME/.cache/python-tldextract`.
|
132
|
-
To control the cache's location, set TLDEXTRACT_CACHE environment variable or set the
|
133
|
-
cache_dir path
|
132
|
+
To control the cache's location, set the `TLDEXTRACT_CACHE` environment variable or set the
|
133
|
+
`cache_dir` path when constructing a `TLDExtract`.
|
134
134
|
|
135
135
|
(Arguably runtime bootstrapping like that shouldn't be the default behavior,
|
136
136
|
like for production systems. But I want you to have the latest TLDs, especially
|
@@ -219,10 +219,12 @@ extract = tldextract.TLDExtract(
|
|
219
219
|
fallback_to_snapshot=False)
|
220
220
|
```
|
221
221
|
|
222
|
-
|
223
|
-
|
222
|
+
If the cached version of public suffix definitions doesn't exist, such as on
|
223
|
+
the first run, the above snippet will request the URLs you specified in order,
|
224
|
+
and use the first successful response.
|
224
225
|
|
225
|
-
If you want to use input data from your local filesystem,
|
226
|
+
If you want to use input data from your local filesystem, use the `file://`
|
227
|
+
protocol with an absolute path:
|
226
228
|
|
227
229
|
```python
|
228
230
|
extract = tldextract.TLDExtract(
|
@@ -231,17 +233,24 @@ extract = tldextract.TLDExtract(
|
|
231
233
|
fallback_to_snapshot=False)
|
232
234
|
```
|
233
235
|
|
234
|
-
|
235
|
-
`os.path` is your friend.
|
236
|
-
|
237
|
-
The command line update command can be used with a URL or local file you specify:
|
236
|
+
This also works via command line update:
|
238
237
|
|
239
238
|
```zsh
|
240
239
|
tldextract --update --suffix_list_url "http://foo.bar.baz"
|
241
240
|
```
|
242
241
|
|
243
|
-
|
244
|
-
list on first use, or if you are behind a complex
|
242
|
+
Using your own URLs could be useful in production when you don't want the delay
|
243
|
+
with updating the suffix list on first use, or if you are behind a complex
|
244
|
+
firewall.
|
245
|
+
|
246
|
+
You can also specify additional suffixes in the `extra_suffixes` param. These
|
247
|
+
will be merged into whatever public suffix definitions are already in use by
|
248
|
+
`tldextract`.
|
249
|
+
|
250
|
+
```python
|
251
|
+
extract = tldextract.TLDExtract(
|
252
|
+
extra_suffixes=["foo", "bar", "baz"])
|
253
|
+
```
|
245
254
|
|
246
255
|
## FAQ
|
247
256
|
|
@@ -250,9 +259,9 @@ list on first use, or if you are behind a complex firewall that prevents a simpl
|
|
250
259
|
This project doesn't contain an actual list of public suffixes. That comes from
|
251
260
|
[the Public Suffix List (PSL)](https://publicsuffix.org/). Submit amendments there.
|
252
261
|
|
253
|
-
|
262
|
+
In the meantime, you can tell tldextract about your exception by either
|
254
263
|
forking the PSL and using your fork in the `suffix_list_urls` param, or adding
|
255
|
-
your suffix piecemeal with the `extra_suffixes` param.
|
264
|
+
your suffix piecemeal with the `extra_suffixes` param.
|
256
265
|
|
257
266
|
### I see my suffix in [the Public Suffix List (PSL)](https://publicsuffix.org/), but this library doesn't extract it.
|
258
267
|
|
@@ -309,5 +318,5 @@ tox -e py311
|
|
309
318
|
Automatically format all code:
|
310
319
|
|
311
320
|
```zsh
|
312
|
-
|
321
|
+
ruff format .
|
313
322
|
```
|
@@ -0,0 +1,16 @@
|
|
1
|
+
tldextract/.tld_set_snapshot,sha256=ka7u1d_ISlMmXZXPuw57tS347XWiSidWiBfNaVeKL4A,316061
|
2
|
+
tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
|
3
|
+
tldextract/__main__.py,sha256=oiZ5EW_lxRLH6Khk6MdzXf7a1Ld5-A3k4wOFRmNNk2o,89
|
4
|
+
tldextract/_version.py,sha256=VdKaCcy2qOG-0Ob0KB_SDtakGRI7V4T0oP9kN9zPqhE,411
|
5
|
+
tldextract/cache.py,sha256=nrT9VuLmrjHHFxj-Cai97IyUXXenCX6KbHi07mPkzMc,8289
|
6
|
+
tldextract/cli.py,sha256=nCzBAFrgAopTK1t5eBRQgeveSgWheUx4LAlAHE_8mzQ,3010
|
7
|
+
tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
+
tldextract/remote.py,sha256=sklRFbATwPs_S33-KeIu9ixuSWP5w7QXO8jnhi_lgJs,1944
|
9
|
+
tldextract/suffix_list.py,sha256=ePH6iOIUBe0JE_mc07a34Nd7tFyfmHgP_mJkFhxzr7c,3947
|
10
|
+
tldextract/tldextract.py,sha256=vlCp78tE3R7P__RmQusIz0T9_lq5ZTWDvtFV-tAKPiU,18846
|
11
|
+
tldextract-5.1.3.dist-info/LICENSE,sha256=dKIruBYZ9wJFoTWv8hvg2bhDv9TXDQ82u-0EERuGJYg,1527
|
12
|
+
tldextract-5.1.3.dist-info/METADATA,sha256=RBH5Qs1hdA36toKVJC8mxKNgSg7edeuKDSY52r096N8,11622
|
13
|
+
tldextract-5.1.3.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
14
|
+
tldextract-5.1.3.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
|
15
|
+
tldextract-5.1.3.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
|
16
|
+
tldextract-5.1.3.dist-info/RECORD,,
|
@@ -1,16 +0,0 @@
|
|
1
|
-
tldextract/.tld_set_snapshot,sha256=TVya0bCcmRKl_16oPKPIlNmWS09rXrjOKGgYjhvAGLE,238022
|
2
|
-
tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
|
3
|
-
tldextract/__main__.py,sha256=oiZ5EW_lxRLH6Khk6MdzXf7a1Ld5-A3k4wOFRmNNk2o,89
|
4
|
-
tldextract/_version.py,sha256=iJQJoAO8HGnLsPBpH1rkF4KPbrYxIqs4qAXfUgzhRqQ,411
|
5
|
-
tldextract/cache.py,sha256=vsr4ERgNxmBO_mYwXLCMbRRKq1s-IDZZLXoaGIYXmBM,8601
|
6
|
-
tldextract/cli.py,sha256=nCzBAFrgAopTK1t5eBRQgeveSgWheUx4LAlAHE_8mzQ,3010
|
7
|
-
tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
-
tldextract/remote.py,sha256=sklRFbATwPs_S33-KeIu9ixuSWP5w7QXO8jnhi_lgJs,1944
|
9
|
-
tldextract/suffix_list.py,sha256=TcUpMTZwsicZn6_eHKqA4bjurQrKYde14P-4HT4s4yE,3896
|
10
|
-
tldextract/tldextract.py,sha256=oUYLJcgWmeika0teDq2nNI5UCSbAR0c3eosYslVJPUY,18731
|
11
|
-
tldextract-5.1.2.dist-info/LICENSE,sha256=dKIruBYZ9wJFoTWv8hvg2bhDv9TXDQ82u-0EERuGJYg,1527
|
12
|
-
tldextract-5.1.2.dist-info/METADATA,sha256=dkiY2wl_8M2guJ0MGhGi0YQ9OgZI4vGpJ0I9LMLSGyQ,11464
|
13
|
-
tldextract-5.1.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
14
|
-
tldextract-5.1.2.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
|
15
|
-
tldextract-5.1.2.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
|
16
|
-
tldextract-5.1.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|