tldextract 5.1.2__py3-none-any.whl → 5.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tldextract/__init__.py CHANGED
@@ -1,12 +1,14 @@
 """Export tldextract's public interface."""
 
 from . import _version
-from .tldextract import TLDExtract, extract
+from .tldextract import ExtractResult, TLDExtract, extract, update
 
 __version__: str = _version.version
 
 __all__ = [
+    "__version__",
     "extract",
+    "ExtractResult",
     "TLDExtract",
-    "__version__",
+    "update",
 ]
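With this change, `ExtractResult` and `update` join `extract` and `TLDExtract` as top-level exports, and `__all__` now advertises `__version__` too. A minimal sketch of how the widened 5.2.0 surface can be used (names taken from the import line above):

```python
import tldextract
from tldextract import ExtractResult, update


def registered(url: str) -> str:
    # ExtractResult is now importable from the package root, e.g. for type hints.
    result: ExtractResult = tldextract.extract(url)
    return result.registered_domain


print(registered("http://forums.bbc.co.uk"))  # bbc.co.uk

# update() refreshes the module-level extractor's cached Public Suffix List.
update()
```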
tldextract/_version.py CHANGED
@@ -1,8 +1,13 @@
-# file generated by setuptools_scm
+# file generated by setuptools-scm
 # don't change, don't track in version control
+
+__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+
 TYPE_CHECKING = False
 if TYPE_CHECKING:
-    from typing import Tuple, Union
+    from typing import Tuple
+    from typing import Union
+
     VERSION_TUPLE = Tuple[Union[int, str], ...]
 else:
     VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '5.1.2'
-__version_tuple__ = version_tuple = (5, 1, 2)
+__version__ = version = '5.2.0'
+__version_tuple__ = version_tuple = (5, 2, 0)
tldextract/cache.py CHANGED
@@ -24,18 +24,6 @@ _DID_LOG_UNABLE_TO_CACHE = False
 
 T = TypeVar("T")
 
-if sys.version_info >= (3, 9):
-
-    def md5(*args: bytes) -> hashlib._Hash:
-        """Use argument only available in newer Python.
-
-        In this file, MD5 is only used for cache location, not security.
-        """
-        return hashlib.md5(*args, usedforsecurity=False)
-
-else:
-    md5 = hashlib.md5
-
 
 def get_pkg_unique_identifier() -> str:
     """Generate an identifier unique to the python version, tldextract version, and python instance.
@@ -51,7 +39,9 @@ def get_pkg_unique_identifier() -> str:
     tldextract_version = "tldextract-" + version
     python_env_name = os.path.basename(sys.prefix)
     # just to handle the edge case of two identically named python environments
-    python_binary_path_short_hash = md5(sys.prefix.encode("utf-8")).hexdigest()[:6]
+    python_binary_path_short_hash = hashlib.md5(
+        sys.prefix.encode("utf-8"), usedforsecurity=False
+    ).hexdigest()[:6]
     python_version = ".".join([str(v) for v in sys.version_info[:-1]])
     identifier_parts = [
         python_version,
@@ -113,8 +103,7 @@ class DiskCache:
             with open(cache_filepath) as cache_file:
                 return json.load(cache_file)
         except (OSError, ValueError) as exc:
-            LOG.error("error reading TLD cache file %s: %s", cache_filepath, exc)
-            raise KeyError("namespace: " + namespace + " key: " + repr(key)) from None
+            raise KeyError("namespace: " + namespace + " key: " + repr(key)) from exc
 
     def set(  # noqa: A003
         self, namespace: str, key: str | dict[str, Hashable], value: object
@@ -238,7 +227,7 @@ def _fetch_url(session: requests.Session, url: str, timeout: int | None) -> str:
 
 def _make_cache_key(inputs: str | dict[str, Hashable]) -> str:
     key = repr(inputs)
-    return md5(key.encode("utf8")).hexdigest()
+    return hashlib.md5(key.encode("utf8"), usedforsecurity=False).hexdigest()
 
 
 def _make_dir(filename: str) -> None:
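Now that the package requires Python >= 3.9 (see the METADATA changes below), the 3.8-compatible `md5` shim is gone and call sites pass `usedforsecurity=False` directly. That keyword, available in `hashlib.md5` since Python 3.9, marks the digest as non-cryptographic so hardened (e.g. FIPS) interpreter builds don't reject it; here MD5 only names cache entries. A standalone sketch of the pattern:

```python
import hashlib


def cache_key(text: str) -> str:
    # usedforsecurity=False: the hash only locates a cache file, so a
    # collision is a correctness/performance concern, not a security one.
    return hashlib.md5(text.encode("utf8"), usedforsecurity=False).hexdigest()


print(cache_key("('urls', ...)"))  # e.g. a repr()-based cache key, as in _make_cache_key
```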
tldextract/remote.py CHANGED
@@ -46,7 +46,7 @@ def _schemeless_url(url: str) -> str:
         return url[2:]
     if (
         double_slashes_start < 2
-        or not url[double_slashes_start - 1] == ":"
+        or url[double_slashes_start - 1] != ":"
         or set(url[: double_slashes_start - 1]) - scheme_chars_set
     ):
         return url
tldextract/suffix_list.py CHANGED
@@ -47,7 +47,9 @@ def find_first_response(
                 session=session, url=url, timeout=cache_fetch_timeout
             )
         except requests.exceptions.RequestException:
-            LOG.exception("Exception reading Public Suffix List url %s", url)
+            LOG.warning(
+                "Exception reading Public Suffix List url %s", url, exc_info=True
+            )
         finally:
             # Ensure the session is always closed if it's constructed in the method
             if session_created:
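A single URL failing is no longer logged as an error, since a later mirror may still succeed: `LOG.exception(...)` logs at ERROR level, while the replacement logs at WARNING and keeps the traceback via `exc_info=True`. A small illustration of the two calls (plain stdlib `logging` semantics, not tldextract code):

```python
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("demo")

try:
    raise OSError("connection refused")  # stand-in for a failed PSL fetch
except OSError:
    log.exception("fetch failed: %s", "url")  # ERROR, traceback attached implicitly
    log.warning("fetch failed: %s", "url", exc_info=True)  # WARNING, same traceback
```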
tldextract/tldextract.py CHANGED
@@ -4,30 +4,30 @@ It does this via the Public Suffix List (PSL).
 
 >>> import tldextract
 
->>> tldextract.extract('http://forums.news.cnn.com/')
+>>> tldextract.extract("http://forums.news.cnn.com/")
 ExtractResult(subdomain='forums.news', domain='cnn', suffix='com', is_private=False)
 
->>> tldextract.extract('http://forums.bbc.co.uk/')  # United Kingdom
+>>> tldextract.extract("http://forums.bbc.co.uk/")  # United Kingdom
 ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False)
 
->>> tldextract.extract('http://www.worldbank.org.kg/')  # Kyrgyzstan
+>>> tldextract.extract("http://www.worldbank.org.kg/")  # Kyrgyzstan
 ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
 
 Note subdomain and suffix are _optional_. Not all URL-like inputs have a
 subdomain or a valid suffix.
 
->>> tldextract.extract('google.com')
+>>> tldextract.extract("google.com")
 ExtractResult(subdomain='', domain='google', suffix='com', is_private=False)
 
->>> tldextract.extract('google.notavalidsuffix')
+>>> tldextract.extract("google.notavalidsuffix")
 ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_private=False)
 
->>> tldextract.extract('http://127.0.0.1:8080/deployed/')
+>>> tldextract.extract("http://127.0.0.1:8080/deployed/")
 ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
 
 To rejoin the original hostname, if it was indeed a valid, registered hostname:
 
->>> ext = tldextract.extract('http://forums.bbc.co.uk')
+>>> ext = tldextract.extract("http://forums.bbc.co.uk")
 >>> ext.registered_domain
 'bbc.co.uk'
 >>> ext.fqdn
@@ -36,7 +36,6 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
 
 from __future__ import annotations
 
-import logging
 import os
 import urllib.parse
 from collections.abc import Collection, Sequence
@@ -50,9 +49,6 @@ from .cache import DiskCache, get_cache_dir
 from .remote import lenient_netloc, looks_like_ip, looks_like_ipv6
 from .suffix_list import get_suffix_lists
 
-LOG = logging.getLogger("tldextract")
-
-
 CACHE_TIMEOUT = os.environ.get("TLDEXTRACT_CACHE_TIMEOUT")
 
 PUBLIC_SUFFIX_LIST_URLS = (
@@ -65,21 +61,48 @@ PUBLIC_SUFFIX_LIST_URLS = (
 class ExtractResult:
     """A URL's extracted subdomain, domain, and suffix.
 
-    Also contains metadata, like a flag that indicates if the URL has a private suffix.
+    These first 3 fields are what most users of this library will care about.
+    They are the split, non-overlapping hostname components of the input URL.
+    They can be used to rebuild the original URL's hostname.
+
+    Beyond the first 3 fields, the class contains metadata fields, like a flag
+    that indicates if the input URL's suffix is from a private domain.
     """
 
     subdomain: str
+    """All subdomains beneath the domain of the input URL, if it contained any such subdomains, or else the empty string."""
+
     domain: str
+    """The topmost domain of the input URL, if it contained a domain name, or else everything hostname-like in the input.
+
+    If the input URL didn't contain a real domain name, the `suffix` field will
+    be empty, and this field will catch values like an IP address, or
+    private network hostnames like "localhost".
+    """
+
     suffix: str
+    """The public suffix of the input URL, if it contained one, or else the empty string.
+
+    If `include_psl_private_domains` was set to `False`, this field is the same
+    as `registry_suffix`, i.e. a domain under which people can register
+    subdomains through a registrar. If `include_psl_private_domains` was set to
+    `True`, this field may be a PSL private domain, like "blogspot.com".
+    """
+
     is_private: bool
+    """Whether the input URL belongs in the Public Suffix List's private domains.
+
+    If `include_psl_private_domains` was set to `False`, this field is always
+    `False`.
+    """
 
     @property
     def registered_domain(self) -> str:
-        """Joins the domain and suffix fields with a dot, if they're both set.
+        """The `domain` and `suffix` fields joined with a dot, if they're both set, or else the empty string.
 
-        >>> extract('http://forums.bbc.co.uk').registered_domain
+        >>> extract("http://forums.bbc.co.uk").registered_domain
         'bbc.co.uk'
-        >>> extract('http://localhost:8080').registered_domain
+        >>> extract("http://localhost:8080").registered_domain
         ''
         """
         if self.suffix and self.domain:
@@ -88,11 +111,11 @@ class ExtractResult:
 
     @property
     def fqdn(self) -> str:
-        """Returns a Fully Qualified Domain Name, if there is a proper domain/suffix.
+        """The Fully Qualified Domain Name (FQDN), if there is a proper `domain` and `suffix`, or else the empty string.
 
-        >>> extract('http://forums.bbc.co.uk/path/to/file').fqdn
+        >>> extract("http://forums.bbc.co.uk/path/to/file").fqdn
         'forums.bbc.co.uk'
-        >>> extract('http://localhost:8080').fqdn
+        >>> extract("http://localhost:8080").fqdn
         ''
         """
         if self.suffix and (self.domain or self.is_private):
@@ -101,13 +124,13 @@ class ExtractResult:
 
     @property
     def ipv4(self) -> str:
-        """Returns the ipv4 if that is what the presented domain/url is.
+        """The IPv4 address, if that is what the input domain/URL was, or else the empty string.
 
-        >>> extract('http://127.0.0.1/path/to/file').ipv4
+        >>> extract("http://127.0.0.1/path/to/file").ipv4
         '127.0.0.1'
-        >>> extract('http://127.0.0.1.1/path/to/file').ipv4
+        >>> extract("http://127.0.0.1.1/path/to/file").ipv4
         ''
-        >>> extract('http://256.1.1.1').ipv4
+        >>> extract("http://256.1.1.1").ipv4
         ''
         """
         if (
@@ -120,13 +143,17 @@ class ExtractResult:
 
     @property
     def ipv6(self) -> str:
-        """Returns the ipv6 if that is what the presented domain/url is.
+        """The IPv6 address, if that is what the input domain/URL was, or else the empty string.
 
-        >>> extract('http://[aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.1]/path/to/file').ipv6
+        >>> extract(
+        ...     "http://[aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.1]/path/to/file"
+        ... ).ipv6
         'aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.1'
-        >>> extract('http://[aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.1.1]/path/to/file').ipv6
+        >>> extract(
+        ...     "http://[aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.1.1]/path/to/file"
+        ... ).ipv6
         ''
-        >>> extract('http://[aBcD:ef01:2345:6789:aBcD:ef01:256.0.0.1]').ipv6
+        >>> extract("http://[aBcD:ef01:2345:6789:aBcD:ef01:256.0.0.1]").ipv6
         ''
         """
         min_num_ipv6_chars = 4
@@ -141,6 +168,31 @@ class ExtractResult:
             return debracketed
         return ""
 
+    @property
+    def reverse_domain_name(self) -> str:
+        """The domain name in Reverse Domain Name Notation.
+
+        Joins extracted components of the input URL in reverse domain name
+        notation. The suffix is used as the leftmost component, followed by the
+        domain, then followed by the subdomain with its parts reversed.
+
+        Reverse Domain Name Notation is typically used to organize namespaces
+        for packages and plugins. Technically, a full reversal would reverse
+        the parts of the suffix, e.g. "co.uk" would become "uk.co", but this is
+        not done in practice when Reverse Domain Name Notation is called for.
+        So this property leaves the `suffix` part in its original order.
+
+        >>> extract("login.example.com").reverse_domain_name
+        'com.example.login'
+
+        >>> extract("login.example.co.uk").reverse_domain_name
+        'co.uk.example.login'
+        """
+        stack = [self.suffix, self.domain]
+        if self.subdomain:
+            stack.extend(reversed(self.subdomain.split(".")))
+        return ".".join(stack)
+
 
 class TLDExtract:
     """A callable for extracting, subdomain, domain, and suffix components from a URL."""
@@ -161,13 +213,14 @@ class TLDExtract:
         the `cache_dir` will live in the tldextract directory. You can disable
         the caching functionality of this module by setting `cache_dir` to `None`.
 
-        If the cached version does not exist (such as on the first run), HTTP request the URLs in
-        `suffix_list_urls` in order, until one returns public suffix list data. To disable HTTP
-        requests, set this to an empty sequence.
-
-        The default list of URLs point to the latest version of the Mozilla Public Suffix List and
-        its mirror, but any similar document could be specified. Local files can be specified by
-        using the `file://` protocol. (See `urllib2` documentation.)
+        If the cached version does not exist, such as on the first run, HTTP
+        request the URLs in `suffix_list_urls` in order, and use the first
+        successful response for public suffix definitions. Subsequent, untried
+        URLs are ignored. The default URLs are the latest version of the
+        Mozilla Public Suffix List and its mirror, but any similar document URL
+        could be specified. Local files can be specified by using the `file://`
+        protocol (see `urllib2` documentation). To disable HTTP requests, set
+        this to an empty sequence.
 
         If there is no cached version loaded and no data is found from the `suffix_list_urls`,
         the module will fall back to the included TLD set snapshot. If you do not want
@@ -179,7 +232,9 @@ class TLDExtract:
         suffix, so these domains are excluded by default. If you'd like them
         included instead, set `include_psl_private_domains` to True.
 
-        You can pass additional suffixes in `extra_suffixes` argument without changing list URL
+        You can specify additional suffixes in the `extra_suffixes` argument.
+        These will be merged into whatever public suffix definitions are
+        already in use by `tldextract`, above.
 
         cache_fetch_timeout is passed unmodified to the underlying request object
         per the requests documentation here:
@@ -237,9 +292,9 @@ class TLDExtract:
         I.e. its effective TLD, gTLD, ccTLD, etc. components.
 
         >>> extractor = TLDExtract()
-        >>> extractor.extract_str('http://forums.news.cnn.com/')
+        >>> extractor.extract_str("http://forums.news.cnn.com/")
         ExtractResult(subdomain='forums.news', domain='cnn', suffix='com', is_private=False)
-        >>> extractor.extract_str('http://forums.bbc.co.uk/')
+        >>> extractor.extract_str("http://forums.bbc.co.uk/")
         ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False)
 
         Allows configuring the HTTP request via the optional `session`
@@ -272,9 +327,11 @@ class TLDExtract:
         name has already been parsed.
 
         >>> extractor = TLDExtract()
-        >>> extractor.extract_urllib(urllib.parse.urlsplit('http://forums.news.cnn.com/'))
+        >>> extractor.extract_urllib(
+        ...     urllib.parse.urlsplit("http://forums.news.cnn.com/")
+        ... )
         ExtractResult(subdomain='forums.news', domain='cnn', suffix='com', is_private=False)
-        >>> extractor.extract_urllib(urllib.parse.urlsplit('http://forums.bbc.co.uk/'))
+        >>> extractor.extract_urllib(urllib.parse.urlsplit("http://forums.bbc.co.uk/"))
         ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False)
         """
         return self._extract_netloc(
@@ -298,9 +355,9 @@ class TLDExtract:
             len(netloc_with_ascii_dots) >= min_num_ipv6_chars
             and netloc_with_ascii_dots[0] == "["
             and netloc_with_ascii_dots[-1] == "]"
+            and looks_like_ipv6(netloc_with_ascii_dots[1:-1])
         ):
-            if looks_like_ipv6(netloc_with_ascii_dots[1:-1]):
-                return ExtractResult("", netloc_with_ascii_dots, "", is_private=False)
+            return ExtractResult("", netloc_with_ascii_dots, "", is_private=False)
 
         labels = netloc_with_ascii_dots.split(".")
 
@@ -330,9 +387,9 @@ class TLDExtract:
 
     @property
     def tlds(self, session: requests.Session | None = None) -> list[str]:
-        """Returns the list of tld's used by default.
+        """The list of TLDs used by default.
 
-        This will vary based on `include_psl_private_domains` and `extra_suffixes`
+        This will vary based on `include_psl_private_domains` and `extra_suffixes`.
         """
         return list(self._get_tld_extractor(session=session).tlds())
 
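The user-visible addition in this file is the `reverse_domain_name` property. As its doctests above show, the suffix keeps its internal order while subdomain labels are reversed. A quick usage sketch against the new API:

```python
import tldextract

ext = tldextract.extract("login.example.co.uk")
# Suffix first (order preserved), then domain, then reversed subdomain labels.
assert ext.reverse_domain_name == "co.uk.example.login"

# Equivalent manual construction from the split fields:
parts = [ext.suffix, ext.domain, *reversed(ext.subdomain.split("."))]
assert ".".join(parts) == ext.reverse_domain_name
```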
tldextract-5.1.2.dist-info/METADATA → tldextract-5.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: tldextract
-Version: 5.1.2
+Version: 5.2.0
 Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
 Author-email: John Kurkowski <john.kurkowski@gmail.com>
 License: BSD-3-Clause
@@ -10,33 +10,34 @@ Classifier: Development Status :: 5 - Production/Stable
 Classifier: Topic :: Utilities
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Python: >=3.8
+Classifier: Programming Language :: Python :: 3.13
+Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: idna
-Requires-Dist: requests >=2.1.0
-Requires-Dist: requests-file >=1.4
-Requires-Dist: filelock >=3.0.8
+Requires-Dist: requests>=2.1.0
+Requires-Dist: requests-file>=1.4
+Requires-Dist: filelock>=3.0.8
 Provides-Extra: release
-Requires-Dist: build ; extra == 'release'
-Requires-Dist: twine ; extra == 'release'
+Requires-Dist: build; extra == "release"
+Requires-Dist: twine; extra == "release"
 Provides-Extra: testing
-Requires-Dist: black ; extra == 'testing'
-Requires-Dist: mypy ; extra == 'testing'
-Requires-Dist: pytest ; extra == 'testing'
-Requires-Dist: pytest-gitignore ; extra == 'testing'
-Requires-Dist: pytest-mock ; extra == 'testing'
-Requires-Dist: responses ; extra == 'testing'
-Requires-Dist: ruff ; extra == 'testing'
-Requires-Dist: syrupy ; extra == 'testing'
-Requires-Dist: tox ; extra == 'testing'
-Requires-Dist: types-filelock ; extra == 'testing'
-Requires-Dist: types-requests ; extra == 'testing'
+Requires-Dist: mypy; extra == "testing"
+Requires-Dist: pytest; extra == "testing"
+Requires-Dist: pytest-gitignore; extra == "testing"
+Requires-Dist: pytest-mock; extra == "testing"
+Requires-Dist: responses; extra == "testing"
+Requires-Dist: ruff; extra == "testing"
+Requires-Dist: syrupy; extra == "testing"
+Requires-Dist: tox; extra == "testing"
+Requires-Dist: tox-uv; extra == "testing"
+Requires-Dist: types-filelock; extra == "testing"
+Requires-Dist: types-requests; extra == "testing"
+Dynamic: license-file
 
 # tldextract [![PyPI version](https://badge.fury.io/py/tldextract.svg)](https://badge.fury.io/py/tldextract) [![Build Status](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml/badge.svg)](https://github.com/john-kurkowski/tldextract/actions/workflows/ci.yml)
 
@@ -95,8 +96,17 @@ To rejoin the original hostname, if it was indeed a valid, registered hostname:
 'forums.bbc.co.uk'
 ```
 
+In addition to the Python interface, there is a command-line interface. Split
+the URL components by space:
+
+```zsh
+$ tldextract 'http://forums.bbc.co.uk'
+forums bbc co.uk
+```
+
 By default, this package supports the public ICANN TLDs and their exceptions.
-You can optionally support the Public Suffix List's private domains as well.
+You can optionally support the Public Suffix List's [private
+domains](#public-vs-private-domains) as well.
 
 This package started by implementing the chosen answer from [this StackOverflow question on
 getting the "domain name" from a URL](http://stackoverflow.com/questions/569137/how-to-get-domain-name-from-url/569219#569219).
@@ -118,19 +128,12 @@ Or the latest dev version:
 pip install -e 'git://github.com/john-kurkowski/tldextract.git#egg=tldextract'
 ```
 
-Command-line usage, splits the URL components by space:
-
-```zsh
-tldextract http://forums.bbc.co.uk
-# forums bbc co.uk
-```
-
 ## Note about caching
 
 Beware when first calling `tldextract`, it updates its TLD list with a live HTTP
 request. This updated TLD set is usually cached indefinitely in `$HOME/.cache/python-tldextract`.
-To control the cache's location, set TLDEXTRACT_CACHE environment variable or set the
-cache_dir path in TLDExtract initialization.
+To control the cache's location, set the `TLDEXTRACT_CACHE` environment variable or set the
+`cache_dir` path when constructing a `TLDExtract`.
 
 (Arguably runtime bootstrapping like that shouldn't be the default behavior,
 like for production systems. But I want you to have the latest TLDs, especially
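Both knobs named in the reworded caching note are existing API; a short sketch of each (the paths are illustrative):

```python
import os

# Option 1: environment variable, read when the cache is first used.
os.environ["TLDEXTRACT_CACHE"] = "/tmp/tldextract-cache"

import tldextract

# Option 2: explicit constructor argument.
extractor = tldextract.TLDExtract(cache_dir="/tmp/tldextract-cache")
print(extractor("forums.bbc.co.uk").registered_domain)  # bbc.co.uk
```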
@@ -188,15 +191,17 @@ ExtractResult(subdomain='waiterrant', domain='blogspot', suffix='com', is_privat
 ```
 
 The following overrides this.
+
 ```python
 >>> extract = tldextract.TLDExtract()
 >>> extract('waiterrant.blogspot.com', include_psl_private_domains=True)
 ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
 ```
 
-or to change the default for all extract calls,
+To change the default for all extract calls:
+
 ```python
->>> extract = tldextract.TLDExtract( include_psl_private_domains=True)
+>>> extract = tldextract.TLDExtract(include_psl_private_domains=True)
 >>> extract('waiterrant.blogspot.com')
 ExtractResult(subdomain='', domain='waiterrant', suffix='blogspot.com', is_private=True)
 ```
@@ -219,10 +224,12 @@ extract = tldextract.TLDExtract(
     fallback_to_snapshot=False)
 ```
 
-The above snippet will fetch from the URL *you* specified, upon first need to download the
-suffix list (i.e. if the cached version doesn't exist).
+If the cached version of public suffix definitions doesn't exist, such as on
+the first run, the above snippet will request the URLs you specified in order,
+and use the first successful response.
 
-If you want to use input data from your local filesystem, just use the `file://` protocol:
+If you want to use input data from your local filesystem, use the `file://`
+protocol with an absolute path:
 
 ```python
 extract = tldextract.TLDExtract(
@@ -231,17 +238,24 @@ extract = tldextract.TLDExtract(
     fallback_to_snapshot=False)
 ```
 
-Use an absolute path when specifying the `suffix_list_urls` keyword argument.
-`os.path` is your friend.
-
-The command line update command can be used with a URL or local file you specify:
+This also works via command line update:
 
 ```zsh
 tldextract --update --suffix_list_url "http://foo.bar.baz"
 ```
 
-This could be useful in production when you don't want the delay associated with updating the suffix
-list on first use, or if you are behind a complex firewall that prevents a simple update from working.
+Using your own URLs could be useful in production when you don't want the delay
+with updating the suffix list on first use, or if you are behind a complex
+firewall.
+
+You can also specify additional suffixes in the `extra_suffixes` param. These
+will be merged into whatever public suffix definitions are already in use by
+`tldextract`.
+
+```python
+extract = tldextract.TLDExtract(
+    extra_suffixes=["foo", "bar", "baz"])
+```
 
 ## FAQ
 
@@ -250,9 +264,9 @@ list on first use, or if you are behind a complex firewall that prevents a simpl
 This project doesn't contain an actual list of public suffixes. That comes from
 [the Public Suffix List (PSL)](https://publicsuffix.org/). Submit amendments there.
 
-(In the meantime, you can tell tldextract about your exception by either
+In the meantime, you can tell tldextract about your exception by either
 forking the PSL and using your fork in the `suffix_list_urls` param, or adding
-your suffix piecemeal with the `extra_suffixes` param.)
+your suffix piecemeal with the `extra_suffixes` param.
 
 ### I see my suffix in [the Public Suffix List (PSL)](https://publicsuffix.org/), but this library doesn't extract it.
 
@@ -309,5 +323,5 @@ tox -e py311
 Automatically format all code:
 
 ```zsh
-black .
+ruff format .
 ```
tldextract-5.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,16 @@
+tldextract/.tld_set_snapshot,sha256=tpMVwIXVOXJyS48t8RH_wymwyE_gpH1iyMkWVcx3Sjg,318581
+tldextract/__init__.py,sha256=1n2QxAmFCFp3X1A5O46wJOTZqWM2ukshNkkG-TrOaLQ,274
+tldextract/__main__.py,sha256=oiZ5EW_lxRLH6Khk6MdzXf7a1Ld5-A3k4wOFRmNNk2o,89
+tldextract/_version.py,sha256=1-tO6tx4p9okXz3ScGW6YFdQDbS8ruoK2_y0riYBx7M,511
+tldextract/cache.py,sha256=nrT9VuLmrjHHFxj-Cai97IyUXXenCX6KbHi07mPkzMc,8289
+tldextract/cli.py,sha256=nCzBAFrgAopTK1t5eBRQgeveSgWheUx4LAlAHE_8mzQ,3010
+tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tldextract/remote.py,sha256=rovbxR73G8To-QRrA_cgSfk0S3k0_B2SyYefXiLHrQg,1940
+tldextract/suffix_list.py,sha256=ePH6iOIUBe0JE_mc07a34Nd7tFyfmHgP_mJkFhxzr7c,3947
+tldextract/tldextract.py,sha256=tM2Lrj0yclAulBueRxAK40bzhMl86Ftz4FHaFGyec7k,21454
+tldextract-5.2.0.dist-info/licenses/LICENSE,sha256=ZUrmz9cSprvhQmqmUdHIWbD51Cytv6PDTMlJLruTLuI,1527
+tldextract-5.2.0.dist-info/METADATA,sha256=LNhqdHD4eMtqA1DaxoVw0YpFq-XqW4_dtqc53bUAriM,11709
+tldextract-5.2.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+tldextract-5.2.0.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
+tldextract-5.2.0.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
+tldextract-5.2.0.dist-info/RECORD,,
tldextract-5.1.2.dist-info/WHEEL → tldextract-5.2.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.43.0)
+Generator: setuptools (78.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
tldextract-5.1.2.dist-info/LICENSE → tldextract-5.2.0.dist-info/licenses/LICENSE CHANGED
@@ -1,6 +1,6 @@
 BSD 3-Clause License
 
-Copyright (c) 2013-2024, John Kurkowski
+Copyright (c) 2013-2025, John Kurkowski
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
tldextract-5.1.2.dist-info/RECORD REMOVED
@@ -1,16 +0,0 @@
-tldextract/.tld_set_snapshot,sha256=TVya0bCcmRKl_16oPKPIlNmWS09rXrjOKGgYjhvAGLE,238022
-tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
-tldextract/__main__.py,sha256=oiZ5EW_lxRLH6Khk6MdzXf7a1Ld5-A3k4wOFRmNNk2o,89
-tldextract/_version.py,sha256=iJQJoAO8HGnLsPBpH1rkF4KPbrYxIqs4qAXfUgzhRqQ,411
-tldextract/cache.py,sha256=vsr4ERgNxmBO_mYwXLCMbRRKq1s-IDZZLXoaGIYXmBM,8601
-tldextract/cli.py,sha256=nCzBAFrgAopTK1t5eBRQgeveSgWheUx4LAlAHE_8mzQ,3010
-tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tldextract/remote.py,sha256=sklRFbATwPs_S33-KeIu9ixuSWP5w7QXO8jnhi_lgJs,1944
-tldextract/suffix_list.py,sha256=TcUpMTZwsicZn6_eHKqA4bjurQrKYde14P-4HT4s4yE,3896
-tldextract/tldextract.py,sha256=oUYLJcgWmeika0teDq2nNI5UCSbAR0c3eosYslVJPUY,18731
-tldextract-5.1.2.dist-info/LICENSE,sha256=dKIruBYZ9wJFoTWv8hvg2bhDv9TXDQ82u-0EERuGJYg,1527
-tldextract-5.1.2.dist-info/METADATA,sha256=dkiY2wl_8M2guJ0MGhGi0YQ9OgZI4vGpJ0I9LMLSGyQ,11464
-tldextract-5.1.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-tldextract-5.1.2.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
-tldextract-5.1.2.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
-tldextract-5.1.2.dist-info/RECORD,,