tldextract 4.0.0__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tldextract/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '4.0.0'
16
- __version_tuple__ = version_tuple = (4, 0, 0)
15
+ __version__ = version = '5.0.0'
16
+ __version_tuple__ = version_tuple = (5, 0, 0)
tldextract/cache.py CHANGED
@@ -21,7 +21,7 @@ LOG = logging.getLogger(__name__)
21
21
 
22
22
  _DID_LOG_UNABLE_TO_CACHE = False
23
23
 
24
- T = TypeVar("T") # pylint: disable=invalid-name
24
+ T = TypeVar("T")
25
25
 
26
26
 
27
27
  def get_pkg_unique_identifier() -> str:
@@ -32,7 +32,6 @@ def get_pkg_unique_identifier() -> str:
32
32
  a new version of tldextract
33
33
  """
34
34
  try:
35
- # pylint: disable=import-outside-toplevel
36
35
  from tldextract._version import version
37
36
  except ImportError:
38
37
  version = "dev"
@@ -83,6 +82,7 @@ class DiskCache:
83
82
  """Disk _cache that only works for jsonable values."""
84
83
 
85
84
  def __init__(self, cache_dir: str | None, lock_timeout: int = 20):
85
+ """Construct a disk cache in the given directory."""
86
86
  self.enabled = bool(cache_dir)
87
87
  self.cache_dir = os.path.expanduser(str(cache_dir) or "")
88
88
  self.lock_timeout = lock_timeout
@@ -99,14 +99,13 @@ class DiskCache:
99
99
  if not os.path.isfile(cache_filepath):
100
100
  raise KeyError("namespace: " + namespace + " key: " + repr(key))
101
101
  try:
102
- # pylint: disable-next=unspecified-encoding
103
102
  with open(cache_filepath) as cache_file:
104
103
  return json.load(cache_file)
105
104
  except (OSError, ValueError) as exc:
106
105
  LOG.error("error reading TLD cache file %s: %s", cache_filepath, exc)
107
106
  raise KeyError("namespace: " + namespace + " key: " + repr(key)) from None
108
107
 
109
- def set(
108
+ def set( # noqa: A003
110
109
  self, namespace: str, key: str | dict[str, Hashable], value: object
111
110
  ) -> None:
112
111
  """Set a value in the disk cache."""
@@ -117,19 +116,16 @@ class DiskCache:
117
116
 
118
117
  try:
119
118
  _make_dir(cache_filepath)
120
- # pylint: disable-next=unspecified-encoding
121
119
  with open(cache_filepath, "w") as cache_file:
122
120
  json.dump(value, cache_file)
123
121
  except OSError as ioe:
124
- global _DID_LOG_UNABLE_TO_CACHE # pylint: disable=global-statement
122
+ global _DID_LOG_UNABLE_TO_CACHE
125
123
  if not _DID_LOG_UNABLE_TO_CACHE:
126
124
  LOG.warning(
127
- (
128
- "unable to cache %s.%s in %s. This could refresh the "
129
- "Public Suffix List over HTTP every app startup. "
130
- "Construct your `TLDExtract` with a writable `cache_dir` or "
131
- "set `cache_dir=None` to silence this warning. %s"
132
- ),
125
+ "unable to cache %s.%s in %s. This could refresh the "
126
+ "Public Suffix List over HTTP every app startup. "
127
+ "Construct your `TLDExtract` with a writable `cache_dir` or "
128
+ "set `cache_dir=None` to silence this warning. %s",
133
129
  namespace,
134
130
  key,
135
131
  cache_filepath,
@@ -181,15 +177,13 @@ class DiskCache:
181
177
  try:
182
178
  _make_dir(cache_filepath)
183
179
  except OSError as ioe:
184
- global _DID_LOG_UNABLE_TO_CACHE # pylint: disable=global-statement
180
+ global _DID_LOG_UNABLE_TO_CACHE
185
181
  if not _DID_LOG_UNABLE_TO_CACHE:
186
182
  LOG.warning(
187
- (
188
- "unable to cache %s.%s in %s. This could refresh the "
189
- "Public Suffix List over HTTP every app startup. "
190
- "Construct your `TLDExtract` with a writable `cache_dir` or "
191
- "set `cache_dir=None` to silence this warning. %s"
192
- ),
183
+ "unable to cache %s.%s in %s. This could refresh the "
184
+ "Public Suffix List over HTTP every app startup. "
185
+ "Construct your `TLDExtract` with a writable `cache_dir` or "
186
+ "set `cache_dir=None` to silence this warning. %s",
193
187
  namespace,
194
188
  key_args,
195
189
  cache_filepath,
@@ -199,8 +193,6 @@ class DiskCache:
199
193
 
200
194
  return func(**kwargs)
201
195
 
202
- # Disable lint of 3rd party (see also https://github.com/tox-dev/py-filelock/issues/102)
203
- # pylint: disable-next=abstract-class-instantiated
204
196
  with FileLock(lock_path, timeout=self.lock_timeout):
205
197
  try:
206
198
  result = cast(T, self.get(namespace=namespace, key=key_args))
tldextract/cli.py CHANGED
@@ -88,5 +88,5 @@ def main() -> None:
88
88
  sys.exit(1)
89
89
 
90
90
  for i in args.input:
91
- subdomain, domain, suffix, _ = tld_extract(i)
92
- print(f"{subdomain} {domain} {suffix}")
91
+ ext = tld_extract(i)
92
+ print(f"{ext.subdomain} {ext.domain} {ext.suffix}")
tldextract/suffix_list.py CHANGED
@@ -19,7 +19,7 @@ PUBLIC_SUFFIX_RE = re.compile(r"^(?P<suffix>[.*!]*\w[\S]*)", re.UNICODE | re.MUL
19
19
  PUBLIC_PRIVATE_SUFFIX_SEPARATOR = "// ===BEGIN PRIVATE DOMAINS==="
20
20
 
21
21
 
22
- class SuffixListNotFound(LookupError):
22
+ class SuffixListNotFound(LookupError): # noqa: N818
23
23
  """A recoverable error while looking up a suffix list.
24
24
 
25
25
  Recoverable because you can specify backups, or use this library's bundled
tldextract/tldextract.py CHANGED
@@ -13,18 +13,6 @@ It does this via the Public Suffix List (PSL).
13
13
  >>> tldextract.extract('http://www.worldbank.org.kg/') # Kyrgyzstan
14
14
  ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
15
15
 
16
- `ExtractResult` is a namedtuple, so it's simple to access the parts you want.
17
-
18
- >>> ext = tldextract.extract('http://forums.bbc.co.uk')
19
- >>> (ext.subdomain, ext.domain, ext.suffix)
20
- ('forums', 'bbc', 'co.uk')
21
- >>> # rejoin subdomain and domain
22
- >>> '.'.join(ext[:2])
23
- 'forums.bbc'
24
- >>> # a common alias
25
- >>> ext.registered_domain
26
- 'bbc.co.uk'
27
-
28
16
  Note subdomain and suffix are _optional_. Not all URL-like inputs have a
29
17
  subdomain or a valid suffix.
30
18
 
@@ -37,16 +25,13 @@ subdomain or a valid suffix.
37
25
  >>> tldextract.extract('http://127.0.0.1:8080/deployed/')
38
26
  ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
39
27
 
40
- If you want to rejoin the whole namedtuple, regardless of whether a subdomain
41
- or suffix were found:
28
+ To rejoin the original hostname, if it was indeed a valid, registered hostname:
42
29
 
43
- >>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
44
- >>> # this has unwanted dots
45
- >>> '.'.join(part for part in ext[:3])
46
- '.127.0.0.1.'
47
- >>> # join part only if truthy
48
- >>> '.'.join(part for part in ext[:3] if part)
49
- '127.0.0.1'
30
+ >>> ext = tldextract.extract('http://forums.bbc.co.uk')
31
+ >>> ext.registered_domain
32
+ 'bbc.co.uk'
33
+ >>> ext.fqdn
34
+ 'forums.bbc.co.uk'
50
35
  """
51
36
 
52
37
  from __future__ import annotations
@@ -55,10 +40,8 @@ import logging
55
40
  import os
56
41
  import urllib.parse
57
42
  from collections.abc import Collection, Sequence
43
+ from dataclasses import dataclass
58
44
  from functools import wraps
59
- from typing import (
60
- NamedTuple,
61
- )
62
45
 
63
46
  import idna
64
47
 
@@ -77,14 +60,17 @@ PUBLIC_SUFFIX_LIST_URLS = (
77
60
  )
78
61
 
79
62
 
80
- class ExtractResult(NamedTuple):
81
- """namedtuple of a URL's subdomain, domain, suffix,
82
- and flag that indicates if URL has private suffix."""
63
+ @dataclass(order=True)
64
+ class ExtractResult:
65
+ """A URL's extracted subdomain, domain, and suffix.
66
+
67
+ Also contains metadata, like a flag that indicates if the URL has a private suffix.
68
+ """
83
69
 
84
70
  subdomain: str
85
71
  domain: str
86
72
  suffix: str
87
- is_private: bool = False
73
+ is_private: bool
88
74
 
89
75
  @property
90
76
  def registered_domain(self) -> str:
@@ -111,9 +97,7 @@ class ExtractResult(NamedTuple):
111
97
  ''
112
98
  """
113
99
  if self.suffix and (self.domain or self.is_private):
114
- # Disable bogus lint error (https://github.com/PyCQA/pylint/issues/2568)
115
- # pylint: disable-next=not-an-iterable,unsubscriptable-object
116
- return ".".join(i for i in self[:3] if i)
100
+ return ".".join(i for i in (self.subdomain, self.domain, self.suffix) if i)
117
101
  return ""
118
102
 
119
103
  @property
@@ -164,8 +148,8 @@ class ExtractResult(NamedTuple):
164
148
  class TLDExtract:
165
149
  """A callable for extracting, subdomain, domain, and suffix components from a URL."""
166
150
 
167
- # TODO: Agreed with Pylint: too-many-arguments
168
- def __init__( # pylint: disable=too-many-arguments
151
+ # TODO: too-many-arguments
152
+ def __init__(
169
153
  self,
170
154
  cache_dir: str | None = get_cache_dir(),
171
155
  suffix_list_urls: Sequence[str] = PUBLIC_SUFFIX_LIST_URLS,
@@ -294,7 +278,7 @@ class TLDExtract:
294
278
  and netloc_with_ascii_dots[-1] == "]"
295
279
  ):
296
280
  if looks_like_ipv6(netloc_with_ascii_dots[1:-1]):
297
- return ExtractResult("", netloc_with_ascii_dots, "")
281
+ return ExtractResult("", netloc_with_ascii_dots, "", is_private=False)
298
282
 
299
283
  labels = netloc_with_ascii_dots.split(".")
300
284
 
@@ -371,6 +355,7 @@ class Trie:
371
355
  def __init__(
372
356
  self, matches: dict | None = None, end: bool = False, is_private: bool = False
373
357
  ) -> None:
358
+ """TODO."""
374
359
  self.matches = matches if matches else {}
375
360
  self.end = end
376
361
  self.is_private = is_private
@@ -411,16 +396,14 @@ class Trie:
411
396
 
412
397
 
413
398
  @wraps(TLD_EXTRACTOR.__call__)
414
- def extract( # pylint: disable=missing-function-docstring
399
+ def extract( # noqa: D103
415
400
  url: str, include_psl_private_domains: bool | None = False
416
401
  ) -> ExtractResult:
417
402
  return TLD_EXTRACTOR(url, include_psl_private_domains=include_psl_private_domains)
418
403
 
419
404
 
420
405
  @wraps(TLD_EXTRACTOR.update)
421
- def update( # type: ignore[no-untyped-def]
422
- *args, **kwargs
423
- ): # pylint: disable=missing-function-docstring
406
+ def update(*args, **kwargs): # type: ignore[no-untyped-def] # noqa: D103
424
407
  return TLD_EXTRACTOR.update(*args, **kwargs)
425
408
 
426
409
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tldextract
3
- Version: 4.0.0
3
+ Version: 5.0.0
4
4
  Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
5
5
  Author-email: John Kurkowski <john.kurkowski@gmail.com>
6
6
  License: BSD-3-Clause
@@ -10,12 +10,11 @@ Classifier: Development Status :: 5 - Production/Stable
10
10
  Classifier: Topic :: Utilities
11
11
  Classifier: License :: OSI Approved :: BSD License
12
12
  Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.7
14
13
  Classifier: Programming Language :: Python :: 3.8
15
14
  Classifier: Programming Language :: Python :: 3.9
16
15
  Classifier: Programming Language :: Python :: 3.10
17
16
  Classifier: Programming Language :: Python :: 3.11
18
- Requires-Python: >=3.7
17
+ Requires-Python: >=3.8
19
18
  Description-Content-Type: text/markdown
20
19
  License-File: LICENSE
21
20
  Requires-Dist: idna
@@ -56,20 +55,6 @@ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False
56
55
  ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
57
56
  ```
58
57
 
59
- `ExtractResult` is a namedtuple, so it's simple to access the parts you want.
60
-
61
- ```python
62
- >>> ext = tldextract.extract('http://forums.bbc.co.uk')
63
- >>> (ext.subdomain, ext.domain, ext.suffix)
64
- ('forums', 'bbc', 'co.uk')
65
- >>> # rejoin subdomain and domain
66
- >>> '.'.join(ext[:2])
67
- 'forums.bbc'
68
- >>> # a common alias
69
- >>> ext.registered_domain
70
- 'bbc.co.uk'
71
- ```
72
-
73
58
  Note subdomain and suffix are _optional_. Not all URL-like inputs have a
74
59
  subdomain or a valid suffix.
75
60
 
@@ -84,17 +69,14 @@ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_privat
84
69
  ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
85
70
  ```
86
71
 
87
- If you want to rejoin the whole namedtuple, regardless of whether a subdomain
88
- or suffix were found:
72
+ To rejoin the original hostname, if it was indeed a valid, registered hostname:
89
73
 
90
74
  ```python
91
- >>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
92
- >>> # this has unwanted dots
93
- >>> '.'.join(ext[:3])
94
- '.127.0.0.1.'
95
- >>> # join each part only if it's truthy
96
- >>> '.'.join(part for part in ext[:3] if part)
97
- '127.0.0.1'
75
+ >>> ext = tldextract.extract('http://forums.bbc.co.uk')
76
+ >>> ext.registered_domain
77
+ 'bbc.co.uk'
78
+ >>> ext.fqdn
79
+ 'forums.bbc.co.uk'
98
80
  ```
99
81
 
100
82
  By default, this package supports the public ICANN TLDs and their exceptions.
@@ -303,7 +285,7 @@ Run all tests against a specific Python environment configuration:
303
285
 
304
286
  ```zsh
305
287
  tox -l
306
- tox -e py37
288
+ tox -e py311
307
289
  ```
308
290
 
309
291
  ### Code Style
@@ -0,0 +1,16 @@
1
+ tldextract/.tld_set_snapshot,sha256=TVya0bCcmRKl_16oPKPIlNmWS09rXrjOKGgYjhvAGLE,238022
2
+ tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
3
+ tldextract/__main__.py,sha256=FxfCNOozXSaJP2GTjgWLAn03oNMd_EUUOWkfT1_YRgM,90
4
+ tldextract/_version.py,sha256=U7HnWMtKn0QTFHRJAzsVjr4cELMq3Toi6P5afKP6ah0,411
5
+ tldextract/cache.py,sha256=kcSovX7j1V43s3gOuav8nlFl5Dgkl0O576H3_Tiqkc0,8323
6
+ tldextract/cli.py,sha256=F5w9Haz7rWdrgIgRwZJn04t7qRBQAHUKzQnYXwDUfLs,2465
7
+ tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ tldextract/remote.py,sha256=dpLz-s-1AP4Ai4XPVQe-uT2Nmev8CZEMKURdqGw5XiA,2550
9
+ tldextract/suffix_list.py,sha256=3N8jlmFY-EbQ-kxT2iTryFpCCeGqBUm9NiUXKiAbaOY,3443
10
+ tldextract/tldextract.py,sha256=2AxRAWtT70jNSPcvBdk7FlrksUwbLDOXM9W8eaB13Bg,17585
11
+ tldextract-5.0.0.dist-info/LICENSE,sha256=oqlDTqZaKpeJ6jYsQYqTkmV8gGGg-o7cO_OnH79KjsE,1522
12
+ tldextract-5.0.0.dist-info/METADATA,sha256=pwH-aKifyF_J6gDZ-o6nPDHBSQtll7zuK1v_ceH4YCQ,10739
13
+ tldextract-5.0.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
14
+ tldextract-5.0.0.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
15
+ tldextract-5.0.0.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
16
+ tldextract-5.0.0.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- tldextract/.tld_set_snapshot,sha256=TVya0bCcmRKl_16oPKPIlNmWS09rXrjOKGgYjhvAGLE,238022
2
- tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
3
- tldextract/__main__.py,sha256=FxfCNOozXSaJP2GTjgWLAn03oNMd_EUUOWkfT1_YRgM,90
4
- tldextract/_version.py,sha256=TgVqVkMXXQVomuTpZfj8uxnyooVWsiw-3pM8cC2qwwE,411
5
- tldextract/cache.py,sha256=_hUjP-cw4BpR2TG-_XAD6YL1pQMNe64b4O0nVWMVLAY,8790
6
- tldextract/cli.py,sha256=5BMCp-DjY3_-KlZ1wnpycHrUHzaqLL4r4TiHl2-xiCU,2478
7
- tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- tldextract/remote.py,sha256=dpLz-s-1AP4Ai4XPVQe-uT2Nmev8CZEMKURdqGw5XiA,2550
9
- tldextract/suffix_list.py,sha256=W797R-PLIw_8yBPnopoMTimPiv1P1csh9Wcs-_E4Ous,3429
10
- tldextract/tldextract.py,sha256=kHtO_xVbSgOEsLU526gxHa5Mz5Pjxe6fP-xvJmzWosM,18272
11
- tldextract-4.0.0.dist-info/LICENSE,sha256=oqlDTqZaKpeJ6jYsQYqTkmV8gGGg-o7cO_OnH79KjsE,1522
12
- tldextract-4.0.0.dist-info/METADATA,sha256=MYr1oUzvOfV47wNqn276x-7NxY0gTfm-9S__ltSLbMI,11260
13
- tldextract-4.0.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
14
- tldextract-4.0.0.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
15
- tldextract-4.0.0.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
16
- tldextract-4.0.0.dist-info/RECORD,,