tldextract 3.4.1__py3-none-any.whl → 3.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tldextract/_version.py +2 -2
- tldextract/cache.py +9 -9
- tldextract/cli.py +2 -2
- tldextract/remote.py +26 -11
- tldextract/suffix_list.py +8 -8
- tldextract/tldextract.py +113 -60
- {tldextract-3.4.1.dist-info → tldextract-3.4.3.dist-info}/METADATA +4 -3
- tldextract-3.4.3.dist-info/RECORD +16 -0
- tldextract-3.4.1.dist-info/RECORD +0 -16
- {tldextract-3.4.1.dist-info → tldextract-3.4.3.dist-info}/LICENSE +0 -0
- {tldextract-3.4.1.dist-info → tldextract-3.4.3.dist-info}/WHEEL +0 -0
- {tldextract-3.4.1.dist-info → tldextract-3.4.3.dist-info}/entry_points.txt +0 -0
- {tldextract-3.4.1.dist-info → tldextract-3.4.3.dist-info}/top_level.txt +0 -0
tldextract/_version.py
CHANGED
tldextract/cache.py
CHANGED
@@ -1,4 +1,4 @@
-"""Helpers """
+"""Helpers."""
 import errno
 import hashlib
 import json
@@ -30,7 +30,7 @@ T = TypeVar("T")  # pylint: disable=invalid-name

 def get_pkg_unique_identifier() -> str:
     """
-    Generate an identifier unique to the python version, tldextract version, and python instance
+    Generate an identifier unique to the python version, tldextract version, and python instance.

     This will prevent interference between virtualenvs and issues that might arise when installing
     a new version of tldextract
@@ -61,7 +61,7 @@ def get_pkg_unique_identifier() -> str:

 def get_cache_dir() -> str:
     """
-    Get a cache dir that we have permission to write to
+    Get a cache dir that we have permission to write to.

     Try to follow the XDG standard, but if that doesn't work fallback to the package directory
     http://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
@@ -86,7 +86,7 @@ def get_cache_dir() -> str:


 class DiskCache:
-    """Disk _cache that only works for jsonable values"""
+    """Disk _cache that only works for jsonable values."""

     def __init__(self, cache_dir: Optional[str], lock_timeout: int = 20):
         self.enabled = bool(cache_dir)
@@ -115,7 +115,7 @@ class DiskCache:
     def set(
         self, namespace: str, key: Union[str, Dict[str, Hashable]], value: object
     ) -> None:
-        """Set a value in the disk cache"""
+        """Set a value in the disk cache."""
         if not self.enabled:
             return

@@ -142,7 +142,7 @@ class DiskCache:
             _DID_LOG_UNABLE_TO_CACHE = True

     def clear(self) -> None:
-        """Clear the disk cache"""
+        """Clear the disk cache."""
         for root, _, files in os.walk(self.cache_dir):
             for filename in files:
                 if filename.endswith(self.file_ext) or filename.endswith(
@@ -175,7 +175,7 @@ class DiskCache:
         kwargs: Dict[str, Hashable],
         hashed_argnames: Iterable[str],
     ) -> T:
-        """Get a url but cache the response"""
+        """Get a url but cache the response."""
         if not self.enabled:
             return func(**kwargs)

@@ -215,7 +215,7 @@ class DiskCache:
     def cached_fetch_url(
         self, session: requests.Session, url: str, timeout: Union[float, int, None]
     ) -> str:
-        """Get a url but cache the response"""
+        """Get a url but cache the response."""
         return self.run_and_cache(
             func=_fetch_url,
             namespace="urls",
@@ -241,7 +241,7 @@ def _make_cache_key(inputs: Union[str, Dict[str, Hashable]]) -> str:


 def _make_dir(filename: str) -> None:
-    """Make a directory if it doesn't already exist"""
+    """Make a directory if it doesn't already exist."""
     if not os.path.exists(os.path.dirname(filename)):
         try:
             os.makedirs(os.path.dirname(filename))
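The cache.py changes above are docstring punctuation only. For context, a minimal sketch of the DiskCache surface those docstrings describe, using only the constructor and methods visible in this diff (the temp directory is illustrative):

    import tempfile

    from tldextract.cache import DiskCache

    cache = DiskCache(cache_dir=tempfile.mkdtemp())  # enabled because cache_dir is truthy
    cache.set(namespace="urls", key="http://example.com", value="fetched body")
    cache.clear()  # removes this cache's files under cache_dir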
tldextract/cli.py
CHANGED
@@ -1,4 +1,4 @@
-"""tldextract CLI"""
+"""tldextract CLI."""


 import argparse
@@ -12,7 +12,7 @@ from .tldextract import TLDExtract


 def main() -> None:
-    """tldextract CLI main command"""
+    """Tldextract CLI main command."""
     logging.basicConfig()

     parser = argparse.ArgumentParser(
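Another docstring-only change, this time to the console entry point. The same main() backs the installed tldextract command; a sketch of exercising it in-process (the URL is illustrative):

    import sys

    from tldextract.cli import main

    # Equivalent to the shell command: tldextract http://forums.bbc.co.uk
    sys.argv = ["tldextract", "http://forums.bbc.co.uk"]
    main()  # prints the extracted parts, e.g.: forums bbc co.uk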
tldextract/remote.py
CHANGED
@@ -1,36 +1,51 @@
-"tldextract helpers for testing and fetching remote resources."
+"""tldextract helpers for testing and fetching remote resources."""

 import re
 import socket
 from urllib.parse import scheme_chars

 IP_RE = re.compile(
-    # pylint: disable-next=line-too-long
-    r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
+    r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.)"
+    r"{3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
 )

-SCHEME_RE = re.compile(r"^([" + scheme_chars + "]+:)?//")
+scheme_chars_set = set(scheme_chars)


 def lenient_netloc(url: str) -> str:
-    """Extract the netloc of a URL-like string
-    returned by urllib.parse.{urlparse,urlsplit}, but extract more leniently,
-    without raising errors."""
+    """Extract the netloc of a URL-like string.

+    Similar to the netloc attribute returned by
+    urllib.parse.{urlparse,urlsplit}, but extract more leniently, without
+    raising errors.
+    """
     return (
-        SCHEME_RE.sub("", url)
+        _schemeless_url(url)
         .partition("/")[0]
         .partition("?")[0]
         .partition("#")[0]
-        .partition("@")[-1]
+        .rpartition("@")[-1]
         .partition(":")[0]
         .strip()
-        .rstrip(".")
+        .rstrip(".\u3002\uff0e\uff61")
     )


+def _schemeless_url(url: str) -> str:
+    double_slashes_start = url.find("//")
+    if double_slashes_start == 0:
+        return url[2:]
+    if (
+        double_slashes_start < 2
+        or not url[double_slashes_start - 1] == ":"
+        or set(url[: double_slashes_start - 1]) - scheme_chars_set
+    ):
+        return url
+    return url[double_slashes_start + 2 :]
+
+
 def looks_like_ip(maybe_ip: str) -> bool:
-    """Does the given str look like an IP address?"""
+    """Check whether the given str looks like an IP address."""
     if not maybe_ip[0].isdigit():
         return False
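Beyond the docstring cleanup, remote.py replaces the old scheme-stripping regex with the _schemeless_url() helper, keeps the text after the last @ via rpartition() rather than the first, and now strips trailing Unicode dots itself. A quick sketch of the resulting lenient_netloc() behavior, with outputs traced by hand from the code above (assumes the 3.4.3 wheel is installed):

    from tldextract.remote import lenient_netloc

    print(lenient_netloc("https://user:pass@example.com:8080/a?b#c"))  # example.com
    print(lenient_netloc("//cdn.example.co.uk/lib.js"))  # cdn.example.co.uk

    # Fullwidth trailing dots are stripped here now that _UNICODE_DOTS_RE is
    # gone from tldextract.py:
    print(lenient_netloc("http://example.com\uff0e"))  # example.com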
tldextract/suffix_list.py
CHANGED
@@ -1,4 +1,4 @@
-"tldextract helpers for testing and fetching remote resources."
+"""tldextract helpers for testing and fetching remote resources."""

 import logging
 import pkgutil
@@ -17,8 +17,11 @@ PUBLIC_PRIVATE_SUFFIX_SEPARATOR = "// ===BEGIN PRIVATE DOMAINS==="


 class SuffixListNotFound(LookupError):
-    """A recoverable error while looking up a suffix list. Recoverable because
-    you can specify backups, or use this library's bundled snapshot."""
+    """A recoverable error while looking up a suffix list.
+
+    Recoverable because you can specify backups, or use this library's bundled
+    snapshot.
+    """


 def find_first_response(
@@ -26,9 +29,7 @@ def find_first_response(
     urls: Sequence[str],
     cache_fetch_timeout: Union[float, int, None] = None,
 ) -> str:
-    """Decode the first successfully fetched URL, from UTF-8 encoding to
-    Python unicode.
-    """
+    """Decode the first successfully fetched URL, from UTF-8 encoding to Python unicode."""
     with requests.Session() as session:
         session.mount("file://", FileAdapter())

@@ -46,8 +47,7 @@ def find_first_response(


 def extract_tlds_from_suffix_list(suffix_list_text: str) -> Tuple[List[str], List[str]]:
-    """Parse the raw suffix list text for its different designations of
-    suffixes."""
+    """Parse the raw suffix list text for its different designations of suffixes."""
     public_text, _, private_text = suffix_list_text.partition(
         PUBLIC_PRIVATE_SUFFIX_SEPARATOR
     )
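The suffix_list.py changes are docstring reflows. For orientation, a sketch of what extract_tlds_from_suffix_list() consumes and returns, on a hypothetical three-entry list (the entries are illustrative, and this assumes the released package's suffix-line parsing, which is not shown in this diff):

    from tldextract.suffix_list import extract_tlds_from_suffix_list

    sample = "com\nco.uk\n// ===BEGIN PRIVATE DOMAINS===\nblogspot.com\n"
    public, private = extract_tlds_from_suffix_list(sample)
    # Expected: public lists "com" and "co.uk"; private lists "blogspot.com",
    # since the text is partitioned on PUBLIC_PRIVATE_SUFFIX_SEPARATOR.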
tldextract/tldextract.py
CHANGED
@@ -1,5 +1,6 @@
-"""`tldextract` accurately separates the gTLD or ccTLD (generic or country code
-top-level domain) from the registered domain and subdomains of a URL.
+"""`tldextract` accurately separates a URL's subdomain, domain, and public suffix.
+
+It does this via the Public Suffix List (PSL).

 >>> import tldextract

@@ -48,12 +49,22 @@ or suffix were found:
 '127.0.0.1'
 """

+from __future__ import annotations
+
 import logging
 import os
-import re
-from functools import wraps
-from typing import FrozenSet, List, NamedTuple, Optional, Sequence, Union
 import urllib.parse
+from functools import wraps
+from typing import (
+    Collection,
+    Dict,
+    FrozenSet,
+    List,
+    NamedTuple,
+    Optional,
+    Sequence,
+    Union,
+)

 import idna

@@ -71,8 +82,6 @@ PUBLIC_SUFFIX_LIST_URLS = (
     "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat",
 )

-_UNICODE_DOTS_RE = re.compile("[\u002e\u3002\uff0e\uff61]")
-

 class ExtractResult(NamedTuple):
     """namedtuple of a URL's subdomain, domain, and suffix."""
@@ -91,8 +100,8 @@ class ExtractResult(NamedTuple):
         >>> extract('http://localhost:8080').registered_domain
         ''
         """
-        if self.domain and self.suffix:
-            return self.domain + "." + self.suffix
+        if self.suffix and self.domain:
+            return f"{self.domain}.{self.suffix}"
         return ""

     @property
@@ -105,7 +114,7 @@ class ExtractResult(NamedTuple):
         >>> extract('http://localhost:8080').fqdn
         ''
         """
-        if self.domain and self.suffix:
+        if self.suffix and self.domain:
             # Disable bogus lint error (https://github.com/PyCQA/pylint/issues/2568)
             # pylint: disable-next=not-an-iterable
             return ".".join(i for i in self if i)
@@ -114,7 +123,7 @@ class ExtractResult(NamedTuple):
     @property
     def ipv4(self) -> str:
         """
-        Returns the ipv4 if that is what the presented domain/url is
+        Returns the ipv4 if that is what the presented domain/url is.

         >>> extract('http://127.0.0.1/path/to/file').ipv4
         '127.0.0.1'
@@ -129,8 +138,7 @@ class ExtractResult(NamedTuple):


 class TLDExtract:
-    """A callable for extracting, subdomain, domain, and suffix components from
-    a URL."""
+    """A callable for extracting, subdomain, domain, and suffix components from a URL."""

     # TODO: Agreed with Pylint: too-many-arguments
     def __init__(  # pylint: disable=too-many-arguments
@@ -142,9 +150,7 @@ class TLDExtract:
         extra_suffixes: Sequence[str] = (),
         cache_fetch_timeout: Union[str, float, None] = CACHE_TIMEOUT,
     ) -> None:
-        """
-        Constructs a callable for extracting subdomain, domain, and suffix
-        components from a URL.
+        """Construct a callable for extracting subdomain, domain, and suffix components from a URL.

         Upon calling it, it first checks for a JSON in `cache_dir`. By default,
         the `cache_dir` will live in the tldextract directory. You can disable
@@ -207,17 +213,17 @@ class TLDExtract:
         self._cache = DiskCache(cache_dir)

     def __call__(
-        self, url: str, include_psl_private_domains: Optional[bool] = None
+        self, url: str, include_psl_private_domains: bool | None = None
     ) -> ExtractResult:
         """Alias for `extract_str`."""
         return self.extract_str(url, include_psl_private_domains)

     def extract_str(
-        self, url: str, include_psl_private_domains: Optional[bool] = None
+        self, url: str, include_psl_private_domains: bool | None = None
     ) -> ExtractResult:
-        """
-        Takes a string URL and splits it into its subdomain, domain, and
-        suffix (effective TLD, gTLD, ccTLD, etc.) components.
+        """Take a string URL and splits it into its subdomain, domain, and suffix components.
+
+        I.e. its effective TLD, gTLD, ccTLD, etc. components.

         >>> extractor = TLDExtract()
         >>> extractor.extract_str('http://forums.news.cnn.com/')
@@ -232,10 +238,10 @@ class TLDExtract:
         url: Union[urllib.parse.ParseResult, urllib.parse.SplitResult],
         include_psl_private_domains: Optional[bool] = None,
     ) -> ExtractResult:
-        """
-        Takes the output of urllib.parse URL parsing methods and further splits
-        the parsed URL into its subdomain, domain, and suffix (effective TLD,
-        gTLD, ccTLD, etc.) components.
+        """Take the output of urllib.parse URL parsing methods and further splits the parsed URL.
+
+        Splits the parsed URL into its subdomain, domain, and suffix
+        components, i.e. its effective TLD, gTLD, ccTLD, etc. components.

         This method is like `extract_str` but faster, as the string's domain
         name has already been parsed.
@@ -251,18 +257,22 @@ class TLDExtract:
     def _extract_netloc(
         self, netloc: str, include_psl_private_domains: Optional[bool]
     ) -> ExtractResult:
-        labels = _UNICODE_DOTS_RE.split(netloc)
+        labels = (
+            netloc.replace("\u3002", "\u002e")
+            .replace("\uff0e", "\u002e")
+            .replace("\uff61", "\u002e")
+            .split(".")
+        )

-        translations = [_decode_punycode(label) for label in labels]
         suffix_index = self._get_tld_extractor().suffix_index(
-            translations, include_psl_private_domains=include_psl_private_domains
+            labels, include_psl_private_domains=include_psl_private_domains
         )

-        suffix = ".".join(labels[suffix_index:])
-        if not suffix and netloc and looks_like_ip(netloc):
+        if suffix_index == len(labels) and netloc and looks_like_ip(netloc):
             return ExtractResult("", netloc, "")

-        subdomain = ".".join(labels[: suffix_index - 1]) if suffix_index else ""
+        suffix = ".".join(labels[suffix_index:]) if suffix_index != len(labels) else ""
+        subdomain = ".".join(labels[: suffix_index - 1]) if suffix_index >= 2 else ""
         domain = labels[suffix_index - 1] if suffix_index else ""
         return ExtractResult(subdomain, domain, suffix)

@@ -276,22 +286,23 @@
     @property
     def tlds(self) -> List[str]:
         """
-        Returns the list of tld's used by default
+        Returns the list of tld's used by default.

         This will vary based on `include_psl_private_domains` and `extra_suffixes`
         """
         return list(self._get_tld_extractor().tlds())

-    def _get_tld_extractor(self) -> "_PublicSuffixListTLDExtractor":
-        """Get or compute this object's TLDExtractor. Looks up the TLDExtractor
-        in roughly the following order, based on the settings passed to
-        __init__:
+    def _get_tld_extractor(self) -> _PublicSuffixListTLDExtractor:
+        """Get or compute this object's TLDExtractor.
+
+        Looks up the TLDExtractor in roughly the following order, based on the
+        settings passed to __init__:

         1. Memoized on `self`
         2. Local system _cache file
         3. Remote PSL, over HTTP
-        4. Bundled PSL snapshot file"""
-
+        4. Bundled PSL snapshot file
+        """
         if self._extractor:
             return self._extractor

@@ -317,6 +328,37 @@ class TLDExtract:
 TLD_EXTRACTOR = TLDExtract()


+class Trie:
+    """Trie for storing eTLDs with their labels in reverse-order."""
+
+    def __init__(self, matches: Optional[Dict] = None, end: bool = False) -> None:
+        self.matches = matches if matches else {}
+        self.end = end
+
+    @staticmethod
+    def create(suffixes: Collection[str]) -> Trie:
+        """Create a Trie from a list of suffixes and return its root node."""
+        root_node = Trie()
+
+        for suffix in suffixes:
+            suffix_labels = suffix.split(".")
+            suffix_labels.reverse()
+            root_node.add_suffix(suffix_labels)
+
+        return root_node
+
+    def add_suffix(self, labels: List[str]) -> None:
+        """Append a suffix's labels to this Trie node."""
+        node = self
+
+        for label in labels:
+            if label not in node.matches:
+                node.matches[label] = Trie()
+            node = node.matches[label]
+
+        node.end = True
+
+
 @wraps(TLD_EXTRACTOR.__call__)
 def extract(  # pylint: disable=missing-function-docstring
     url: str, include_psl_private_domains: Optional[bool] = False
@@ -331,9 +373,7 @@ def update(*args, **kwargs):  # type: ignore[no-untyped-def]


 class _PublicSuffixListTLDExtractor:
-    """Wrapper around this project's main algo for PSL
-    lookups.
-    """
+    """Wrapper around this project's main algo for PSL lookups."""

     def __init__(
         self,
@@ -348,6 +388,8 @@ class _PublicSuffixListTLDExtractor:
         self.private_tlds = private_tlds
         self.tlds_incl_private = frozenset(public_tlds + private_tlds + extra_tlds)
         self.tlds_excl_private = frozenset(public_tlds + extra_tlds)
+        self.tlds_incl_private_trie = Trie.create(self.tlds_incl_private)
+        self.tlds_excl_private_trie = Trie.create(self.tlds_excl_private)

     def tlds(
         self, include_psl_private_domains: Optional[bool] = None
@@ -363,27 +405,38 @@ class _PublicSuffixListTLDExtractor:
         )

     def suffix_index(
-        self, lower_spl: List[str], include_psl_private_domains: Optional[bool] = None
+        self, spl: List[str], include_psl_private_domains: Optional[bool] = None
     ) -> int:
-        """Returns the index of the first suffix label.
-        Returns len(lower_spl) if no suffix is found.
+        """Return the index of the first suffix label.
+
+        Returns len(spl) if no suffix is found.
         """
-        tlds = self.tlds(include_psl_private_domains)
-
-        for i in range(len(lower_spl)):
-            maybe_tld = ".".join(lower_spl[i:])
-            exception_tld = "!" + maybe_tld
-            if exception_tld in tlds:
-                return i + 1
-
-            if maybe_tld in tlds:
-                return i
-
-            wildcard_tld = "*." + ".".join(lower_spl[i + 1 :])
-            if wildcard_tld in tlds:
-                return i
-
-        return len(lower_spl)
+        node = (
+            self.tlds_incl_private_trie
+            if include_psl_private_domains
+            else self.tlds_excl_private_trie
+        )
+        i = len(spl)
+        j = i
+        for label in reversed(spl):
+            decoded_label = _decode_punycode(label)
+            if decoded_label in node.matches:
+                j -= 1
+                if node.matches[decoded_label].end:
+                    i = j
+                node = node.matches[decoded_label]
+                continue
+
+            is_wildcard = "*" in node.matches
+            if is_wildcard:
+                is_wildcard_exception = "!" + decoded_label in node.matches
+                if is_wildcard_exception:
+                    return j
+                return j - 1
+
+            break
+
+        return i


 def _decode_punycode(label: str) -> str:
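The substantive change of this release is the reverse-label Trie above, which replaces flat frozenset membership tests in suffix_index(): a netloc's labels are now matched right-to-left in a single walk, instead of re-joining a candidate suffix string at every position. A short sketch of the storage layout, using the Trie class exactly as added in this diff (assumes the 3.4.3 wheel is installed):

    from tldextract.tldextract import Trie

    root = Trie.create(["uk", "co.uk", "com"])

    # Labels are inserted right to left, so "co.uk" becomes the path uk -> co:
    uk_node = root.matches["uk"]
    assert uk_node.end                  # "uk" is itself a suffix
    assert uk_node.matches["co"].end    # so is "co.uk"
    assert "co" not in root.matches     # "co" alone was never added at the root

    # suffix_index() walks a netloc's labels in reverse down this trie,
    # remembering the deepest node marked end=True; "*" and "!" children carry
    # the PSL's wildcard and exception rules.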
{tldextract-3.4.1.dist-info → tldextract-3.4.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tldextract
-Version: 3.4.1
+Version: 3.4.3
 Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
 Home-page: https://github.com/john-kurkowski/tldextract
 Author: John Kurkowski
@@ -24,8 +24,9 @@ Requires-Dist: requests (>=2.1.0)
 Requires-Dist: requests-file (>=1.4)
 Requires-Dist: filelock (>=3.0.8)

-`tldextract` accurately separates the gTLD or ccTLD (generic or country code
-top-level domain) from the registered domain and subdomains of a URL.
+`tldextract` accurately separates a URL's subdomain, domain, and public suffix.
+
+It does this via the Public Suffix List (PSL).

 >>> import tldextract
 >>> tldextract.extract('http://forums.news.cnn.com/')

tldextract-3.4.3.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+tldextract/.tld_set_snapshot,sha256=TVya0bCcmRKl_16oPKPIlNmWS09rXrjOKGgYjhvAGLE,238022
+tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
+tldextract/__main__.py,sha256=FxfCNOozXSaJP2GTjgWLAn03oNMd_EUUOWkfT1_YRgM,90
+tldextract/_version.py,sha256=48Xmhq75m8Lwk0l2LFZcUFkfMiV7Qm7TtkGOfSedCXM,160
+tldextract/cache.py,sha256=kPC5WcYRE-PbhW1CQRDdPOB17u8tAq6iaJDNVcUgybc,8725
+tldextract/cli.py,sha256=F9FZ4Hx_E9Gct1d-dA40QZFrZVlzTAp_o6Fbka1k4tw,2416
+tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tldextract/remote.py,sha256=jbliDpMe6UXxVC8GPA6GowtyOtulc7vVT9SU9MZoNus,1541
+tldextract/suffix_list.py,sha256=f0a4judZhYpbYhljtHNtD-eI7kYV7Ja0LNb1R2DWvbA,3400
+tldextract/tldextract.py,sha256=-JTu0p_q_BFBWNEjY5E5ijKlGtinAfgJasvIp87R5G4,15971
+tldextract-3.4.3.dist-info/LICENSE,sha256=oqlDTqZaKpeJ6jYsQYqTkmV8gGGg-o7cO_OnH79KjsE,1522
+tldextract-3.4.3.dist-info/METADATA,sha256=HSwKKghsTmin-2BNyQ1OLnw6P38sBd6jQGpO-D8B4dM,2313
+tldextract-3.4.3.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+tldextract-3.4.3.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
+tldextract-3.4.3.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
+tldextract-3.4.3.dist-info/RECORD,,

tldextract-3.4.1.dist-info/RECORD
DELETED
@@ -1,16 +0,0 @@
-tldextract/.tld_set_snapshot,sha256=TVya0bCcmRKl_16oPKPIlNmWS09rXrjOKGgYjhvAGLE,238022
-tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
-tldextract/__main__.py,sha256=FxfCNOozXSaJP2GTjgWLAn03oNMd_EUUOWkfT1_YRgM,90
-tldextract/_version.py,sha256=WP-_SGCtdmJ9gIzEvNNB9286hJcL0KHHSpnjHpuhoiw,160
-tldextract/cache.py,sha256=cML-WAB6D99AGepWgnuuZEaIBMan7ZUsvAfFvkrtVOs,8717
-tldextract/cli.py,sha256=O9f_m60wvdtnzLHTlYnzdjHvaJU06H-vV40Pd7xKwOg,2415
-tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tldextract/remote.py,sha256=oh0JQ6Z3wx-8g3BaF9EMLSiqu4NoaMgU1G09LUYvbYI,1182
-tldextract/suffix_list.py,sha256=LLen6BeFWyKTJJIiey3uz-vlnqsP5ApJX6tXNkHmW-s,3399
-tldextract/tldextract.py,sha256=kSteeJQ11EPao3lUOCupTxCZm8jkqbth5PwyOpkBtQ8,14443
-tldextract-3.4.1.dist-info/LICENSE,sha256=oqlDTqZaKpeJ6jYsQYqTkmV8gGGg-o7cO_OnH79KjsE,1522
-tldextract-3.4.1.dist-info/METADATA,sha256=p_mFOdnkjtNCv4sE-ASdp2nZG7Yz6P5CBlDc7CDWwHM,2302
-tldextract-3.4.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
-tldextract-3.4.1.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
-tldextract-3.4.1.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
-tldextract-3.4.1.dist-info/RECORD,,

{tldextract-3.4.1.dist-info → tldextract-3.4.3.dist-info}/LICENSE
File without changes

{tldextract-3.4.1.dist-info → tldextract-3.4.3.dist-info}/WHEEL
File without changes

{tldextract-3.4.1.dist-info → tldextract-3.4.3.dist-info}/entry_points.txt
File without changes

{tldextract-3.4.1.dist-info → tldextract-3.4.3.dist-info}/top_level.txt
File without changes