tldextract-3.4.2-py3-none-any.whl → tldextract-3.4.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tldextract/_version.py +2 -2
- tldextract/cache.py +9 -9
- tldextract/cli.py +2 -2
- tldextract/remote.py +9 -6
- tldextract/suffix_list.py +8 -8
- tldextract/tldextract.py +96 -92
- {tldextract-3.4.2.dist-info → tldextract-3.4.3.dist-info}/METADATA +4 -3
- tldextract-3.4.3.dist-info/RECORD +16 -0
- tldextract-3.4.2.dist-info/RECORD +0 -16
- {tldextract-3.4.2.dist-info → tldextract-3.4.3.dist-info}/LICENSE +0 -0
- {tldextract-3.4.2.dist-info → tldextract-3.4.3.dist-info}/WHEEL +0 -0
- {tldextract-3.4.2.dist-info → tldextract-3.4.3.dist-info}/entry_points.txt +0 -0
- {tldextract-3.4.2.dist-info → tldextract-3.4.3.dist-info}/top_level.txt +0 -0
tldextract/_version.py
CHANGED
tldextract/cache.py
CHANGED
@@ -1,4 +1,4 @@
-"""Helpers"""
+"""Helpers."""
 import errno
 import hashlib
 import json
@@ -30,7 +30,7 @@ T = TypeVar("T")  # pylint: disable=invalid-name
 
 def get_pkg_unique_identifier() -> str:
     """
-    Generate an identifier unique to the python version, tldextract version, and python instance
+    Generate an identifier unique to the python version, tldextract version, and python instance.
 
     This will prevent interference between virtualenvs and issues that might arise when installing
     a new version of tldextract
@@ -61,7 +61,7 @@ def get_pkg_unique_identifier() -> str:
 
 def get_cache_dir() -> str:
     """
-    Get a cache dir that we have permission to write to
+    Get a cache dir that we have permission to write to.
 
     Try to follow the XDG standard, but if that doesn't work fallback to the package directory
     http://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
@@ -86,7 +86,7 @@
 
 
 class DiskCache:
-    """Disk _cache that only works for jsonable values"""
+    """Disk _cache that only works for jsonable values."""
 
     def __init__(self, cache_dir: Optional[str], lock_timeout: int = 20):
         self.enabled = bool(cache_dir)
@@ -115,7 +115,7 @@ class DiskCache:
     def set(
         self, namespace: str, key: Union[str, Dict[str, Hashable]], value: object
     ) -> None:
-        """Set a value in the disk cache"""
+        """Set a value in the disk cache."""
         if not self.enabled:
             return
 
@@ -142,7 +142,7 @@ class DiskCache:
         _DID_LOG_UNABLE_TO_CACHE = True
 
     def clear(self) -> None:
-        """Clear the disk cache"""
+        """Clear the disk cache."""
         for root, _, files in os.walk(self.cache_dir):
             for filename in files:
                 if filename.endswith(self.file_ext) or filename.endswith(
@@ -175,7 +175,7 @@ class DiskCache:
         kwargs: Dict[str, Hashable],
         hashed_argnames: Iterable[str],
     ) -> T:
-        """Get a url but cache the response"""
+        """Get a url but cache the response."""
         if not self.enabled:
             return func(**kwargs)
 
@@ -215,7 +215,7 @@ class DiskCache:
     def cached_fetch_url(
         self, session: requests.Session, url: str, timeout: Union[float, int, None]
    ) -> str:
-        """Get a url but cache the response"""
+        """Get a url but cache the response."""
         return self.run_and_cache(
             func=_fetch_url,
             namespace="urls",
@@ -241,7 +241,7 @@ def _make_cache_key(inputs: Union[str, Dict[str, Hashable]]) -> str:
 
 
 def _make_dir(filename: str) -> None:
-    """Make a directory if it doesn't already exist"""
+    """Make a directory if it doesn't already exist."""
     if not os.path.exists(os.path.dirname(filename)):
         try:
             os.makedirs(os.path.dirname(filename))
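The changes above are docstring punctuation only, but they outline `DiskCache`'s API: `set`, `clear`, `run_and_cache`, and `cached_fetch_url`, which memoizes `_fetch_url` results under the `"urls"` namespace. A minimal standalone sketch of the same memoize-to-disk pattern, not tldextract's actual implementation (the hashing scheme and file layout here are assumptions):

```python
import hashlib
import json
import os
from typing import Callable, Dict, Hashable


def _make_cache_key(inputs: Dict[str, Hashable]) -> str:
    # Derive a stable filename from the kwargs, in the spirit of the
    # _make_cache_key helper touched in the diff (exact hashing assumed).
    return hashlib.md5(json.dumps(inputs, sort_keys=True).encode("utf8")).hexdigest()


def run_and_cache(
    func: Callable[..., object], cache_dir: str, **kwargs: Hashable
) -> object:
    """Return a previously cached JSON result, or call func and cache it."""
    path = os.path.join(cache_dir, _make_cache_key(kwargs) + ".json")
    if os.path.exists(path):
        with open(path) as cache_file:
            return json.load(cache_file)

    result = func(**kwargs)  # only jsonable values, per the class docstring
    os.makedirs(cache_dir, exist_ok=True)  # cf. _make_dir in the diff
    with open(path, "w") as cache_file:
        json.dump(result, cache_file)
    return result
```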
tldextract/cli.py
CHANGED
@@ -1,4 +1,4 @@
-"""tldextract CLI"""
+"""tldextract CLI."""
 
 
 import argparse
@@ -12,7 +12,7 @@ from .tldextract import TLDExtract
 
 
 def main() -> None:
-    """tldextract CLI main command."""
+    """Tldextract CLI main command."""
     logging.basicConfig()
 
     parser = argparse.ArgumentParser(
tldextract/remote.py
CHANGED
@@ -1,4 +1,4 @@
-"tldextract helpers for testing and fetching remote resources."
+"""tldextract helpers for testing and fetching remote resources."""
 
 import re
 import socket
@@ -13,9 +13,12 @@ scheme_chars_set = set(scheme_chars)
 
 
 def lenient_netloc(url: str) -> str:
-    """Extract the netloc of a URL-like string, similar to the netloc
-    attribute returned by urllib.parse.{urlparse,urlsplit}, but extract more
-    leniently, without raising errors."""
+    """Extract the netloc of a URL-like string.
+
+    Similar to the netloc attribute returned by
+    urllib.parse.{urlparse,urlsplit}, but extract more leniently, without
+    raising errors.
+    """
     return (
         _schemeless_url(url)
         .partition("/")[0]
@@ -24,7 +27,7 @@ def lenient_netloc(url: str) -> str:
         .rpartition("@")[-1]
         .partition(":")[0]
         .strip()
-        .rstrip(".。．｡")
+        .rstrip(".\u3002\uff0e\uff61")
     )
 
 
@@ -42,7 +45,7 @@ def _schemeless_url(url: str) -> str:
 
 
 def looks_like_ip(maybe_ip: str) -> bool:
-    """Does the given str look like an IP address?"""
+    """Check whether the given str looks like an IP address."""
     if not maybe_ip[0].isdigit():
         return False
 
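The `.rstrip` change replaces literal ideographic full stops with their escape sequences; the set of stripped trailing dots is unchanged. The chain of `partition` calls never raises, unlike strict URL parsing. A rough standalone rendering of the same idea, assuming the scheme has already been stripped (as tldextract's `_schemeless_url` does, body not shown in this diff) and assuming the elided middle of the chain drops the query and fragment:

```python
def lenient_netloc_sketch(schemeless_url: str) -> str:
    host = (
        schemeless_url.partition("/")[0]  # drop the path
        .partition("?")[0]  # drop the query (assumed; elided in the diff)
        .partition("#")[0]  # drop the fragment (assumed; elided in the diff)
        .rpartition("@")[-1]  # drop userinfo, e.g. user:pass@
        .partition(":")[0]  # drop the port
        .strip()
    )
    # Strip trailing dots, including the full-width/halfwidth
    # ideographic full stops now written as escapes in 3.4.3.
    return host.rstrip(".\u3002\uff0e\uff61")


assert lenient_netloc_sketch("user:pass@forums.news.cnn.com:8080/a?b#c") == "forums.news.cnn.com"
```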
tldextract/suffix_list.py
CHANGED
@@ -1,4 +1,4 @@
-"tldextract helpers for testing and fetching remote resources."
+"""tldextract helpers for testing and fetching remote resources."""
 
 import logging
 import pkgutil
@@ -17,8 +17,11 @@ PUBLIC_PRIVATE_SUFFIX_SEPARATOR = "// ===BEGIN PRIVATE DOMAINS==="
 
 
 class SuffixListNotFound(LookupError):
-    """A recoverable error while looking up a suffix list.
-    Recoverable because you can specify backups, or use this library's bundled snapshot."""
+    """A recoverable error while looking up a suffix list.
+
+    Recoverable because you can specify backups, or use this library's bundled
+    snapshot.
+    """
 
 
 def find_first_response(
@@ -26,9 +29,7 @@ def find_first_response(
     urls: Sequence[str],
     cache_fetch_timeout: Union[float, int, None] = None,
 ) -> str:
-    """Decode the first successfully fetched URL, from UTF-8 encoding to
-    Python unicode.
-    """
+    """Decode the first successfully fetched URL, from UTF-8 encoding to Python unicode."""
     with requests.Session() as session:
         session.mount("file://", FileAdapter())
 
@@ -46,8 +47,7 @@ def find_first_response(
 
 
 def extract_tlds_from_suffix_list(suffix_list_text: str) -> Tuple[List[str], List[str]]:
-    """Parse the raw suffix list text for its different designations of
-    suffixes."""
+    """Parse the raw suffix list text for its different designations of suffixes."""
     public_text, _, private_text = suffix_list_text.partition(
         PUBLIC_PRIVATE_SUFFIX_SEPARATOR
     )
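The reflowed one-liner describes the whole job: split the raw Public Suffix List text at the `PUBLIC_PRIVATE_SUFFIX_SEPARATOR` constant visible in the hunk header, then collect suffixes from each half. A toy illustration of just the partition step (the real text comes from publicsuffix.org, and upstream extracts suffixes from each half with a line-oriented regex not shown in this diff):

```python
PUBLIC_PRIVATE_SUFFIX_SEPARATOR = "// ===BEGIN PRIVATE DOMAINS==="

# Toy stand-in for the downloaded suffix list text.
suffix_list_text = """\
com
co.uk
// ===BEGIN PRIVATE DOMAINS===
blogspot.com
"""

public_text, _, private_text = suffix_list_text.partition(
    PUBLIC_PRIVATE_SUFFIX_SEPARATOR
)
assert "co.uk" in public_text
assert "blogspot.com" in private_text
```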
tldextract/tldextract.py
CHANGED
@@ -1,5 +1,6 @@
-"""
-`tldextract` accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL).
+"""`tldextract` accurately separates a URL's subdomain, domain, and public suffix.
+
+It does this via the Public Suffix List (PSL).
 
 >>> import tldextract
 
@@ -48,11 +49,22 @@ or suffix were found:
 '127.0.0.1'
 """
 
+from __future__ import annotations
+
 import logging
 import os
 import urllib.parse
 from functools import wraps
-from typing import Dict, FrozenSet, List, NamedTuple, Optional, Sequence, Union
+from typing import (
+    Collection,
+    Dict,
+    FrozenSet,
+    List,
+    NamedTuple,
+    Optional,
+    Sequence,
+    Union,
+)
 
 import idna
 
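The new `from __future__ import annotations` is what lets the rest of the file use PEP 604 unions (`bool | None`) and unquoted forward references on interpreters older than 3.10: annotations become strings that are never evaluated at definition time. A tiny illustration of the effect, with hypothetical names:

```python
from __future__ import annotations  # PEP 563: postpone annotation evaluation


# `Node` is not defined yet, and `bool | None` is not a valid runtime
# expression before Python 3.10, but neither matters here: the annotations
# are stored as strings and never evaluated when the function is defined.
def find(flag: bool | None = None) -> Node:
    return Node()


class Node:
    pass


assert isinstance(find(), Node)
```

This is also why `_get_tld_extractor` below can drop the quotes around its `_PublicSuffixListTLDExtractor` return annotation, and why `Trie.create` can return `Trie` from inside the class body.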
@@ -88,8 +100,8 @@ class ExtractResult(NamedTuple):
         >>> extract('http://localhost:8080').registered_domain
         ''
         """
-        if self.domain and self.suffix:
-            return self.domain + "." + self.suffix
+        if self.suffix and self.domain:
+            return f"{self.domain}.{self.suffix}"
         return ""
 
     @property
@@ -102,7 +114,7 @@ class ExtractResult(NamedTuple):
         >>> extract('http://localhost:8080').fqdn
         ''
         """
-        if self.domain and self.suffix:
+        if self.suffix and self.domain:
             # Disable bogus lint error (https://github.com/PyCQA/pylint/issues/2568)
             # pylint: disable-next=not-an-iterable
             return ".".join(i for i in self if i)
@@ -111,7 +123,7 @@ class ExtractResult(NamedTuple):
     @property
     def ipv4(self) -> str:
         """
-        Returns the ipv4 if that is what the presented domain/url is
+        Returns the ipv4 if that is what the presented domain/url is.
 
         >>> extract('http://127.0.0.1/path/to/file').ipv4
         '127.0.0.1'
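The reordered `if self.suffix and self.domain` check and the switch to an f-string are behavior-preserving; the doctests quoted in the hunks still hold:

```python
import tldextract

ext = tldextract.extract("http://forums.news.cnn.com/")
assert ext.registered_domain == "cnn.com"
assert ext.fqdn == "forums.news.cnn.com"

# Without a known suffix, both properties fall through to the empty string.
no_suffix = tldextract.extract("http://localhost:8080")
assert no_suffix.registered_domain == ""
assert no_suffix.fqdn == ""
```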
@@ -126,8 +138,7 @@ class ExtractResult(NamedTuple):
 
 
 class TLDExtract:
-    """A callable for extracting, subdomain, domain, and suffix components from
-    a URL."""
+    """A callable for extracting, subdomain, domain, and suffix components from a URL."""
 
     # TODO: Agreed with Pylint: too-many-arguments
     def __init__(  # pylint: disable=too-many-arguments
@@ -139,9 +150,7 @@ class TLDExtract:
         extra_suffixes: Sequence[str] = (),
         cache_fetch_timeout: Union[str, float, None] = CACHE_TIMEOUT,
     ) -> None:
-        """
-        Constructs a callable for extracting subdomain, domain, and suffix
-        components from a URL.
+        """Construct a callable for extracting subdomain, domain, and suffix components from a URL.
 
         Upon calling it, it first checks for a JSON in `cache_dir`. By default,
         the `cache_dir` will live in the tldextract directory. You can disable
@@ -204,17 +213,17 @@ class TLDExtract:
         self._cache = DiskCache(cache_dir)
 
     def __call__(
-        self, url: str, include_psl_private_domains: Optional[bool] = None
+        self, url: str, include_psl_private_domains: bool | None = None
     ) -> ExtractResult:
         """Alias for `extract_str`."""
         return self.extract_str(url, include_psl_private_domains)
 
     def extract_str(
-        self, url: str, include_psl_private_domains: Optional[bool] = None
+        self, url: str, include_psl_private_domains: bool | None = None
     ) -> ExtractResult:
-        """
-        Takes a string URL and splits it into its subdomain, domain, and
-        suffix components, i.e. its effective TLD, gTLD, ccTLD, etc. components.
+        """Take a string URL and splits it into its subdomain, domain, and suffix components.
+
+        I.e. its effective TLD, gTLD, ccTLD, etc. components.
 
         >>> extractor = TLDExtract()
         >>> extractor.extract_str('http://forums.news.cnn.com/')
@@ -229,10 +238,10 @@ class TLDExtract:
         url: Union[urllib.parse.ParseResult, urllib.parse.SplitResult],
         include_psl_private_domains: Optional[bool] = None,
     ) -> ExtractResult:
-        """
-        Takes the output of urllib.parse URL parsing methods and further splits
-        the parsed URL into its subdomain, domain, and suffix components, i.e. its
-        effective TLD, gTLD, ccTLD, etc. components.
+        """Take the output of urllib.parse URL parsing methods and further splits the parsed URL.
+
+        Splits the parsed URL into its subdomain, domain, and suffix
+        components, i.e. its effective TLD, gTLD, ccTLD, etc. components.
 
         This method is like `extract_str` but faster, as the string's domain
         name has already been parsed.
@@ -259,11 +268,11 @@ class TLDExtract:
             labels, include_psl_private_domains=include_psl_private_domains
         )
 
-        suffix = ".".join(labels[suffix_index:])
-        if not suffix and netloc and looks_like_ip(netloc):
+        if suffix_index == len(labels) and netloc and looks_like_ip(netloc):
             return ExtractResult("", netloc, "")
 
-        subdomain = ".".join(labels[: suffix_index - 1]) if suffix_index else ""
+        suffix = ".".join(labels[suffix_index:]) if suffix_index != len(labels) else ""
+        subdomain = ".".join(labels[: suffix_index - 1]) if suffix_index >= 2 else ""
         domain = labels[suffix_index - 1] if suffix_index else ""
         return ExtractResult(subdomain, domain, suffix)
 
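The `_extract_netloc` rewrite makes the no-suffix case explicit: `suffix_index == len(labels)` now means "nothing matched", instead of testing for an empty joined suffix. A worked trace of the three new assignments:

```python
# Suffix found: suffix_index() returned the index of the first suffix label.
labels = ["forums", "news", "cnn", "com"]
suffix_index = 3
suffix = ".".join(labels[suffix_index:]) if suffix_index != len(labels) else ""
subdomain = ".".join(labels[: suffix_index - 1]) if suffix_index >= 2 else ""
domain = labels[suffix_index - 1] if suffix_index else ""
assert (subdomain, domain, suffix) == ("forums.news", "cnn", "com")

# No suffix found: suffix_index == len(labels), so suffix stays empty, and
# an IP-looking netloc short-circuits to ExtractResult("", netloc, "").
labels = ["127", "0", "0", "1"]
suffix_index = 4
assert (".".join(labels[suffix_index:]) if suffix_index != len(labels) else "") == ""
```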
@@ -277,22 +286,23 @@ class TLDExtract:
     @property
     def tlds(self) -> List[str]:
         """
-        Returns the list of tld's used by default
+        Returns the list of tld's used by default.
 
         This will vary based on `include_psl_private_domains` and `extra_suffixes`
         """
         return list(self._get_tld_extractor().tlds())
 
-    def _get_tld_extractor(self) -> "_PublicSuffixListTLDExtractor":
-        """Get or compute this object's TLDExtractor.
-
-        Looks up the TLDExtractor in roughly the following order, based on the settings passed to __init__:
+    def _get_tld_extractor(self) -> _PublicSuffixListTLDExtractor:
+        """Get or compute this object's TLDExtractor.
+
+        Looks up the TLDExtractor in roughly the following order, based on the
+        settings passed to __init__:
 
         1. Memoized on `self`
         2. Local system _cache file
         3. Remote PSL, over HTTP
-        4. Bundled PSL snapshot file"""
-
+        4. Bundled PSL snapshot file
+        """
         if self._extractor:
             return self._extractor
 
@@ -318,6 +328,37 @@ class TLDExtract:
 TLD_EXTRACTOR = TLDExtract()
 
 
+class Trie:
+    """Trie for storing eTLDs with their labels in reverse-order."""
+
+    def __init__(self, matches: Optional[Dict] = None, end: bool = False) -> None:
+        self.matches = matches if matches else {}
+        self.end = end
+
+    @staticmethod
+    def create(suffixes: Collection[str]) -> Trie:
+        """Create a Trie from a list of suffixes and return its root node."""
+        root_node = Trie()
+
+        for suffix in suffixes:
+            suffix_labels = suffix.split(".")
+            suffix_labels.reverse()
+            root_node.add_suffix(suffix_labels)
+
+        return root_node
+
+    def add_suffix(self, labels: List[str]) -> None:
+        """Append a suffix's labels to this Trie node."""
+        node = self
+
+        for label in labels:
+            if label not in node.matches:
+                node.matches[label] = Trie()
+            node = node.matches[label]
+
+        node.end = True
+
+
 @wraps(TLD_EXTRACTOR.__call__)
 def extract(  # pylint: disable=missing-function-docstring
     url: str, include_psl_private_domains: Optional[bool] = False
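The new `Trie` is the heart of this release: it replaces flat frozenset lookups of whole candidate suffix strings. Labels are stored in reverse order, so a lookup walks from the TLD inward, and `end` marks nodes where a complete suffix terminates. Exercising the class exactly as added:

```python
trie = Trie.create({"com", "co.uk", "uk"})

assert "com" in trie.matches
assert trie.matches["uk"].end  # "uk" is a complete suffix
assert trie.matches["uk"].matches["co"].end  # so is "co.uk", stored reversed
assert not trie.end  # the root alone matches nothing
```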
@@ -332,9 +373,7 @@ def update(*args, **kwargs):  # type: ignore[no-untyped-def]
 
 
 class _PublicSuffixListTLDExtractor:
-    """Wrapper around this project's main algo for PSL
-    lookups.
-    """
+    """Wrapper around this project's main algo for PSL lookups."""
 
     def __init__(
         self,
@@ -349,28 +388,8 @@ class _PublicSuffixListTLDExtractor:
         self.private_tlds = private_tlds
         self.tlds_incl_private = frozenset(public_tlds + private_tlds + extra_tlds)
         self.tlds_excl_private = frozenset(public_tlds + extra_tlds)
-
-        public_false_tlds = self.get_false_intermediate_suffixes(public_tlds)
-        private_false_tlds = self.get_false_intermediate_suffixes(private_tlds)
-        extra_false_tlds = self.get_false_intermediate_suffixes(extra_tlds)
-        self.false_tlds_incl_private = frozenset(
-            public_false_tlds + private_false_tlds + extra_false_tlds
-        )
-        self.false_tlds_excl_private = frozenset(public_false_tlds + extra_false_tlds)
-
-    def get_false_intermediate_suffixes(self, tlds: List[str]) -> List[str]:
-        """From list of suffixes, identify false intermediate suffixes.
-
-        Example: If valid TLDs include only ["a.b.c.d", "d"], then
-        ["b.c.d", "c.d"] are false intermediate suffixes.
-        """
-        valid_tlds = set(tlds)
-        false_tlds = set()
-        for tld in valid_tlds:
-            labels = tld.split(".")
-            variants = {".".join(labels[-i:]) for i in range(1, len(labels))}
-            false_tlds.update(variants)
-        return list(false_tlds.difference(valid_tlds))
+        self.tlds_incl_private_trie = Trie.create(self.tlds_incl_private)
+        self.tlds_excl_private_trie = Trie.create(self.tlds_excl_private)
 
     def tlds(
         self, include_psl_private_domains: Optional[bool] = None
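The deleted `get_false_intermediate_suffixes` existed so the old flat-set scan knew to keep consuming labels through strings like `c.d` that are prefixes of a real suffix but not suffixes themselves. In the trie, that information is implicit: intermediate nodes simply have `end == False`. The removed docstring's own example, both ways:

```python
# Old approach: precompute the "false" intermediate suffixes.
valid_tlds = {"a.b.c.d", "d"}
false_tlds = set()
for tld in valid_tlds:
    labels = tld.split(".")
    false_tlds.update(".".join(labels[-i:]) for i in range(1, len(labels)))
assert false_tlds - valid_tlds == {"b.c.d", "c.d"}

# New approach: the same facts fall out of the trie structure.
trie = Trie.create(valid_tlds)
assert trie.matches["d"].end  # "d" is a real suffix
assert not trie.matches["d"].matches["c"].end  # "c.d" is only intermediate
```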
@@ -385,53 +404,38 @@ class _PublicSuffixListTLDExtractor:
             else self.tlds_excl_private
         )
 
-    def false_tlds(
-        self, include_psl_private_domains: Optional[bool] = None
-    ) -> FrozenSet[str]:
-        """Get the currently filtered list of false intermediate suffixes."""
-        if include_psl_private_domains is None:
-            include_psl_private_domains = self.include_psl_private_domains
-
-        return (
-            self.false_tlds_incl_private
-            if include_psl_private_domains
-            else self.false_tlds_excl_private
-        )
-
     def suffix_index(
         self, spl: List[str], include_psl_private_domains: Optional[bool] = None
     ) -> int:
-        """
-        Returns the index of the first suffix label. Returns len(spl) if no suffix is found.
+        """Return the index of the first suffix label.
+
+        Returns len(spl) if no suffix is found.
         """
-        tlds = self.tlds(include_psl_private_domains)
-        false_tlds = self.false_tlds(include_psl_private_domains)
+        node = (
+            self.tlds_incl_private_trie
+            if include_psl_private_domains
+            else self.tlds_excl_private_trie
+        )
         i = len(spl)
         j = i
-        maybe_tld = ""
-        prev_maybe_tld = ""
         for label in reversed(spl):
-            decoded_label = _decode_punycode(label)
-            if prev_maybe_tld:
-                maybe_tld = f"{decoded_label}.{prev_maybe_tld}"
-            else:
-                maybe_tld = decoded_label
+            decoded_label = _decode_punycode(label)
+            if decoded_label in node.matches:
+                j -= 1
+                if node.matches[decoded_label].end:
+                    i = j
+                node = node.matches[decoded_label]
+                continue
 
-            if f"*.{prev_maybe_tld}" in tlds:
-                if f"!{maybe_tld}" in tlds:
-                    return j
+            is_wildcard = "*" in node.matches
+            if is_wildcard:
+                is_wildcard_exception = "!" + decoded_label in node.matches
+                if is_wildcard_exception:
+                    return j
                 return j - 1
 
-            if maybe_tld in tlds:
-                j -= 1
-                i = j
-                prev_maybe_tld = maybe_tld
-                continue
-            if maybe_tld in false_tlds:
-                j -= 1
-                prev_maybe_tld = maybe_tld
-                continue
             break
+
         return i
 
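In the rewritten loop, `j` tracks the current walk position from the right and `i` remembers the last index where a complete suffix (`node.end`) finished, so trailing labels that only partially match a longer suffix are not swallowed. The wildcard branch implements the PSL's `*.example` and `!exception` rules. A standalone trace of the same logic, minus the punycode decoding, reusing the `Trie` added above:

```python
def suffix_index_sketch(trie, spl):
    node = trie
    i = len(spl)
    j = i
    for label in reversed(spl):
        if label in node.matches:
            j -= 1
            if node.matches[label].end:
                i = j  # a complete suffix ends here
            node = node.matches[label]
            continue
        if "*" in node.matches:  # PSL wildcard, e.g. "*.ck"
            if "!" + label in node.matches:  # exception, e.g. "!www.ck"
                return j
            return j - 1  # the unmatched label itself joins the suffix
        break
    return i


trie = Trie.create({"com", "co.uk", "uk", "*.ck", "!www.ck"})
assert suffix_index_sketch(trie, ["forums", "news", "cnn", "com"]) == 3
assert suffix_index_sketch(trie, ["www", "bbc", "co", "uk"]) == 2
assert suffix_index_sketch(trie, ["foo", "bar", "ck"]) == 1  # "*.ck": suffix is "bar.ck"
assert suffix_index_sketch(trie, ["www", "ck"]) == 1  # "!www.ck": "www" is the domain
assert suffix_index_sketch(trie, ["localhost"]) == 1  # == len(spl): no suffix
```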
{tldextract-3.4.2.dist-info → tldextract-3.4.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tldextract
-Version: 3.4.2
+Version: 3.4.3
 Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
 Home-page: https://github.com/john-kurkowski/tldextract
 Author: John Kurkowski
@@ -24,8 +24,9 @@ Requires-Dist: requests (>=2.1.0)
 Requires-Dist: requests-file (>=1.4)
 Requires-Dist: filelock (>=3.0.8)
 
-`tldextract` accurately separates a URL's subdomain, domain, and public suffix,
-using the Public Suffix List (PSL).
+`tldextract` accurately separates a URL's subdomain, domain, and public suffix.
+
+It does this via the Public Suffix List (PSL).
 
 >>> import tldextract
 >>> tldextract.extract('http://forums.news.cnn.com/')
tldextract-3.4.3.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+tldextract/.tld_set_snapshot,sha256=TVya0bCcmRKl_16oPKPIlNmWS09rXrjOKGgYjhvAGLE,238022
+tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
+tldextract/__main__.py,sha256=FxfCNOozXSaJP2GTjgWLAn03oNMd_EUUOWkfT1_YRgM,90
+tldextract/_version.py,sha256=48Xmhq75m8Lwk0l2LFZcUFkfMiV7Qm7TtkGOfSedCXM,160
+tldextract/cache.py,sha256=kPC5WcYRE-PbhW1CQRDdPOB17u8tAq6iaJDNVcUgybc,8725
+tldextract/cli.py,sha256=F9FZ4Hx_E9Gct1d-dA40QZFrZVlzTAp_o6Fbka1k4tw,2416
+tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tldextract/remote.py,sha256=jbliDpMe6UXxVC8GPA6GowtyOtulc7vVT9SU9MZoNus,1541
+tldextract/suffix_list.py,sha256=f0a4judZhYpbYhljtHNtD-eI7kYV7Ja0LNb1R2DWvbA,3400
+tldextract/tldextract.py,sha256=-JTu0p_q_BFBWNEjY5E5ijKlGtinAfgJasvIp87R5G4,15971
+tldextract-3.4.3.dist-info/LICENSE,sha256=oqlDTqZaKpeJ6jYsQYqTkmV8gGGg-o7cO_OnH79KjsE,1522
+tldextract-3.4.3.dist-info/METADATA,sha256=HSwKKghsTmin-2BNyQ1OLnw6P38sBd6jQGpO-D8B4dM,2313
+tldextract-3.4.3.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+tldextract-3.4.3.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
+tldextract-3.4.3.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
+tldextract-3.4.3.dist-info/RECORD,,
tldextract-3.4.2.dist-info/RECORD
REMOVED
@@ -1,16 +0,0 @@
-tldextract/.tld_set_snapshot,sha256=TVya0bCcmRKl_16oPKPIlNmWS09rXrjOKGgYjhvAGLE,238022
-tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
-tldextract/__main__.py,sha256=FxfCNOozXSaJP2GTjgWLAn03oNMd_EUUOWkfT1_YRgM,90
-tldextract/_version.py,sha256=53TBnObrVrn0HIZDYPVciP_r4bSFTkPY4CPek8i62wc,160
-tldextract/cache.py,sha256=cML-WAB6D99AGepWgnuuZEaIBMan7ZUsvAfFvkrtVOs,8717
-tldextract/cli.py,sha256=O9f_m60wvdtnzLHTlYnzdjHvaJU06H-vV40Pd7xKwOg,2415
-tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tldextract/remote.py,sha256=NOoPNBwfkgUlJA95d83zK6dx9ARgu4_UWJO54ByCT3c,1499
-tldextract/suffix_list.py,sha256=LLen6BeFWyKTJJIiey3uz-vlnqsP5ApJX6tXNkHmW-s,3399
-tldextract/tldextract.py,sha256=R-eAlawiiqnmQzw66LCc0XBv695HNUxIVZSkekvtMiI,16315
-tldextract-3.4.2.dist-info/LICENSE,sha256=oqlDTqZaKpeJ6jYsQYqTkmV8gGGg-o7cO_OnH79KjsE,1522
-tldextract-3.4.2.dist-info/METADATA,sha256=E92tKx_pFMqoXBhlxejOCxrjMb_gofOwAOmKtoTJ-OU,2302
-tldextract-3.4.2.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
-tldextract-3.4.2.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
-tldextract-3.4.2.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
-tldextract-3.4.2.dist-info/RECORD,,
{tldextract-3.4.2.dist-info → tldextract-3.4.3.dist-info}/LICENSE
File without changes
{tldextract-3.4.2.dist-info → tldextract-3.4.3.dist-info}/WHEEL
File without changes
{tldextract-3.4.2.dist-info → tldextract-3.4.3.dist-info}/entry_points.txt
File without changes
{tldextract-3.4.2.dist-info → tldextract-3.4.3.dist-info}/top_level.txt
File without changes