PyPI - tldextract - Versions diffs - 4.0.0__py3-none-any.whl → 5.0.0__py3-none-any.whl - Mend

tldextract 4.0.0__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

tldextract/_version.py +2 -2
tldextract/cache.py +13 -21
tldextract/cli.py +2 -2
tldextract/suffix_list.py +1 -1
tldextract/tldextract.py +21 -38
{tldextract-4.0.0.dist-info → tldextract-5.0.0.dist-info}/METADATA +9 -27
tldextract-5.0.0.dist-info/RECORD +16 -0
tldextract-4.0.0.dist-info/RECORD +0 -16
{tldextract-4.0.0.dist-info → tldextract-5.0.0.dist-info}/LICENSE +0 -0
{tldextract-4.0.0.dist-info → tldextract-5.0.0.dist-info}/WHEEL +0 -0
{tldextract-4.0.0.dist-info → tldextract-5.0.0.dist-info}/entry_points.txt +0 -0
{tldextract-4.0.0.dist-info → tldextract-5.0.0.dist-info}/top_level.txt +0 -0

tldextract/_version.py CHANGED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '4.0.0'
-__version_tuple__ = version_tuple = (4, 0, 0)
+__version__ = version = '5.0.0'
+__version_tuple__ = version_tuple = (5, 0, 0)

tldextract/cache.py CHANGED Viewed

@@ -21,7 +21,7 @@ LOG = logging.getLogger(__name__)
 _DID_LOG_UNABLE_TO_CACHE = False
-T = TypeVar("T")  # pylint: disable=invalid-name
+T = TypeVar("T")
 def get_pkg_unique_identifier() -> str:
@@ -32,7 +32,6 @@ def get_pkg_unique_identifier() -> str:
     a new version of tldextract
     """
     try:
-        # pylint: disable=import-outside-toplevel
         from tldextract._version import version
     except ImportError:
         version = "dev"
@@ -83,6 +82,7 @@ class DiskCache:
     """Disk _cache that only works for jsonable values."""
     def __init__(self, cache_dir: str | None, lock_timeout: int = 20):
+        """Construct a disk cache in the given directory."""
         self.enabled = bool(cache_dir)
         self.cache_dir = os.path.expanduser(str(cache_dir) or "")
         self.lock_timeout = lock_timeout
@@ -99,14 +99,13 @@ class DiskCache:
         if not os.path.isfile(cache_filepath):
             raise KeyError("namespace: " + namespace + " key: " + repr(key))
         try:
-            # pylint: disable-next=unspecified-encoding
             with open(cache_filepath) as cache_file:
                 return json.load(cache_file)
         except (OSError, ValueError) as exc:
             LOG.error("error reading TLD cache file %s: %s", cache_filepath, exc)
             raise KeyError("namespace: " + namespace + " key: " + repr(key)) from None
-    def set(
+    def set(  # noqa: A003
         self, namespace: str, key: str | dict[str, Hashable], value: object
     ) -> None:
         """Set a value in the disk cache."""
@@ -117,19 +116,16 @@ class DiskCache:
         try:
             _make_dir(cache_filepath)
-            # pylint: disable-next=unspecified-encoding
             with open(cache_filepath, "w") as cache_file:
                 json.dump(value, cache_file)
         except OSError as ioe:
-            global _DID_LOG_UNABLE_TO_CACHE  # pylint: disable=global-statement
+            global _DID_LOG_UNABLE_TO_CACHE
             if not _DID_LOG_UNABLE_TO_CACHE:
                 LOG.warning(
-                    (
-                        "unable to cache %s.%s in %s. This could refresh the "
-                        "Public Suffix List over HTTP every app startup. "
-                        "Construct your `TLDExtract` with a writable `cache_dir` or "
-                        "set `cache_dir=None` to silence this warning. %s"
-                    ),
+                    "unable to cache %s.%s in %s. This could refresh the "
+                    "Public Suffix List over HTTP every app startup. "
+                    "Construct your `TLDExtract` with a writable `cache_dir` or "
+                    "set `cache_dir=None` to silence this warning. %s",
                     namespace,
                     key,
                     cache_filepath,
@@ -181,15 +177,13 @@ class DiskCache:
         try:
             _make_dir(cache_filepath)
         except OSError as ioe:
-            global _DID_LOG_UNABLE_TO_CACHE  # pylint: disable=global-statement
+            global _DID_LOG_UNABLE_TO_CACHE
             if not _DID_LOG_UNABLE_TO_CACHE:
                 LOG.warning(
-                    (
-                        "unable to cache %s.%s in %s. This could refresh the "
-                        "Public Suffix List over HTTP every app startup. "
-                        "Construct your `TLDExtract` with a writable `cache_dir` or "
-                        "set `cache_dir=None` to silence this warning. %s"
-                    ),
+                    "unable to cache %s.%s in %s. This could refresh the "
+                    "Public Suffix List over HTTP every app startup. "
+                    "Construct your `TLDExtract` with a writable `cache_dir` or "
+                    "set `cache_dir=None` to silence this warning. %s",
                     namespace,
                     key_args,
                     cache_filepath,
@@ -199,8 +193,6 @@ class DiskCache:
             return func(**kwargs)
-        # Disable lint of 3rd party (see also https://github.com/tox-dev/py-filelock/issues/102)
-        # pylint: disable-next=abstract-class-instantiated
         with FileLock(lock_path, timeout=self.lock_timeout):
             try:
                 result = cast(T, self.get(namespace=namespace, key=key_args))

tldextract/cli.py CHANGED Viewed

@@ -88,5 +88,5 @@ def main() -> None:
         sys.exit(1)
     for i in args.input:
-        subdomain, domain, suffix, _ = tld_extract(i)
-        print(f"{subdomain} {domain} {suffix}")
+        ext = tld_extract(i)
+        print(f"{ext.subdomain} {ext.domain} {ext.suffix}")

tldextract/suffix_list.py CHANGED Viewed

@@ -19,7 +19,7 @@ PUBLIC_SUFFIX_RE = re.compile(r"^(?P<suffix>[.*!]*\w[\S]*)", re.UNICODE | re.MUL
 PUBLIC_PRIVATE_SUFFIX_SEPARATOR = "// ===BEGIN PRIVATE DOMAINS==="
-class SuffixListNotFound(LookupError):
+class SuffixListNotFound(LookupError):  # noqa: N818
     """A recoverable error while looking up a suffix list.
     Recoverable because you can specify backups, or use this library's bundled

tldextract/tldextract.py CHANGED Viewed

@@ -13,18 +13,6 @@ It does this via the Public Suffix List (PSL).
     >>> tldextract.extract('http://www.worldbank.org.kg/') # Kyrgyzstan
     ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
-`ExtractResult` is a namedtuple, so it's simple to access the parts you want.
-    >>> ext = tldextract.extract('http://forums.bbc.co.uk')
-    >>> (ext.subdomain, ext.domain, ext.suffix)
-    ('forums', 'bbc', 'co.uk')
-    >>> # rejoin subdomain and domain
-    >>> '.'.join(ext[:2])
-    'forums.bbc'
-    >>> # a common alias
-    >>> ext.registered_domain
-    'bbc.co.uk'
 Note subdomain and suffix are _optional_. Not all URL-like inputs have a
 subdomain or a valid suffix.
@@ -37,16 +25,13 @@ subdomain or a valid suffix.
     >>> tldextract.extract('http://127.0.0.1:8080/deployed/')
     ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
-If you want to rejoin the whole namedtuple, regardless of whether a subdomain
-or suffix were found:
+To rejoin the original hostname, if it was indeed a valid, registered hostname:
-    >>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
-    >>> # this has unwanted dots
-    >>> '.'.join(part for part in ext[:3])
-    '.127.0.0.1.'
-    >>> # join part only if truthy
-    >>> '.'.join(part for part in ext[:3] if part)
-    '127.0.0.1'
+    >>> ext = tldextract.extract('http://forums.bbc.co.uk')
+    >>> ext.registered_domain
+    'bbc.co.uk'
+    >>> ext.fqdn
+    'forums.bbc.co.uk'
 """
 from __future__ import annotations
@@ -55,10 +40,8 @@ import logging
 import os
 import urllib.parse
 from collections.abc import Collection, Sequence
+from dataclasses import dataclass
 from functools import wraps
-from typing import (
-    NamedTuple,
-)
 import idna
@@ -77,14 +60,17 @@ PUBLIC_SUFFIX_LIST_URLS = (
 )
-class ExtractResult(NamedTuple):
-    """namedtuple of a URL's subdomain, domain, suffix,
-    and flag that indicates if URL has private suffix."""
+@dataclass(order=True)
+class ExtractResult:
+    """A URL's extracted subdomain, domain, and suffix.
+    Also contains metadata, like a flag that indicates if the URL has a private suffix.
+    """
     subdomain: str
     domain: str
     suffix: str
-    is_private: bool = False
+    is_private: bool
     @property
     def registered_domain(self) -> str:
@@ -111,9 +97,7 @@ class ExtractResult(NamedTuple):
         ''
         """
         if self.suffix and (self.domain or self.is_private):
-            # Disable bogus lint error (https://github.com/PyCQA/pylint/issues/2568)
-            # pylint: disable-next=not-an-iterable,unsubscriptable-object
-            return ".".join(i for i in self[:3] if i)
+            return ".".join(i for i in (self.subdomain, self.domain, self.suffix) if i)
         return ""
     @property
@@ -164,8 +148,8 @@ class ExtractResult(NamedTuple):
 class TLDExtract:
     """A callable for extracting, subdomain, domain, and suffix components from a URL."""
-    # TODO: Agreed with Pylint: too-many-arguments
-    def __init__(  # pylint: disable=too-many-arguments
+    # TODO: too-many-arguments
+    def __init__(
         self,
         cache_dir: str | None = get_cache_dir(),
         suffix_list_urls: Sequence[str] = PUBLIC_SUFFIX_LIST_URLS,
@@ -294,7 +278,7 @@ class TLDExtract:
             and netloc_with_ascii_dots[-1] == "]"
         ):
             if looks_like_ipv6(netloc_with_ascii_dots[1:-1]):
-                return ExtractResult("", netloc_with_ascii_dots, "")
+                return ExtractResult("", netloc_with_ascii_dots, "", is_private=False)
         labels = netloc_with_ascii_dots.split(".")
@@ -371,6 +355,7 @@ class Trie:
     def __init__(
         self, matches: dict | None = None, end: bool = False, is_private: bool = False
     ) -> None:
+        """TODO."""
         self.matches = matches if matches else {}
         self.end = end
         self.is_private = is_private
@@ -411,16 +396,14 @@ class Trie:
 @wraps(TLD_EXTRACTOR.__call__)
-def extract(  # pylint: disable=missing-function-docstring
+def extract(  # noqa: D103
     url: str, include_psl_private_domains: bool | None = False
 ) -> ExtractResult:
     return TLD_EXTRACTOR(url, include_psl_private_domains=include_psl_private_domains)
 @wraps(TLD_EXTRACTOR.update)
-def update(  # type: ignore[no-untyped-def]
-    *args, **kwargs
-):  # pylint: disable=missing-function-docstring
+def update(*args, **kwargs):  # type: ignore[no-untyped-def]  # noqa: D103
     return TLD_EXTRACTOR.update(*args, **kwargs)

{tldextract-4.0.0.dist-info → tldextract-5.0.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tldextract
-Version: 4.0.0
+Version: 5.0.0
 Summary: Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well.
 Author-email: John Kurkowski <john.kurkowski@gmail.com>
 License: BSD-3-Clause
@@ -10,12 +10,11 @@ Classifier: Development Status :: 5 - Production/Stable
 Classifier: Topic :: Utilities
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Python: >=3.7
+Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: idna
@@ -56,20 +55,6 @@ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False
 ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
 ```
-`ExtractResult` is a namedtuple, so it's simple to access the parts you want.
-```python
->>> ext = tldextract.extract('http://forums.bbc.co.uk')
->>> (ext.subdomain, ext.domain, ext.suffix)
-('forums', 'bbc', 'co.uk')
->>> # rejoin subdomain and domain
->>> '.'.join(ext[:2])
-'forums.bbc'
->>> # a common alias
->>> ext.registered_domain
-'bbc.co.uk'
-```
 Note subdomain and suffix are _optional_. Not all URL-like inputs have a
 subdomain or a valid suffix.
@@ -84,17 +69,14 @@ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_privat
 ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
 ```
-If you want to rejoin the whole namedtuple, regardless of whether a subdomain
-or suffix were found:
+To rejoin the original hostname, if it was indeed a valid, registered hostname:
 ```python
->>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
->>> # this has unwanted dots
->>> '.'.join(ext[:3])
-'.127.0.0.1.'
->>> # join each part only if it's truthy
->>> '.'.join(part for part in ext[:3] if part)
-'127.0.0.1'
+>>> ext = tldextract.extract('http://forums.bbc.co.uk')
+>>> ext.registered_domain
+'bbc.co.uk'
+>>> ext.fqdn
+'forums.bbc.co.uk'
 ```
 By default, this package supports the public ICANN TLDs and their exceptions.
@@ -303,7 +285,7 @@ Run all tests against a specific Python environment configuration:
 ```zsh
 tox -l
-tox -e py37
+tox -e py311
 ```
 ### Code Style

tldextract-5.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+tldextract/.tld_set_snapshot,sha256=TVya0bCcmRKl_16oPKPIlNmWS09rXrjOKGgYjhvAGLE,238022
+tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
+tldextract/__main__.py,sha256=FxfCNOozXSaJP2GTjgWLAn03oNMd_EUUOWkfT1_YRgM,90
+tldextract/_version.py,sha256=U7HnWMtKn0QTFHRJAzsVjr4cELMq3Toi6P5afKP6ah0,411
+tldextract/cache.py,sha256=kcSovX7j1V43s3gOuav8nlFl5Dgkl0O576H3_Tiqkc0,8323
+tldextract/cli.py,sha256=F5w9Haz7rWdrgIgRwZJn04t7qRBQAHUKzQnYXwDUfLs,2465
+tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tldextract/remote.py,sha256=dpLz-s-1AP4Ai4XPVQe-uT2Nmev8CZEMKURdqGw5XiA,2550
+tldextract/suffix_list.py,sha256=3N8jlmFY-EbQ-kxT2iTryFpCCeGqBUm9NiUXKiAbaOY,3443
+tldextract/tldextract.py,sha256=2AxRAWtT70jNSPcvBdk7FlrksUwbLDOXM9W8eaB13Bg,17585
+tldextract-5.0.0.dist-info/LICENSE,sha256=oqlDTqZaKpeJ6jYsQYqTkmV8gGGg-o7cO_OnH79KjsE,1522
+tldextract-5.0.0.dist-info/METADATA,sha256=pwH-aKifyF_J6gDZ-o6nPDHBSQtll7zuK1v_ceH4YCQ,10739
+tldextract-5.0.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+tldextract-5.0.0.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
+tldextract-5.0.0.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
+tldextract-5.0.0.dist-info/RECORD,,

tldextract-4.0.0.dist-info/RECORD DELETED Viewed

@@ -1,16 +0,0 @@
-tldextract/.tld_set_snapshot,sha256=TVya0bCcmRKl_16oPKPIlNmWS09rXrjOKGgYjhvAGLE,238022
-tldextract/__init__.py,sha256=rZg3DKzS9CTARuF4Tuq50ViILwUktDED89Av8nStNuM,216
-tldextract/__main__.py,sha256=FxfCNOozXSaJP2GTjgWLAn03oNMd_EUUOWkfT1_YRgM,90
-tldextract/_version.py,sha256=TgVqVkMXXQVomuTpZfj8uxnyooVWsiw-3pM8cC2qwwE,411
-tldextract/cache.py,sha256=_hUjP-cw4BpR2TG-_XAD6YL1pQMNe64b4O0nVWMVLAY,8790
-tldextract/cli.py,sha256=5BMCp-DjY3_-KlZ1wnpycHrUHzaqLL4r4TiHl2-xiCU,2478
-tldextract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tldextract/remote.py,sha256=dpLz-s-1AP4Ai4XPVQe-uT2Nmev8CZEMKURdqGw5XiA,2550
-tldextract/suffix_list.py,sha256=W797R-PLIw_8yBPnopoMTimPiv1P1csh9Wcs-_E4Ous,3429
-tldextract/tldextract.py,sha256=kHtO_xVbSgOEsLU526gxHa5Mz5Pjxe6fP-xvJmzWosM,18272
-tldextract-4.0.0.dist-info/LICENSE,sha256=oqlDTqZaKpeJ6jYsQYqTkmV8gGGg-o7cO_OnH79KjsE,1522
-tldextract-4.0.0.dist-info/METADATA,sha256=MYr1oUzvOfV47wNqn276x-7NxY0gTfm-9S__ltSLbMI,11260
-tldextract-4.0.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-tldextract-4.0.0.dist-info/entry_points.txt,sha256=EStkXC80BetCMp1UDhU3kWuXBo3qDpgKltZTJ1x4x1U,51
-tldextract-4.0.0.dist-info/top_level.txt,sha256=DWZIjV49WP30tyC1KOEP7t-EaS4IRCXQzc0KXAOn_bk,11
-tldextract-4.0.0.dist-info/RECORD,,

{tldextract-4.0.0.dist-info → tldextract-5.0.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{tldextract-4.0.0.dist-info → tldextract-5.0.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{tldextract-4.0.0.dist-info → tldextract-5.0.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{tldextract-4.0.0.dist-info → tldextract-5.0.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

tldextract 4.0.0__py3-none-any.whl → 5.0.0__py3-none-any.whl

tldextract 4.0.0__py3-none-any.whl → 5.0.0__py3-none-any.whl