ultimate-sitemap-parser 1.2.0-py3-none-any.whl → 1.3.1-py3-none-any.whl

This diff shows the content of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of ultimate-sitemap-parser might be problematic (see the registry listing for details).

ultimate_sitemap_parser-1.3.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ultimate-sitemap-parser
-Version: 1.2.0
+Version: 1.3.1
 Summary: A performant library for parsing and crawling sitemaps
 License: GPL-3.0-or-later
 Keywords: sitemap,crawler,indexing,xml,rss,atom,google news
@@ -8,7 +8,7 @@ Author: Linas Valiukas
 Author-email: linas@media.mit.edu
 Maintainer: Freddy Heppell
 Maintainer-email: f.heppell@sheffield.ac.uk
-Requires-Python: >=3.8
+Requires-Python: >=3.9
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Information Technology
@@ -16,7 +16,6 @@ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (G
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
ultimate_sitemap_parser-1.3.1.dist-info/RECORD ADDED
@@ -0,0 +1,21 @@
+usp/__init__.py,sha256=_jshbOBBUHRZ5ko4SdI7GRFiF9xKGJVCEPgL9lZJ81o,124
+usp/cli/__init__.py,sha256=mGrjSftUYfM2SGp9yEN2dTJndl5thOdv77-EAe6ocWo,37
+usp/cli/_ls.py,sha256=V0pMsDiQK_9RZ5MyUS2toW8b6e2FJ4spb3Grw6PayAI,3419
+usp/cli/_util.py,sha256=OrT9en350tATnaUrUn0peXr7aFPyYaaHGbEXGY6O4wI,2015
+usp/cli/cli.py,sha256=2byuqhBUhb7c1qUpBfTTufG-jvtiEWWq97GvCgv-s44,777
+usp/exceptions.py,sha256=9KTgnocYYZCfyaCf9BrBN7Ok4cwn7_DlrNFbhUfFsGM,634
+usp/fetch_parse.py,sha256=69U1uAKawUym41N4nwJXLW9tQ0WXO4Pi63hnljYCXPM,43524
+usp/helpers.py,sha256=FeIZcEuEM3Uz8tHeNucgoB3_27Ax6qCatfalPIHHGUY,8862
+usp/objects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+usp/objects/page.py,sha256=vz2QXC9Z3E65Cxf68tKfQkubIc_OB0m6pNYH146Qx_8,14253
+usp/objects/sitemap.py,sha256=_t0ej2UmNsIb0NkxYkwYGxBqX_LHEJfNc-cRulQXyIk,11495
+usp/tree.py,sha256=MdnVxfIIMqWrudsYxFI8yQTXnlmNLFEcQEOkXbnuBr4,4395
+usp/web_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+usp/web_client/abstract_client.py,sha256=EWY4lPYJqpV7ge0DTZESTAOofAjNMIJnDm_2PPeZ9z4,7007
+usp/web_client/requests_client.py,sha256=sFYtJ8Q5z27WlTG1PgBzcvbS75pJ0pYUastEFmxa95U,5888
+ultimate_sitemap_parser-1.3.1.dist-info/LICENSE,sha256=ixuiBLtpoK3iv89l7ylKkg9rs2GzF9ukPH7ynZYzK5s,35148
+ultimate_sitemap_parser-1.3.1.dist-info/METADATA,sha256=GUU8qLo24ZGBtAd4CYaHxY927eFFGvKlVPhc6jfg5so,4397
+ultimate_sitemap_parser-1.3.1.dist-info/NOTICE,sha256=3ANZA5R9rYnCOnUoroGfFUOZ__ww_yG01NUAx0X6J7E,632
+ultimate_sitemap_parser-1.3.1.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+ultimate_sitemap_parser-1.3.1.dist-info/entry_points.txt,sha256=v60w5WzqYlPOucntZUy0ydzlYwuAPSwoQY0KdT5ragQ,36
+ultimate_sitemap_parser-1.3.1.dist-info/RECORD,,
usp/cli/_ls.py CHANGED
@@ -2,7 +2,7 @@ import argparse
 import sys
 from typing import Iterator
 
-from usp.cli._util import format_help, tabs
+from usp.cli._util import CountAction, format_help, setup_logging, tabs
 from usp.objects.sitemap import AbstractSitemap
 from usp.tree import sitemap_tree_for_homepage
 
@@ -26,7 +26,7 @@ def register(subparsers):
         choices=LS_FORMATS,
         default="tabtree",
         help=format_help(LS_FORMATS, "set output format"),
-        metavar="",
+        metavar="FORMAT",
     )
     ls_parser.add_argument(
         "-r",
@@ -46,6 +46,21 @@ def register(subparsers):
         action="store_true",
         help="strip the supplied URL from each page and sitemap URL",
     )
+    ls_parser.add_argument(
+        "-v",
+        "--verbose",
+        action=CountAction,
+        help="increase output verbosity (-v=INFO, -vv=DEBUG)",
+        dest="verbosity",
+        default=0,
+        max_count=2,
+    )
+    ls_parser.add_argument(
+        "-l",
+        "--log-file",
+        type=str,
+        help="write log to this file and suppress console output",
+    )
     ls_parser.set_defaults(no_robots=False, no_known=False, strip_url=False)
 
     ls_parser.set_defaults(func=ls)
@@ -85,6 +100,8 @@ def _output_pages(sitemap: AbstractSitemap, strip_prefix: str = ""):
 
 
 def ls(args):
+    setup_logging(args.verbosity, args.log_file)
+
     tree = sitemap_tree_for_homepage(
         args.url,
         use_robots=not args.no_robots,
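
The new -v/--verbose and -l/--log-file options feed straight into setup_logging() (added to usp/cli/_util.py below) at the top of the ls() handler, so logging is configured before the crawl starts. A minimal sketch of exercising them through the refactored entry point in usp/cli/cli.py; the "ls" subcommand name, the log file name and the URL are illustrative assumptions:

from usp.cli.cli import main

# Two -v flags are counted by CountAction and map to DEBUG; --log-file routes records to a file.
main(["ls", "-vv", "--log-file", "crawl.log", "https://example.com/"])
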
usp/cli/_util.py CHANGED
@@ -1,4 +1,6 @@
-from typing import Dict
+import logging
+from argparse import Action
+from typing import Dict, Optional
 
 
 def format_help(choices: Dict[str, str], opt_help: str) -> str:
@@ -19,3 +21,57 @@ def format_help(choices: Dict[str, str], opt_help: str) -> str:
 def tabs(n: int):
     """Generate n tabs."""
     return "\t" * n
+
+
+_log_levels = {
+    0: logging.WARNING,
+    1: logging.INFO,
+    2: logging.DEBUG,
+}
+
+
+class CountAction(Action):
+    """Modified version of argparse._CountAction to output better help."""
+
+    def __init__(
+        self,
+        option_strings,
+        dest,
+        default=None,
+        required=False,
+        help=None,
+        max_count=None,
+    ):
+        super().__init__(
+            option_strings=option_strings,
+            dest=dest,
+            nargs=0,
+            default=default,
+            required=required,
+            help=help,
+        )
+        self.max_count = max_count
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        count = getattr(namespace, self.dest, None)
+        if count is None:
+            count = 0
+        if self.max_count:
+            count = min(count, self.max_count)
+        setattr(namespace, self.dest, count + 1)
+
+    def format_usage(self):
+        option_str = self.option_strings[0]
+        if self.max_count is None:
+            return option_str
+        letter = self.option_strings[0][1]
+        usages = [f"-{letter * i}" for i in range(1, self.max_count + 1)]
+        return "/".join(usages)
+
+
+def setup_logging(verbosity: int, log_path: Optional[str]) -> None:
+    log_level = _log_levels.get(verbosity, logging.DEBUG)
+    if log_path is not None:
+        logging.basicConfig(level=log_level, filename=log_path)
+    else:
+        logging.basicConfig(level=log_level)
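
setup_logging() maps the counted verbosity onto stdlib logging levels (0 -> WARNING, 1 -> INFO, 2 or more -> DEBUG) and, when a path is given, sends records to that file via logging.basicConfig(). A minimal sketch of calling it directly; the file name is illustrative:

import logging

from usp.cli._util import setup_logging

setup_logging(2, None)           # console output at DEBUG
# setup_logging(1, "crawl.log")  # INFO-level records written to crawl.log instead
logging.getLogger("usp").debug("visible at verbosity 2")
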
usp/cli/cli.py CHANGED
@@ -1,10 +1,11 @@
 from argparse import ArgumentParser
+from typing import Optional
 
 from usp import __version__
 from usp.cli import _ls as ls_cmd
 
 
-def main():
+def parse_args(arg_list: Optional[list[str]]):
     parser = ArgumentParser(prog="usp", description="Ultimate Sitemap Parser")
     parser.add_argument(
         "-v", "--version", action="version", version=f"%(prog)s v{__version__}"
@@ -13,7 +14,12 @@ def main():
     subparsers = parser.add_subparsers(required=False, title="commands", metavar="")
     ls_cmd.register(subparsers)
 
-    args = parser.parse_args()
+    args = parser.parse_args(arg_list)
+    return args, parser
+
+
+def main(arg_list: Optional[list[str]] = None):
+    args, parser = parse_args(arg_list)
 
     if "func" in args:
         args.func(args)
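
Splitting parse_args() out of main(), and letting main() accept an explicit argument list, makes the CLI parseable and testable without touching sys.argv. A minimal sketch of a test along these lines; the "ls" subcommand name and the URL are assumptions for illustration:

from usp.cli.cli import parse_args

args, parser = parse_args(["ls", "-vv", "https://example.com/"])
assert args.verbosity == 2  # two -v occurrences counted by CountAction
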
usp/fetch_parse.py CHANGED
@@ -13,7 +13,7 @@ import re
 import xml.parsers.expat
 from collections import OrderedDict
 from decimal import Decimal, InvalidOperation
-from typing import Dict, Optional, Union
+from typing import Dict, Optional, Set
 
 from .exceptions import SitemapException, SitemapXMLParsingException
 from .helpers import (
@@ -43,8 +43,10 @@ from .objects.sitemap import (
 )
 from .web_client.abstract_client import (
     AbstractWebClient,
+    AbstractWebClientResponse,
     AbstractWebClientSuccessResponse,
     LocalWebClient,
+    LocalWebClientSuccessResponse,
     NoWebClientException,
     WebClientErrorResponse,
 )
@@ -64,12 +66,17 @@ class SitemapFetcher:
     Spec says it might be up to 50 MB but let's go for the full 100 MB here."""
 
     __MAX_RECURSION_LEVEL = 11
-    """Max. recursion level in iterating over sub-sitemaps."""
+    """Max. depth level in iterating over sub-sitemaps.
+
+    Recursive sitemaps (i.e. child sitemaps pointing to their parent) are stopped immediately.
+    """
 
     __slots__ = [
         "_url",
         "_recursion_level",
         "_web_client",
+        "_parent_urls",
+        "_quiet_404",
     ]
 
     def __init__(
@@ -77,14 +84,19 @@
         url: str,
         recursion_level: int,
         web_client: Optional[AbstractWebClient] = None,
+        parent_urls: Optional[Set[str]] = None,
+        quiet_404: bool = False,
     ):
         """
 
         :param url: URL of the sitemap to fetch and parse.
         :param recursion_level: current recursion level of parser
         :param web_client: Web client to use. If ``None``, a :class:`~.RequestsWebClient` will be used.
+        :param parent_urls: Set of parent URLs that led to this sitemap.
+        :param quiet_404: Whether 404 errors are expected and should be logged at a reduced level, useful for speculative fetching of known URLs.
 
         :raises SitemapException: If the maximum recursion depth is exceeded.
+        :raises SitemapException: If the URL is in the parent URLs set.
         :raises SitemapException: If the URL is not an HTTP(S) URL
         """
         if recursion_level > self.__MAX_RECURSION_LEVEL:
@@ -92,9 +104,19 @@
                 f"Recursion level exceeded {self.__MAX_RECURSION_LEVEL} for URL {url}."
             )
 
+        log.debug(f"Parent URLs is {parent_urls}")
+
         if not is_http_url(url):
             raise SitemapException(f"URL {url} is not a HTTP(s) URL.")
 
+        parent_urls = parent_urls or set()
+
+        if url in parent_urls:
+            # Likely a sitemap index points to itself/a higher level index
+            raise SitemapException(
+                f"Recursion detected in URL {url} with parent URLs {parent_urls}."
+            )
+
         if not web_client:
             web_client = RequestsWebClient()
@@ -103,19 +125,15 @@
         self._url = url
         self._web_client = web_client
         self._recursion_level = recursion_level
+        self._parent_urls = parent_urls or set()
+        self._quiet_404 = quiet_404
 
-    def _fetch(self) -> Union[str, WebClientErrorResponse]:
+    def _fetch(self) -> AbstractWebClientResponse:
         log.info(f"Fetching level {self._recursion_level} sitemap from {self._url}...")
         response = get_url_retry_on_client_errors(
-            url=self._url, web_client=self._web_client
+            url=self._url, web_client=self._web_client, quiet_404=self._quiet_404
         )
-
-        if isinstance(response, WebClientErrorResponse):
-            return response
-
-        assert isinstance(response, AbstractWebClientSuccessResponse)
-
-        return ungzipped_response_content(url=self._url, response=response)
+        return response
 
     def sitemap(self) -> AbstractSitemap:
         """
@@ -124,13 +142,27 @@
         :return: the parsed sitemap. Will be a child of :class:`~.AbstractSitemap`.
         If an HTTP error is encountered, or the sitemap cannot be parsed, will be :class:`~.InvalidSitemap`.
         """
-        response_content = self._fetch()
+        response = self._fetch()
 
-        if isinstance(response_content, WebClientErrorResponse):
+        if isinstance(response, WebClientErrorResponse):
             return InvalidSitemap(
                 url=self._url,
-                reason=f"Unable to fetch sitemap from {self._url}: {response_content.message()}",
+                reason=f"Unable to fetch sitemap from {self._url}: {response.message()}",
             )
+        assert isinstance(response, AbstractWebClientSuccessResponse)
+
+        response_url = response.url()
+        log.debug(f"Response URL is {response_url}")
+        if response_url in self._parent_urls:
+            # Likely a sitemap has redirected to a parent URL
+            return InvalidSitemap(
+                url=self._url,
+                reason=f"Recursion detected when {self._url} redirected to {response_url} with parent URLs {self._parent_urls}.",
+            )
+
+        self._url = response_url
+
+        response_content = ungzipped_response_content(url=self._url, response=response)
 
         # MIME types returned in Content-Type are unpredictable, so peek into the content instead
         if response_content[:20].strip().startswith("<"):
@@ -140,6 +172,7 @@
                 content=response_content,
                 recursion_level=self._recursion_level,
                 web_client=self._web_client,
+                parent_urls=self._parent_urls,
             )
 
         else:
@@ -150,6 +183,7 @@
                     content=response_content,
                     recursion_level=self._recursion_level,
                     web_client=self._web_client,
+                    parent_urls=self._parent_urls,
                 )
             else:
                 parser = PlainTextSitemapParser(
@@ -157,6 +191,7 @@
                     content=response_content,
                     recursion_level=self._recursion_level,
                     web_client=self._web_client,
+                    parent_urls=self._parent_urls,
                 )
 
         log.info(f"Parsing sitemap from URL {self._url}...")
@@ -186,8 +221,8 @@ class SitemapStrParser(SitemapFetcher):
         )
         self._static_content = static_content
 
-    def _fetch(self) -> Union[str, WebClientErrorResponse]:
-        return self._static_content
+    def _fetch(self) -> AbstractWebClientResponse:
+        return LocalWebClientSuccessResponse(url=self._url, data=self._static_content)
 
 
 class AbstractSitemapParser(metaclass=abc.ABCMeta):
@@ -198,6 +233,7 @@ class AbstractSitemapParser(metaclass=abc.ABCMeta):
         "_content",
         "_web_client",
         "_recursion_level",
+        "_parent_urls",
     ]
 
     def __init__(
@@ -206,11 +242,13 @@ class AbstractSitemapParser(metaclass=abc.ABCMeta):
         content: str,
         recursion_level: int,
         web_client: AbstractWebClient,
+        parent_urls: Set[str],
     ):
         self._url = url
         self._content = content
         self._recursion_level = recursion_level
         self._web_client = web_client
+        self._parent_urls = parent_urls
 
     @abc.abstractmethod
     def sitemap(self) -> AbstractSitemap:
@@ -231,12 +269,14 @@ class IndexRobotsTxtSitemapParser(AbstractSitemapParser):
         content: str,
         recursion_level: int,
         web_client: AbstractWebClient,
+        parent_urls: Set[str],
     ):
         super().__init__(
             url=url,
             content=content,
             recursion_level=recursion_level,
             web_client=web_client,
+            parent_urls=parent_urls,
         )
 
         if not self._url.endswith("/robots.txt"):
@@ -271,6 +311,7 @@
                     url=sitemap_url,
                     recursion_level=self._recursion_level + 1,
                     web_client=self._web_client,
+                    parent_urls=self._parent_urls | {self._url},
                 )
                 fetched_sitemap = fetcher.sitemap()
             except NoWebClientException:
@@ -333,12 +374,14 @@ class XMLSitemapParser(AbstractSitemapParser):
         content: str,
         recursion_level: int,
         web_client: AbstractWebClient,
+        parent_urls: Set[str],
    ):
         super().__init__(
             url=url,
             content=content,
             recursion_level=recursion_level,
             web_client=web_client,
+            parent_urls=parent_urls,
         )
 
         # Will be initialized when the type of sitemap is known
@@ -432,6 +475,7 @@
                 url=self._url,
                 web_client=self._web_client,
                 recursion_level=self._recursion_level,
+                parent_urls=self._parent_urls,
             )
 
         elif name == "rss":
@@ -545,14 +589,22 @@ class IndexXMLSitemapParser(AbstractXMLSitemapParser):
         "_recursion_level",
         # List of sub-sitemap URLs found in this index sitemap
         "_sub_sitemap_urls",
+        "_parent_urls",
     ]
 
-    def __init__(self, url: str, web_client: AbstractWebClient, recursion_level: int):
+    def __init__(
+        self,
+        url: str,
+        web_client: AbstractWebClient,
+        recursion_level: int,
+        parent_urls: Set[str],
+    ):
         super().__init__(url=url)
 
         self._web_client = web_client
         self._recursion_level = recursion_level
         self._sub_sitemap_urls = []
+        self._parent_urls = parent_urls
 
     def xml_element_end(self, name: str) -> None:
         if name == "sitemap:loc":
@@ -578,6 +630,7 @@
                     url=sub_sitemap_url,
                     recursion_level=self._recursion_level + 1,
                     web_client=self._web_client,
+                    parent_urls=self._parent_urls | {self._url},
                 )
                 fetched_sitemap = fetcher.sitemap()
             except NoWebClientException:
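
The common thread in these hunks is loop protection: every SitemapFetcher now carries the set of URLs that led to it (parent_urls), refuses to fetch a URL already in that set, and repeats the check against the response's final URL after redirects. quiet_404 is threaded through so speculative fetches of well-known sitemap paths do not warn on expected 404s. A minimal sketch of a top-level fetch under these assumptions (URL illustrative; web_client defaults to RequestsWebClient):

from usp.fetch_parse import SitemapFetcher

fetcher = SitemapFetcher(
    url="https://example.com/sitemap_index.xml",
    recursion_level=0,
    parent_urls=set(),  # the root fetch has no ancestors
)
tree = fetcher.sitemap()  # sub-fetchers receive parent_urls | {this URL}
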
usp/helpers.py CHANGED
@@ -7,6 +7,7 @@ import logging
 import re
 import sys
 import time
+from http import HTTPStatus
 from typing import Optional
 from urllib.parse import unquote_plus, urlparse, urlunparse
 
@@ -130,11 +131,15 @@ def parse_rfc2822_date(date_string: str) -> Optional[datetime.datetime]:
     return None
 
 
+_404_log_message = f"{HTTPStatus.NOT_FOUND} {HTTPStatus.NOT_FOUND.phrase}"
+
+
 def get_url_retry_on_client_errors(
     url: str,
     web_client: AbstractWebClient,
     retry_count: int = 5,
     sleep_between_retries: int = 1,
+    quiet_404: bool = False,
 ) -> AbstractWebClientResponse:
     """
     Fetch URL, retry on retryable errors.
@@ -143,6 +148,8 @@ def get_url_retry_on_client_errors(
     :param web_client: Web client object to use for fetching.
     :param retry_count: How many times to retry fetching the same URL.
     :param sleep_between_retries: How long to sleep between retries, in seconds.
+    :param quiet_404: Whether to log 404 errors at a lower level.
+
     :return: Web client response object.
     """
     assert retry_count > 0, "Retry count must be positive."
@@ -153,7 +160,11 @@
         response = web_client.get(url)
 
         if isinstance(response, WebClientErrorResponse):
-            log.warning(f"Request for URL {url} failed: {response.message()}")
+            if quiet_404 and response.message() == _404_log_message:
+                log_level = logging.INFO
+            else:
+                log_level = logging.WARNING
+            log.log(log_level, f"Request for URL {url} failed: {response.message()}")
 
             if response.retryable():
                 log.info(f"Retrying URL {url} in {sleep_between_retries} seconds...")
usp/objects/sitemap.py CHANGED
@@ -9,17 +9,19 @@
 """
 
 import abc
+import logging
 import os
 import pickle
 import tempfile
-from functools import lru_cache
+from functools import cache
 from typing import Iterator, List, Tuple
 
 from .page import SitemapPage
 
+log = logging.getLogger(__name__)
 
-# TODO: change to functools.cache when dropping py3.8
-@lru_cache(maxsize=None)
+
+@cache
 def _all_slots(target_cls):
     mro = target_cls.__mro__
 
@@ -153,6 +155,7 @@ class InvalidSitemap(AbstractSitemap):
         """
         super().__init__(url=url)
         self.__reason = reason
+        log.info(f"Invalid sitemap: {url}, reason: {reason}")
 
     def __eq__(self, other) -> bool:
         if not isinstance(other, InvalidSitemap):
@@ -222,8 +225,8 @@ class AbstractPagesSitemap(AbstractSitemap, metaclass=abc.ABCMeta):
         self._dump_pages(pages)
 
     def _dump_pages(self, pages: List[SitemapPage]):
-        temp_file, self.__pages_temp_file_path = tempfile.mkstemp()
-        with open(self.__pages_temp_file_path, "wb") as tmp:
+        fd, self.__pages_temp_file_path = tempfile.mkstemp()
+        with os.fdopen(fd, "wb") as tmp:
             pickle.dump(pages, tmp, protocol=pickle.HIGHEST_PROTOCOL)
 
     def __del__(self):
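
Besides switching to functools.cache (possible now that Python 3.8 support is dropped) and logging whenever an InvalidSitemap is constructed, the _dump_pages() change closes a file-descriptor leak: tempfile.mkstemp() returns an already-open OS-level descriptor, and wrapping it with os.fdopen() reuses that handle instead of opening the path a second time. The same pattern in isolation:

import os
import pickle
import tempfile

fd, path = tempfile.mkstemp()
with os.fdopen(fd, "wb") as tmp:  # closes the descriptor returned by mkstemp()
    pickle.dump(["example"], tmp, protocol=pickle.HIGHEST_PROTOCOL)
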
usp/tree.py CHANGED
@@ -75,7 +75,10 @@ def sitemap_tree_for_homepage(
     sitemap_urls_found_in_robots_txt = set()
     if use_robots:
         robots_txt_fetcher = SitemapFetcher(
-            url=robots_txt_url, web_client=web_client, recursion_level=0
+            url=robots_txt_url,
+            web_client=web_client,
+            recursion_level=0,
+            parent_urls=set(),
         )
         robots_txt_sitemap = robots_txt_fetcher.sitemap()
         if not isinstance(robots_txt_sitemap, InvalidSitemap):
@@ -95,6 +98,8 @@
             url=unpublished_sitemap_url,
             web_client=web_client,
             recursion_level=0,
+            parent_urls=sitemap_urls_found_in_robots_txt,
+            quiet_404=True,
         )
         unpublished_sitemap = unpublished_sitemap_fetcher.sitemap()
 
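
Nothing changes at the public API surface: sitemap_tree_for_homepage() is called as before, and the recursion guard plus the quieter 404 handling apply internally when the robots.txt sitemaps and the known sitemap locations are fetched. Minimal usage sketch (URL illustrative):

from usp.tree import sitemap_tree_for_homepage

tree = sitemap_tree_for_homepage("https://example.com/")
for page in tree.all_pages():
    print(page.url)
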
usp/web_client/abstract_client.py CHANGED
@@ -101,6 +101,15 @@ class AbstractWebClientSuccessResponse(
         """
         raise NotImplementedError("Abstract method.")
 
+    @abc.abstractmethod
+    def url(self) -> str:
+        """
+        Return the actual URL fetched, after any redirects.
+
+        :return: URL fetched.
+        """
+        raise NotImplementedError("Abstract method.")
+
 
 class WebClientErrorResponse(AbstractWebClientResponse, metaclass=abc.ABCMeta):
     """
@@ -191,6 +200,27 @@ class LocalWebClient(AbstractWebClient):
         raise NoWebClientException
 
 
+class LocalWebClientSuccessResponse(AbstractWebClientSuccessResponse):
+    def __init__(self, url: str, data: str):
+        self._url = url
+        self._data = data
+
+    def status_code(self) -> int:
+        return 200
+
+    def status_message(self) -> str:
+        return "OK"
+
+    def header(self, case_insensitive_name: str) -> Optional[str]:
+        return None
+
+    def raw_data(self) -> bytes:
+        return self._data.encode("utf-8")
+
+    def url(self) -> str:
+        return self._url
+
+
 class RequestWaiter:
     """
     Manages waiting between requests.
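
Because url() is now declared abstract on AbstractWebClientSuccessResponse, any custom web client written against this interface must report the final (post-redirect) URL. A hypothetical minimal subclass, mirroring the methods LocalWebClientSuccessResponse implements above:

from typing import Optional

from usp.web_client.abstract_client import AbstractWebClientSuccessResponse


class InMemorySuccessResponse(AbstractWebClientSuccessResponse):
    """Illustrative success response backed by an in-memory byte string."""

    def __init__(self, url: str, body: bytes):
        self._url = url
        self._body = body

    def status_code(self) -> int:
        return 200

    def status_message(self) -> str:
        return "OK"

    def header(self, case_insensitive_name: str) -> Optional[str]:
        return None

    def raw_data(self) -> bytes:
        return self._body

    def url(self) -> str:  # new requirement in 1.3.x
        return self._url
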
usp/web_client/requests_client.py CHANGED
@@ -62,6 +62,9 @@ class RequestsWebClientSuccessResponse(AbstractWebClientSuccessResponse):
 
         return data
 
+    def url(self) -> str:
+        return self.__requests_response.url
+
 
 class RequestsWebClientErrorResponse(WebClientErrorResponse):
     """
@@ -162,7 +165,7 @@ class RequestsWebClient(AbstractWebClient):
             )
         else:
             message = f"{response.status_code} {response.reason}"
-            log.info(f"Response content: {response.text}")
+            log.debug(f"Response content: {response.text}")
 
             if response.status_code in RETRYABLE_HTTP_STATUS_CODES:
                 return RequestsWebClientErrorResponse(
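
The requests-backed success response now exposes the URL that was actually fetched, which is what SitemapFetcher.sitemap() compares against its parent URLs after redirects. Sketch (URL illustrative):

from usp.web_client.abstract_client import AbstractWebClientSuccessResponse
from usp.web_client.requests_client import RequestsWebClient

client = RequestsWebClient()
response = client.get("https://example.com/sitemap.xml")
if isinstance(response, AbstractWebClientSuccessResponse):
    print(response.url())  # final URL after any redirects
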
ultimate_sitemap_parser-1.2.0.dist-info/RECORD DELETED
@@ -1,21 +0,0 @@
-usp/__init__.py,sha256=_jshbOBBUHRZ5ko4SdI7GRFiF9xKGJVCEPgL9lZJ81o,124
-usp/cli/__init__.py,sha256=mGrjSftUYfM2SGp9yEN2dTJndl5thOdv77-EAe6ocWo,37
-usp/cli/_ls.py,sha256=BjF5bGuhe_E_Ak-yyY0cDM83LFstl5tA3XNIrGZJujs,2954
-usp/cli/_util.py,sha256=UL5WiRZlpiDOI_QvSU1PdjcS6iCmfcLQlO1Mm1wjSAw,505
-usp/cli/cli.py,sha256=ySNyYHoCQ440KfxmpTkzLXgqtbnt5ru-TgPs2Zw2-LI,592
-usp/exceptions.py,sha256=9KTgnocYYZCfyaCf9BrBN7Ok4cwn7_DlrNFbhUfFsGM,634
-usp/fetch_parse.py,sha256=VJrJSAG1X8oQyW2p9wSepuGWfHlMDNoJG8jn3an2XUY,41396
-usp/helpers.py,sha256=S9d8fEhHzZqVCx3SkcWVTgW1JYKujH-tM86urjORNWA,8482
-usp/objects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-usp/objects/page.py,sha256=vz2QXC9Z3E65Cxf68tKfQkubIc_OB0m6pNYH146Qx_8,14253
-usp/objects/sitemap.py,sha256=yt5qe6fyKfmvJmV60mB8kc7yooGcpYhuIcNlmUqFGFA,11486
-usp/tree.py,sha256=pwSTp1Zok4evzrNFavP-hh5i9xGGzObj_sKUqjk72UU,4237
-usp/web_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-usp/web_client/abstract_client.py,sha256=7MpIfqQpi1_yojEmuReT8iy9kFUWCD3i2LMpHmBOwV0,6291
-usp/web_client/requests_client.py,sha256=1nyXXBxiapDNN5jNpCAXRL5rgjptK4oKvaJhV5nhLsA,5816
-ultimate_sitemap_parser-1.2.0.dist-info/LICENSE,sha256=ixuiBLtpoK3iv89l7ylKkg9rs2GzF9ukPH7ynZYzK5s,35148
-ultimate_sitemap_parser-1.2.0.dist-info/METADATA,sha256=46wVZspA5eUgbXefu2Fu7xtE03TbFsgjEwLL5BT-mj0,4447
-ultimate_sitemap_parser-1.2.0.dist-info/NOTICE,sha256=3ANZA5R9rYnCOnUoroGfFUOZ__ww_yG01NUAx0X6J7E,632
-ultimate_sitemap_parser-1.2.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-ultimate_sitemap_parser-1.2.0.dist-info/entry_points.txt,sha256=v60w5WzqYlPOucntZUy0ydzlYwuAPSwoQY0KdT5ragQ,36
-ultimate_sitemap_parser-1.2.0.dist-info/RECORD,,