ultimate-sitemap-parser 1.0.0rc1.tar.gz → 1.1.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)
  1. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/PKG-INFO +6 -5
  2. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/pyproject.toml +42 -22
  3. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/cli/_ls.py +1 -1
  4. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/fetch_parse.py +34 -14
  5. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/helpers.py +8 -7
  6. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/objects/page.py +29 -3
  7. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/objects/sitemap.py +4 -9
  8. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/tree.py +5 -4
  9. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/web_client/abstract_client.py +35 -0
  10. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/web_client/requests_client.py +23 -5
  11. ultimate_sitemap_parser-1.0.0rc1/usp/log.py +0 -77
  12. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/LICENSE +0 -0
  13. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/NOTICE +0 -0
  14. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/README.rst +0 -0
  15. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/__init__.py +0 -0
  16. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/cli/__init__.py +0 -0
  17. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/cli/_util.py +0 -0
  18. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/cli/cli.py +1 -1
  19. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/exceptions.py +0 -0
  20. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/objects/__init__.py +0 -0
  21. {ultimate_sitemap_parser-1.0.0rc1 → ultimate_sitemap_parser-1.1.1}/usp/web_client/__init__.py +0 -0
@@ -1,15 +1,14 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.3
  Name: ultimate-sitemap-parser
- Version: 1.0.0rc1
+ Version: 1.1.1
  Summary: A performant library for parsing and crawling sitemaps
- Home-page: https://ultimate-sitemap-parser.readthedocs.io/
  License: GPL-3.0-or-later
  Keywords: sitemap,crawler,indexing,xml,rss,atom,google news
  Author: Linas Valiukas
  Author-email: linas@media.mit.edu
  Maintainer: Freddy Heppell
  Maintainer-email: f.heppell@sheffield.ac.uk
- Requires-Python: >=3.8,<4.0
+ Requires-Python: >=3.8
  Classifier: Development Status :: 5 - Production/Stable
  Classifier: Intended Audience :: Developers
  Classifier: Intended Audience :: Information Technology
@@ -22,12 +21,14 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
  Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
  Classifier: Topic :: Text Processing :: Indexing
  Classifier: Topic :: Text Processing :: Markup :: XML
  Requires-Dist: python-dateutil (>=2.7,<3.0.0)
- Requires-Dist: requests (>=2.2.1)
+ Requires-Dist: requests (>=2.2.1,<3.0.0)
  Project-URL: Documentation, https://ultimate-sitemap-parser.readthedocs.io/
+ Project-URL: Homepage, https://ultimate-sitemap-parser.readthedocs.io/
  Project-URL: Repository, https://github.com/GateNLP/ultimate-sitemap-parser
  Description-Content-Type: text/x-rst

@@ -1,20 +1,36 @@
- [tool.poetry]
+ [project]
  name = "ultimate-sitemap-parser"
- version = "1.0.0rc1"
+ version = "1.1.1"
  description = "A performant library for parsing and crawling sitemaps"
  authors = [
-     "Linas Valiukas <linas@media.mit.edu>",
-     "Hal Roberts <hroberts@cyber.law.harvard.edu>",
-     "Freddy Heppell <f.heppell@sheffield.ac.uk>"
+     { name = "Linas Valiukas", email = "linas@media.mit.edu"},
+     { name = "Hal Roberts", email = "hroberts@cyber.law.harvard.edu"},
+     { name = "Freddy Heppell", email = "f.heppell@sheffield.ac.uk"},
  ]
  maintainers = [
-     "Freddy Heppell <f.heppell@sheffield.ac.uk>"
+     { name = "Freddy Heppell", email = "f.heppell@sheffield.ac.uk"},
  ]
+ license = "GPL-3.0-or-later"
+ readme = "README.rst"
+ keywords = ["sitemap", "crawler", "indexing", "xml", "rss", "atom", "google news"]
+ dynamic = ["classifiers"]
+
+ requires-python = ">=3.8"
+ dependencies = [
+     "python-dateutil (>=2.7,<3.0.0)",
+     "requests (>=2.2.1,<3.0.0)"
+ ]
+
+ [project.urls]
  homepage = "https://ultimate-sitemap-parser.readthedocs.io/"
  documentation = "https://ultimate-sitemap-parser.readthedocs.io/"
  repository = "https://github.com/GateNLP/ultimate-sitemap-parser"
- license = "GPL-3.0-or-later"
- readme = "README.rst"
+
+ [project.scripts]
+ usp = 'usp.cli:main'
+
+ [tool.poetry]
+ requires-poetry = ">=2.0"
  classifiers=[
      'Development Status :: 5 - Production/Stable',
      'Intended Audience :: Developers',
@@ -26,24 +42,20 @@ classifiers=[
      'Topic :: Text Processing :: Indexing',
      'Topic :: Text Processing :: Markup :: XML',
  ]
- keywords = ["sitemap", "crawler", "indexing", "xml", "rss", "atom", "google news"]
  packages = [
      { include = "usp" }
  ]

- [tool.poetry.scripts]
- usp = 'usp.cli:main'
-
  [tool.poetry.dependencies]
- python = "^3.8"
- python-dateutil = ">=2.7,<3.0.0"
- requests = ">=2.2.1"
+ # Specify upper bound for locking
+ python = ">=3.8,<4.0"

  [tool.poetry.group.dev.dependencies]
  requests-mock = ">=1.6.0,<2.0"
  pytest = "^8.3.0"
- ruff = "^0.6.1"
+ ruff = "^0.9.3"
  vcrpy = "6.0.1"
+ pytest-mock = "^3.14.0"

  [tool.poetry.group.perf]
  optional = true
@@ -71,12 +83,20 @@ extend-exclude = ["docs/*"]

  [tool.ruff.lint]
  select = [
-     "E4",
-     "E7",
-     "E9",
-     "F",
-     "UP",
-     "PT"
+     "E4",  # pycodestyle Import
+     "E7",  # pycodestyle Statement
+     "E9",  # pycodestyle Runtime
+     "F",   # pyflakes
+     "UP",  # pyupgrde
+     "PT",  # flake8-pytest-style
+     "I",   # isort
+     "T20", # flake8-print
+     "LOG", # flake8-logging
+ ]
+
+ [tool.ruff.lint.per-file-ignores]
+ "**/tests/*" = [
+     "T20", # Allow print in tests
  ]

  [tool.pytest.ini_options]
@@ -2,7 +2,7 @@ import argparse
  import sys
  from typing import Iterator

- from usp.cli._util import tabs, format_help
+ from usp.cli._util import format_help, tabs
  from usp.objects.sitemap import AbstractSitemap
  from usp.tree import sitemap_tree_for_homepage

@@ -8,49 +8,49 @@
  """

  import abc
+ import logging
  import re
  import xml.parsers.expat
  from collections import OrderedDict
  from decimal import Decimal, InvalidOperation
- from typing import Optional, Dict, Union
-
+ from typing import Dict, Optional, Union

  from .exceptions import SitemapException, SitemapXMLParsingException
  from .helpers import (
-     html_unescape_strip,
-     parse_iso8601_date,
      get_url_retry_on_client_errors,
-     ungzipped_response_content,
+     html_unescape_strip,
      is_http_url,
+     parse_iso8601_date,
      parse_rfc2822_date,
+     ungzipped_response_content,
  )
- from .log import create_logger
  from .objects.page import (
+     SITEMAP_PAGE_DEFAULT_PRIORITY,
      SitemapImage,
-     SitemapPage,
      SitemapNewsStory,
+     SitemapPage,
      SitemapPageChangeFrequency,
-     SITEMAP_PAGE_DEFAULT_PRIORITY,
  )
  from .objects.sitemap import (
      AbstractSitemap,
-     InvalidSitemap,
      IndexRobotsTxtSitemap,
      IndexXMLSitemap,
-     PagesXMLSitemap,
-     PagesTextSitemap,
-     PagesRSSSitemap,
+     InvalidSitemap,
      PagesAtomSitemap,
+     PagesRSSSitemap,
+     PagesTextSitemap,
+     PagesXMLSitemap,
  )
  from .web_client.abstract_client import (
      AbstractWebClient,
      AbstractWebClientSuccessResponse,
+     LocalWebClient,
+     NoWebClientException,
      WebClientErrorResponse,
  )
- from .web_client.abstract_client import LocalWebClient, NoWebClientException
  from .web_client.requests_client import RequestsWebClient

- log = create_logger(__name__)
+ log = logging.getLogger(__name__)


  class SitemapFetcher:
@@ -643,6 +643,7 @@ class PagesXMLSitemapParser(AbstractXMLSitemapParser):
              "news_keywords",
              "news_stock_tickers",
              "images",
+             "alternates",
          ]

          def __init__(self):
@@ -659,6 +660,7 @@
              self.news_keywords = None
              self.news_stock_tickers = None
              self.images = []
+             self.alternates = []

          def __hash__(self):
              return hash(
@@ -763,6 +765,10 @@ class PagesXMLSitemapParser(AbstractXMLSitemapParser):
                  for image in self.images
              ]

+             alternates = None
+             if len(self.alternates) > 0:
+                 alternates = self.alternates
+
              return SitemapPage(
                  url=url,
                  last_modified=last_modified,
@@ -770,6 +776,7 @@ class PagesXMLSitemapParser(AbstractXMLSitemapParser):
                  priority=priority,
                  news_story=sitemap_news_story,
                  images=sitemap_images,
+                 alternates=alternates,
              )

      __slots__ = ["_current_page", "_pages", "_page_urls", "_current_image"]
@@ -801,6 +808,19 @@ class PagesXMLSitemapParser(AbstractXMLSitemapParser):
                      "Page is expected to be set before <image:image>."
                  )
              self._current_image = self.Image()
+         elif name == "link":
+             if not self._current_page:
+                 raise SitemapXMLParsingException(
+                     "Page is expected to be set before <link>."
+                 )
+             if "rel" not in attrs or attrs["rel"] != "alternate":
+                 log.warning(f"<link> element is missing rel attribute: {attrs}.")
+             elif "hreflang" not in attrs or "href" not in attrs:
+                 log.warning(
+                     f"<link> element is missing hreflang or href attributes: {attrs}."
+                 )
+             else:
+                 self._current_page.alternates.append((attrs["hreflang"], attrs["href"]))

      def __require_last_char_data_to_be_set(self, name: str) -> None:
          if not self._last_char_data:
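For context, the new <link> branch above is what surfaces hreflang alternates on parsed pages. A minimal usage sketch (the homepage URL is illustrative, not taken from this diff):

    # Hypothetical usage sketch: <url> entries carrying
    # <xhtml:link rel="alternate" hreflang="..." href="..."/> children
    # now expose those links as (hreflang, href) tuples on each page.
    from usp.tree import sitemap_tree_for_homepage

    tree = sitemap_tree_for_homepage("https://www.example.com/")
    for page in tree.all_pages():
        if page.alternates:
            for hreflang, href in page.alternates:
                print(f"{page.url} -> {hreflang}: {href}")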
@@ -3,24 +3,25 @@
  import datetime
  import gzip as gzip_lib
  import html
+ import logging
  import re
  import sys
  import time
  from typing import Optional
- from urllib.parse import urlparse, unquote_plus, urlunparse
- from dateutil.parser import parse as dateutil_parse
+ from urllib.parse import unquote_plus, urlparse, urlunparse
+
  from dateutil.parser import isoparse as dateutil_isoparse
+ from dateutil.parser import parse as dateutil_parse

- from .exceptions import SitemapException, GunzipException, StripURLToHomepageException
- from .log import create_logger
+ from .exceptions import GunzipException, SitemapException, StripURLToHomepageException
  from .web_client.abstract_client import (
      AbstractWebClient,
+     AbstractWebClientResponse,
      AbstractWebClientSuccessResponse,
      WebClientErrorResponse,
-     AbstractWebClientResponse,
  )

- log = create_logger(__name__)
+ log = logging.getLogger(__name__)

  __URL_REGEX = re.compile(r"^https?://[^\s/$.?#].[^\s]*$", re.IGNORECASE)
  """Regular expression to match HTTP(s) URLs."""
@@ -247,7 +248,7 @@ def ungzipped_response_content(
              data = gunzip(data)
          except GunzipException as ex:
              # In case of an error, just assume that it's one of the non-gzipped sitemaps with ".gz" extension
-             log.error(
+             log.warning(
                  f"Unable to gunzip response {response}, maybe it's a non-gzipped sitemap: {ex}"
              )

@@ -3,7 +3,7 @@
  import datetime
  from decimal import Decimal
  from enum import Enum, unique
- from typing import List, Optional
+ from typing import List, Optional, Tuple

  SITEMAP_PAGE_DEFAULT_PRIORITY = Decimal("0.5")
  """Default sitemap page priority, as per the spec."""
@@ -331,6 +331,7 @@ class SitemapPage:
          "__change_frequency",
          "__news_story",
          "__images",
+         "__alternates",
      ]

      def __init__(
@@ -341,6 +342,7 @@
          change_frequency: Optional[SitemapPageChangeFrequency] = None,
          news_story: Optional[SitemapNewsStory] = None,
          images: Optional[List[SitemapImage]] = None,
+         alternates: Optional[List[Tuple[str, str]]] = None,
      ):
          """
          Initialize a new sitemap-derived page.
@@ -357,6 +359,7 @@
          self.__change_frequency = change_frequency
          self.__news_story = news_story
          self.__images = images
+         self.__alternates = alternates

      def __eq__(self, other) -> bool:
          if not isinstance(other, SitemapPage):
@@ -380,6 +383,9 @@
          if self.images != other.images:
              return False

+         if self.alternates != other.alternates:
+             return False
+
          return True

      def __hash__(self):
@@ -442,10 +448,30 @@

      @property
      def news_story(self) -> Optional[SitemapNewsStory]:
-         """Get the Google News story attached to the URL."""
+         """Get the Google News story attached to the URL.
+
+         See :ref:`google-news-ext` reference
+         """
          return self.__news_story

      @property
      def images(self) -> Optional[List[SitemapImage]]:
-         """Get the images attached to the URL."""
+         """Get the images attached to the URL.
+
+         See :ref:`google-image-ext` reference
+         """
          return self.__images
+
+     @property
+     def alternates(self) -> Optional[List[Tuple[str, str]]]:
+         """Get the alternate URLs for the URL.
+
+         A tuple of (language code, URL) for each ``<xhtml:link>`` element with ``rel="alternate"`` attribute.
+
+         See :ref:`sitemap-extra-localisation` reference
+
+         Example::
+
+             [('fr', 'https://www.example.com/fr/page'), ('de', 'https://www.example.com/de/page')]
+         """
+         return self.__alternates
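A short sketch of the new constructor argument and the equality change above; the URLs below are placeholders, not values from the package:

    # Sketch of SitemapPage.alternates; values are illustrative only.
    from usp.objects.page import SitemapPage

    page_a = SitemapPage(
        url="https://www.example.com/page",
        alternates=[("fr", "https://www.example.com/fr/page")],
    )
    page_b = SitemapPage(url="https://www.example.com/page")

    print(page_a.alternates)  # [('fr', 'https://www.example.com/fr/page')]
    print(page_a == page_b)   # False -- alternates now take part in __eq__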
@@ -9,11 +9,11 @@
  """

  import abc
- from functools import lru_cache
  import os
  import pickle
  import tempfile
- from typing import List, Iterator, Tuple
+ from functools import lru_cache
+ from typing import Iterator, List, Tuple

  from .page import SitemapPage

@@ -72,7 +72,7 @@ class AbstractSitemap(metaclass=abc.ABCMeta):
          return hash((self.url,))

      def __repr__(self):
-         return f"{self.__class__.__name__}(" f"url={self.url}" ")"
+         return f"{self.__class__.__name__}(url={self.url})"

      @property
      def url(self) -> str:
@@ -167,12 +167,7 @@ class InvalidSitemap(AbstractSitemap):
          return True

      def __repr__(self):
-         return (
-             f"{self.__class__.__name__}("
-             f"url={self.url}, "
-             f"reason={self.reason}"
-             ")"
-         )
+         return f"{self.__class__.__name__}(url={self.url}, reason={self.reason})"

      def to_dict(self, with_pages=True) -> dict:
          return {
@@ -1,19 +1,20 @@
  """Helpers to generate a sitemap tree."""

+ import logging
  from typing import Optional
+
  from .exceptions import SitemapException
  from .fetch_parse import SitemapFetcher, SitemapStrParser
  from .helpers import is_http_url, strip_url_to_homepage
- from .log import create_logger
  from .objects.sitemap import (
      AbstractSitemap,
-     InvalidSitemap,
-     IndexWebsiteSitemap,
      IndexRobotsTxtSitemap,
+     IndexWebsiteSitemap,
+     InvalidSitemap,
  )
  from .web_client.abstract_client import AbstractWebClient

- log = create_logger(__name__)
+ log = logging.getLogger(__name__)

  _UNPUBLISHED_SITEMAP_PATHS = {
      "sitemap.xml",
@@ -1,6 +1,8 @@
  """Abstract web client class."""

  import abc
+ import random
+ import time
  from http import HTTPStatus
  from typing import Optional

@@ -187,3 +189,36 @@ class LocalWebClient(AbstractWebClient):

      def get(self, url: str) -> AbstractWebClientResponse:
          raise NoWebClientException
+
+
+ class RequestWaiter:
+     """
+     Manages waiting between requests.
+     """
+
+     def __init__(self, wait: Optional[float] = None, random_wait: bool = True):
+         """
+         :param wait: time to wait between requests, in seconds.
+         :param random_wait: if true, wait time is multiplied by a random number between 0.5 and 1.5.
+         """
+         self.wait_s = wait or 0
+         self.random_wait = random_wait
+         self.is_first = True
+
+     def wait(self) -> None:
+         """Perform a wait if needed. Should be called before each request.
+
+         Will skip wait if this is the first request.
+         """
+         if self.wait_s == 0:
+             return
+
+         if self.is_first:
+             self.is_first = False
+             return
+
+         wait_f = 1.0
+         if self.random_wait:
+             wait_f = random.uniform(0.5, 1.5)
+
+         time.sleep(self.wait_s * wait_f)
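Standalone, the new RequestWaiter behaves roughly like this (a sketch based on the code above):

    # Sketch of RequestWaiter on its own, outside any web client.
    from usp.web_client.abstract_client import RequestWaiter

    waiter = RequestWaiter(wait=1.5, random_wait=True)
    waiter.wait()  # first call returns immediately (is_first short-circuits)
    waiter.wait()  # later calls sleep 1.5 s x uniform(0.5, 1.5), i.e. ~0.75-2.25 s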
@@ -1,18 +1,23 @@
  """Implementation of :mod:`usp.web_client.abstract_client` with Requests."""

+ import logging
  from http import HTTPStatus
- from typing import Optional, Dict, Tuple, Union
+ from typing import Dict, Optional, Tuple, Union

  import requests

+ from usp import __version__
+
  from .abstract_client import (
+     RETRYABLE_HTTP_STATUS_CODES,
      AbstractWebClient,
      AbstractWebClientResponse,
      AbstractWebClientSuccessResponse,
+     RequestWaiter,
      WebClientErrorResponse,
-     RETRYABLE_HTTP_STATUS_CODES,
  )
- from usp import __version__
+
+ log = logging.getLogger(__name__)


  class RequestsWebClientSuccessResponse(AbstractWebClientSuccessResponse):
@@ -78,16 +83,27 @@ class RequestsWebClient(AbstractWebClient):
      Some webservers might be generating huge sitemaps on the fly, so this is why it's rather big.
      """

-     __slots__ = ["__max_response_data_length", "__timeout", "__proxies", "__verify"]
+     __slots__ = [
+         "__max_response_data_length",
+         "__timeout",
+         "__proxies",
+         "__verify",
+         "__waiter",
+     ]

-     def __init__(self, verify=True):
+     def __init__(
+         self, verify=True, wait: Optional[float] = None, random_wait: bool = False
+     ):
          """
          :param verify: whether certificates should be verified for HTTPS requests.
+         :param wait: time to wait between requests, in seconds.
+         :param random_wait: if true, wait time is multiplied by a random number between 0.5 and 1.5.
          """
          self.__max_response_data_length = None
          self.__timeout = self.__HTTP_REQUEST_TIMEOUT
          self.__proxies = {}
          self.__verify = verify
+         self.__waiter = RequestWaiter(wait, random_wait)

      def set_timeout(self, timeout: Union[int, Tuple[int, int], None]) -> None:
          """Set HTTP request timeout.
@@ -114,6 +130,7 @@
          self.__max_response_data_length = max_response_data_length

      def get(self, url: str) -> AbstractWebClientResponse:
+         self.__waiter.wait()
          try:
              response = requests.get(
                  url,
@@ -139,6 +156,7 @@
              )
          else:
              message = f"{response.status_code} {response.reason}"
+             log.info(f"Response content: {response.text}")

              if response.status_code in RETRYABLE_HTTP_STATUS_CODES:
                  return RequestsWebClientErrorResponse(
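Putting the new constructor arguments together, a throttled crawl might look like the sketch below; it assumes the existing web_client argument of sitemap_tree_for_homepage, and the URL and timings are illustrative:

    # Hedged usage sketch: throttle requests made while crawling a sitemap tree.
    from usp.tree import sitemap_tree_for_homepage
    from usp.web_client.requests_client import RequestsWebClient

    client = RequestsWebClient(wait=2, random_wait=True)  # roughly 1-3 s between requests
    client.set_timeout(30)  # seconds, via the existing set_timeout() shown above
    tree = sitemap_tree_for_homepage("https://www.example.com/", web_client=client)
    print(sum(1 for _ in tree.all_pages()))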
@@ -1,77 +0,0 @@
- """Logging utilities."""
-
- import logging
-
-
- class Logger:
-     """
-     Logging helper class.
-     """
-
-     __LEVELS = {
-         "CRITICAL": logging.CRITICAL,
-         "ERROR": logging.ERROR,
-         "WARNING": logging.WARNING,
-         "INFO": logging.INFO,
-         "DEBUG": logging.DEBUG,
-     }
-     """Valid logging levels and their "logging" counterparts."""
-
-     __DEFAULT_LEVEL = "INFO"
-     """Default logging level."""
-
-     __slots__ = [
-         # "logging" object
-         "__l",
-     ]
-
-     def __init__(self, name: str):
-         """
-         Initialize logger object for a given name.
-
-         :param name: Module name that the logger should be initialized for.
-         """
-
-         self.__l = logging.getLogger(name)
-
-     def error(self, message: str) -> None:
-         """
-         Log error message.
-
-         :param message: Message to log.
-         """
-         self.__l.error(message)
-
-     def warning(self, message: str) -> None:
-         """
-         Log warning message.
-
-         :param message: Message to log.
-         """
-         self.__l.warning(message)
-
-     def info(self, message: str) -> None:
-         """
-         Log informational message.
-
-         :param message: Message to log.
-         """
-         self.__l.info(message)
-
-     def debug(self, message: str) -> None:
-         """
-         Log debugging message.
-
-         :param message: Message to log.
-         """
-         self.__l.debug(message)
-
-
- def create_logger(name: str) -> Logger:
-     """
-     Create and return Logger object.
-
-     :param name: Module name that the logger should be initialized for.
-     :return: Logger object.
-     """
-     return Logger(name=name)
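With usp/log.py removed, the library now logs through the standard logging module (see the logging.getLogger(__name__) changes above), so callers opt in the usual way; a minimal sketch:

    # Minimal sketch: enable the library's log output via the stdlib logging module.
    import logging

    logging.basicConfig(format="%(name)s %(levelname)s: %(message)s")
    logging.getLogger("usp").setLevel(logging.DEBUG)  # "usp" is the package's logger namespace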
@@ -1,7 +1,7 @@
  from argparse import ArgumentParser

- from usp.cli import _ls as ls_cmd
  from usp import __version__
+ from usp.cli import _ls as ls_cmd


  def main():