webtoolkit 0.1.48__tar.gz → 0.1.49__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/PKG-INFO +1 -1
  2. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/pyproject.toml +1 -1
  3. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fakeinternet.py +5 -0
  4. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/urllocation.py +26 -0
  5. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/LICENSE +0 -0
  6. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/README.md +0 -0
  7. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/__init__.py +0 -0
  8. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/baseurl.py +0 -0
  9. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/contentinterface.py +0 -0
  10. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/contentlinkparser.py +0 -0
  11. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/contentmoderation.py +0 -0
  12. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/contenttext.py +0 -0
  13. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/crawlers/__init__.py +0 -0
  14. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/crawlers/crawlerinterface.py +0 -0
  15. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/crawlers/requestscrawler.py +0 -0
  16. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/domaincache.py +0 -0
  17. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/handlers/__init__.py +0 -0
  18. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/handlers/defaulturlhandler.py +0 -0
  19. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/handlers/handlerchannelodysee.py +0 -0
  20. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/handlers/handlerchannelyoutube.py +0 -0
  21. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/handlers/handlerhttppage.py +0 -0
  22. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/handlers/handlerinterface.py +0 -0
  23. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/handlers/handlers.py +0 -0
  24. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/handlers/handlervideoodysee.py +0 -0
  25. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/handlers/handlervideoyoutube.py +0 -0
  26. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/pages.py +0 -0
  27. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/remoteserver.py +0 -0
  28. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/remoteurl.py +0 -0
  29. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/request.py +0 -0
  30. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/response.py +0 -0
  31. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/statuses.py +0 -0
  32. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/__init__.py +0 -0
  33. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/__init__.py +0 -0
  34. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/codeproject.py +0 -0
  35. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/firebog.py +0 -0
  36. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/geekwirecom.py +0 -0
  37. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/githubcom.py +0 -0
  38. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/hackernews.py +0 -0
  39. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/instance.py +0 -0
  40. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/opmlfile.py +0 -0
  41. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/reddit.py +0 -0
  42. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/returndislike.py +0 -0
  43. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/robotstxtcom.py +0 -0
  44. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/thehill.py +0 -0
  45. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/warhammercommunity.py +0 -0
  46. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fake/youtube.py +0 -0
  47. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fakeinternetcontents.py +0 -0
  48. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/fakeresponse.py +0 -0
  49. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/tests/mocks.py +0 -0
  50. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/utils/dateutils.py +0 -0
  51. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/utils/logger.py +0 -0
  52. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/utils/memorychecker.py +0 -0
  53. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/webconfig.py +0 -0
  54. {webtoolkit-0.1.48 → webtoolkit-0.1.49}/webtoolkit/webtools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: webtoolkit
3
- Version: 0.1.48
3
+ Version: 0.1.49
4
4
  Summary: Web tools and interfaces for Internet data processing.
5
5
  License: GPL3
6
6
  Author: Iwan Grozny
@@ -3,7 +3,7 @@
3
3
 
4
4
  [tool.poetry]
5
5
  name = "webtoolkit"
6
- version = "0.1.48"
6
+ version = "0.1.49"
7
7
  description = "Web tools and interfaces for Internet data processing."
8
8
  authors = ["Iwan Grozny <renegat@renegat0x0.ddns.net>"]
9
9
  license = "GPL3"
@@ -8,6 +8,7 @@ This module provides replacement for the Internet.
8
8
  import logging
9
9
  import unittest
10
10
  import traceback
11
+ import os
11
12
 
12
13
  from webtoolkit.utils.dateutils import DateUtils
13
14
  from webtoolkit import (
@@ -143,6 +144,10 @@ class FakeInternetTestCase(unittest.TestCase):
143
144
  infos = AppLogging.objects.filter(level=int(logging.ERROR))
144
145
  return infos.count() == 0
145
146
 
147
+ def is_memory_test(self):
148
+ test_memory = os.getenv("TEST_MEMORY")
149
+ return test_memory
150
+
146
151
  def create_example_data(self):
147
152
  self.create_example_sources()
148
153
  self.create_example_links()
@@ -3,6 +3,7 @@ Internet location parsing and processing.
3
3
  """
4
4
 
5
5
  from urllib.parse import unquote, urlparse, parse_qs
6
+ import base64
6
7
  import mimetypes
7
8
  import ipaddress
8
9
 
@@ -538,6 +539,7 @@ class UrlLocation(object):
538
539
  url = UrlLocation.get_google_redirect_fix2(url)
539
540
  url = UrlLocation.get_youtube_redirect_fix(url)
540
541
  url = UrlLocation.get_linkedin_redirect_fix(url)
542
+ url = UrlLocation.get_bing_redirect_fix(url)
541
543
  url = UrlLocation.get_trackless_url(url)
542
544
 
543
545
  return url.url
@@ -613,6 +615,30 @@ class UrlLocation(object):
613
615
 
614
616
  return url
615
617
 
618
+ def get_bing_redirect_fix(bing_url):
619
+ try:
620
+ parsed = urlparse(bing_url)
621
+ params = parse_qs(parsed.query)
622
+
623
+ encoded = params.get("u", [None])[0]
624
+ if not encoded:
625
+ return bing_url
626
+
627
+ # Remove prefix like 'a1'
628
+ if encoded.startswith("a1"):
629
+ encoded = encoded[2:]
630
+
631
+ # handle missing padding
632
+ padding = '=' * (-len(encoded) % 4)
633
+ decoded_bytes = base64.b64decode(encoded + padding)
634
+
635
+ url = decoded_bytes.decode("utf-8", errors="ignore")
636
+ url = UrlLocation.get_cleaned_link(url)
637
+ return url
638
+
639
+ except Exception:
640
+ return bing_url
641
+
616
642
  def get_url_arg(self):
617
643
  url = self.url
618
644
  if not url:
File without changes
File without changes