webtoolkit 0.1.48__tar.gz → 0.1.50__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/PKG-INFO +1 -1
  2. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/pyproject.toml +1 -1
  3. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/baseurl.py +12 -1
  4. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fakeinternet.py +5 -0
  5. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/urllocation.py +26 -0
  6. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/LICENSE +0 -0
  7. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/README.md +0 -0
  8. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/__init__.py +0 -0
  9. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/contentinterface.py +0 -0
  10. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/contentlinkparser.py +0 -0
  11. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/contentmoderation.py +0 -0
  12. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/contenttext.py +0 -0
  13. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/crawlers/__init__.py +0 -0
  14. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/crawlers/crawlerinterface.py +0 -0
  15. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/crawlers/requestscrawler.py +0 -0
  16. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/domaincache.py +0 -0
  17. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/__init__.py +0 -0
  18. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/defaulturlhandler.py +0 -0
  19. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlerchannelodysee.py +0 -0
  20. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlerchannelyoutube.py +0 -0
  21. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlerhttppage.py +0 -0
  22. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlerinterface.py +0 -0
  23. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlers.py +0 -0
  24. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlervideoodysee.py +0 -0
  25. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlervideoyoutube.py +0 -0
  26. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/pages.py +0 -0
  27. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/remoteserver.py +0 -0
  28. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/remoteurl.py +0 -0
  29. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/request.py +0 -0
  30. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/response.py +0 -0
  31. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/statuses.py +0 -0
  32. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/__init__.py +0 -0
  33. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/__init__.py +0 -0
  34. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/codeproject.py +0 -0
  35. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/firebog.py +0 -0
  36. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/geekwirecom.py +0 -0
  37. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/githubcom.py +0 -0
  38. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/hackernews.py +0 -0
  39. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/instance.py +0 -0
  40. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/opmlfile.py +0 -0
  41. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/reddit.py +0 -0
  42. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/returndislike.py +0 -0
  43. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/robotstxtcom.py +0 -0
  44. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/thehill.py +0 -0
  45. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/warhammercommunity.py +0 -0
  46. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/youtube.py +0 -0
  47. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fakeinternetcontents.py +0 -0
  48. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fakeresponse.py +0 -0
  49. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/mocks.py +0 -0
  50. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/utils/dateutils.py +0 -0
  51. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/utils/logger.py +0 -0
  52. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/utils/memorychecker.py +0 -0
  53. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/webconfig.py +0 -0
  54. {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/webtools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: webtoolkit
3
- Version: 0.1.48
3
+ Version: 0.1.50
4
4
  Summary: Web tools and interfaces for Internet data processing.
5
5
  License: GPL3
6
6
  Author: Iwan Grozny
@@ -3,7 +3,7 @@
3
3
 
4
4
  [tool.poetry]
5
5
  name = "webtoolkit"
6
- version = "0.1.48"
6
+ version = "0.1.50"
7
7
  description = "Web tools and interfaces for Internet data processing."
8
8
  authors = ["Iwan Grozny <renegat@renegat0x0.ddns.net>"]
9
9
  license = "GPL3"
@@ -32,7 +32,7 @@ from .urllocation import (
32
32
  )
33
33
 
34
34
  from .statuses import status_code_to_text
35
- from .response import response_to_json
35
+ from .response import response_to_json, PageResponseObject
36
36
  from .request import request_to_json, PageRequestObject
37
37
  from .handlers import (
38
38
  HandlerInterface,
@@ -234,6 +234,11 @@ class BaseUrl(ContentInterface):
234
234
 
235
235
  return self.response
236
236
 
237
+ location = UrlLocation(self.request.url)
238
+ if location.is_onion():
239
+ self.response = PageResponseObject(url=self.request.url, status_code=0)
240
+ return self.response
241
+
237
242
  def get_streams(self):
238
243
  """
239
244
  Returns all responses
@@ -275,6 +280,12 @@ class BaseUrl(ContentInterface):
275
280
  return
276
281
 
277
282
  p = UrlLocation(url)
283
+ if p.is_onion():
284
+ """
285
+ Currently there is no handler to support onions
286
+ """
287
+ return
288
+
278
289
  short_url = p.get_protocolless()
279
290
 
280
291
  if not short_url:
@@ -8,6 +8,7 @@ This module provides replacement for the Internet.
8
8
  import logging
9
9
  import unittest
10
10
  import traceback
11
+ import os
11
12
 
12
13
  from webtoolkit.utils.dateutils import DateUtils
13
14
  from webtoolkit import (
@@ -143,6 +144,10 @@ class FakeInternetTestCase(unittest.TestCase):
143
144
  infos = AppLogging.objects.filter(level=int(logging.ERROR))
144
145
  return infos.count() == 0
145
146
 
147
+ def is_memory_test(self):
148
+ test_memory = os.getenv("TEST_MEMORY")
149
+ return test_memory
150
+
146
151
  def create_example_data(self):
147
152
  self.create_example_sources()
148
153
  self.create_example_links()
@@ -3,6 +3,7 @@ Internet location parsing and processing.
3
3
  """
4
4
 
5
5
  from urllib.parse import unquote, urlparse, parse_qs
6
+ import base64
6
7
  import mimetypes
7
8
  import ipaddress
8
9
 
@@ -538,6 +539,7 @@ class UrlLocation(object):
538
539
  url = UrlLocation.get_google_redirect_fix2(url)
539
540
  url = UrlLocation.get_youtube_redirect_fix(url)
540
541
  url = UrlLocation.get_linkedin_redirect_fix(url)
542
+ url = UrlLocation.get_bing_redirect_fix(url)
541
543
  url = UrlLocation.get_trackless_url(url)
542
544
 
543
545
  return url.url
@@ -613,6 +615,30 @@ class UrlLocation(object):
613
615
 
614
616
  return url
615
617
 
618
+ def get_bing_redirect_fix(bing_url):
619
+ try:
620
+ parsed = urlparse(bing_url)
621
+ params = parse_qs(parsed.query)
622
+
623
+ encoded = params.get("u", [None])[0]
624
+ if not encoded:
625
+ return bing_url
626
+
627
+ # Remove prefix like 'a1'
628
+ if encoded.startswith("a1"):
629
+ encoded = encoded[2:]
630
+
631
+ # handle missing padding
632
+ padding = '=' * (-len(encoded) % 4)
633
+ decoded_bytes = base64.b64decode(encoded + padding)
634
+
635
+ url = decoded_bytes.decode("utf-8", errors="ignore")
636
+ url = UrlLocation.get_cleaned_link(url)
637
+ return url
638
+
639
+ except Exception:
640
+ return bing_url
641
+
616
642
  def get_url_arg(self):
617
643
  url = self.url
618
644
  if not url:
File without changes
File without changes