webtoolkit 0.1.48__tar.gz → 0.1.50__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/PKG-INFO +1 -1
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/pyproject.toml +1 -1
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/baseurl.py +12 -1
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fakeinternet.py +5 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/urllocation.py +26 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/LICENSE +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/README.md +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/__init__.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/contentinterface.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/contentlinkparser.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/contentmoderation.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/contenttext.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/crawlers/__init__.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/crawlers/crawlerinterface.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/crawlers/requestscrawler.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/domaincache.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/__init__.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/defaulturlhandler.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlerchannelodysee.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlerchannelyoutube.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlerhttppage.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlerinterface.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlers.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlervideoodysee.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/handlers/handlervideoyoutube.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/pages.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/remoteserver.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/remoteurl.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/request.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/response.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/statuses.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/__init__.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/__init__.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/codeproject.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/firebog.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/geekwirecom.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/githubcom.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/hackernews.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/instance.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/opmlfile.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/reddit.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/returndislike.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/robotstxtcom.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/thehill.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/warhammercommunity.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fake/youtube.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fakeinternetcontents.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/fakeresponse.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/tests/mocks.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/utils/dateutils.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/utils/logger.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/utils/memorychecker.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/webconfig.py +0 -0
- {webtoolkit-0.1.48 → webtoolkit-0.1.50}/webtoolkit/webtools.py +0 -0
|
@@ -32,7 +32,7 @@ from .urllocation import (
|
|
|
32
32
|
)
|
|
33
33
|
|
|
34
34
|
from .statuses import status_code_to_text
|
|
35
|
-
from .response import response_to_json
|
|
35
|
+
from .response import response_to_json, PageResponseObject
|
|
36
36
|
from .request import request_to_json, PageRequestObject
|
|
37
37
|
from .handlers import (
|
|
38
38
|
HandlerInterface,
|
|
@@ -234,6 +234,11 @@ class BaseUrl(ContentInterface):
|
|
|
234
234
|
|
|
235
235
|
return self.response
|
|
236
236
|
|
|
237
|
+
location = UrlLocation(self.request.url)
|
|
238
|
+
if location.is_onion():
|
|
239
|
+
self.response = PageResponseObject(url=self.request.url, status_code=0)
|
|
240
|
+
return self.response
|
|
241
|
+
|
|
237
242
|
def get_streams(self):
|
|
238
243
|
"""
|
|
239
244
|
Returns all responses
|
|
@@ -275,6 +280,12 @@ class BaseUrl(ContentInterface):
|
|
|
275
280
|
return
|
|
276
281
|
|
|
277
282
|
p = UrlLocation(url)
|
|
283
|
+
if p.is_onion():
|
|
284
|
+
"""
|
|
285
|
+
Currently there is no handler to support onions
|
|
286
|
+
"""
|
|
287
|
+
return
|
|
288
|
+
|
|
278
289
|
short_url = p.get_protocolless()
|
|
279
290
|
|
|
280
291
|
if not short_url:
|
|
@@ -8,6 +8,7 @@ This module provides replacement for the Internet.
|
|
|
8
8
|
import logging
|
|
9
9
|
import unittest
|
|
10
10
|
import traceback
|
|
11
|
+
import os
|
|
11
12
|
|
|
12
13
|
from webtoolkit.utils.dateutils import DateUtils
|
|
13
14
|
from webtoolkit import (
|
|
@@ -143,6 +144,10 @@ class FakeInternetTestCase(unittest.TestCase):
|
|
|
143
144
|
infos = AppLogging.objects.filter(level=int(logging.ERROR))
|
|
144
145
|
return infos.count() == 0
|
|
145
146
|
|
|
147
|
+
def is_memory_test(self):
|
|
148
|
+
test_memory = os.getenv("TEST_MEMORY")
|
|
149
|
+
return test_memory
|
|
150
|
+
|
|
146
151
|
def create_example_data(self):
|
|
147
152
|
self.create_example_sources()
|
|
148
153
|
self.create_example_links()
|
|
@@ -3,6 +3,7 @@ Internet location parsing and processing.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from urllib.parse import unquote, urlparse, parse_qs
|
|
6
|
+
import base64
|
|
6
7
|
import mimetypes
|
|
7
8
|
import ipaddress
|
|
8
9
|
|
|
@@ -538,6 +539,7 @@ class UrlLocation(object):
|
|
|
538
539
|
url = UrlLocation.get_google_redirect_fix2(url)
|
|
539
540
|
url = UrlLocation.get_youtube_redirect_fix(url)
|
|
540
541
|
url = UrlLocation.get_linkedin_redirect_fix(url)
|
|
542
|
+
url = UrlLocation.get_bing_redirect_fix(url)
|
|
541
543
|
url = UrlLocation.get_trackless_url(url)
|
|
542
544
|
|
|
543
545
|
return url.url
|
|
@@ -613,6 +615,30 @@ class UrlLocation(object):
|
|
|
613
615
|
|
|
614
616
|
return url
|
|
615
617
|
|
|
618
|
+
def get_bing_redirect_fix(bing_url):
|
|
619
|
+
try:
|
|
620
|
+
parsed = urlparse(bing_url)
|
|
621
|
+
params = parse_qs(parsed.query)
|
|
622
|
+
|
|
623
|
+
encoded = params.get("u", [None])[0]
|
|
624
|
+
if not encoded:
|
|
625
|
+
return bing_url
|
|
626
|
+
|
|
627
|
+
# Remove prefix like 'a1'
|
|
628
|
+
if encoded.startswith("a1"):
|
|
629
|
+
encoded = encoded[2:]
|
|
630
|
+
|
|
631
|
+
# handle missing padding
|
|
632
|
+
padding = '=' * (-len(encoded) % 4)
|
|
633
|
+
decoded_bytes = base64.b64decode(encoded + padding)
|
|
634
|
+
|
|
635
|
+
url = decoded_bytes.decode("utf-8", errors="ignore")
|
|
636
|
+
url = UrlLocation.get_cleaned_link(url)
|
|
637
|
+
return url
|
|
638
|
+
|
|
639
|
+
except Exception:
|
|
640
|
+
return bing_url
|
|
641
|
+
|
|
616
642
|
def get_url_arg(self):
|
|
617
643
|
url = self.url
|
|
618
644
|
if not url:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|