webtoolkit 0.1.51__tar.gz → 0.1.54__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/PKG-INFO +1 -1
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/pyproject.toml +1 -1
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/baseurl.py +4 -6
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/remoteserver.py +7 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/remoteurl.py +3 -4
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fakeinternetcontents.py +12 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fakeresponse.py +4 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/LICENSE +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/README.md +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/__init__.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/contentinterface.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/contentlinkparser.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/contentmoderation.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/contenttext.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/crawlers/__init__.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/crawlers/crawlerinterface.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/crawlers/requestscrawler.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/domaincache.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/__init__.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/defaulturlhandler.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlerchannelodysee.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlerchannelyoutube.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlerhttppage.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlerinterface.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlers.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlervideoodysee.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlervideoyoutube.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/pages.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/request.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/response.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/statuses.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/__init__.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/__init__.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/codeproject.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/firebog.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/geekwirecom.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/githubcom.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/hackernews.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/instance.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/opmlfile.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/reddit.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/returndislike.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/robotstxtcom.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/thehill.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/warhammercommunity.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/youtube.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fakeinternet.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/mocks.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/urllocation.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/utils/dateutils.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/utils/logger.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/utils/memorychecker.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/webconfig.py +0 -0
- {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/webtools.py +0 -0
|
@@ -487,26 +487,24 @@ class BaseUrl(ContentInterface):
|
|
|
487
487
|
return calculate_hash(text)
|
|
488
488
|
|
|
489
489
|
def get_hash(self):
|
|
490
|
-
"""Returns hash for URL"""
|
|
490
|
+
""" Returns binary hash for URL"""
|
|
491
491
|
handler = self.get_handler()
|
|
492
492
|
if handler:
|
|
493
493
|
return handler.get_hash()
|
|
494
494
|
|
|
495
495
|
def get_body_hash(self):
|
|
496
|
-
"""Returns body hash for URL"""
|
|
496
|
+
"""Returns binary body hash for URL"""
|
|
497
497
|
handler = self.get_handler()
|
|
498
498
|
if handler:
|
|
499
499
|
return handler.get_body_hash()
|
|
500
500
|
|
|
501
501
|
def get_meta_hash(self) -> Optional[str]:
|
|
502
502
|
"""
|
|
503
|
-
Calculates and returns a hash of the page's metadata properties.
|
|
504
|
-
:return: A base64-encoded hash of the properties.
|
|
503
|
+
Calculates and returns a binary hash of the page's metadata properties.
|
|
505
504
|
"""
|
|
506
505
|
self.get_response()
|
|
507
506
|
properties_data = self.get_properties_data()
|
|
508
|
-
|
|
509
|
-
return properties_hash
|
|
507
|
+
return calculate_hash(str(properties_data))
|
|
510
508
|
|
|
511
509
|
def get_properties(self):
|
|
512
510
|
"""
|
|
@@ -190,6 +190,13 @@ class RemoteServer(object):
|
|
|
190
190
|
return
|
|
191
191
|
return json_obj
|
|
192
192
|
|
|
193
|
+
def is_remote_server_ok(link_call):
|
|
194
|
+
try:
|
|
195
|
+
with requests.get(url=link_call, timeout=timeout_s, verify=False) as result:
|
|
196
|
+
return result.status_code == 200
|
|
197
|
+
except Exception as E:
|
|
198
|
+
return False
|
|
199
|
+
|
|
193
200
|
def get_properties(self, url=None, request=None):
|
|
194
201
|
json_obj = self.get_getj(url=url, request=request)
|
|
195
202
|
|
|
@@ -251,7 +251,7 @@ class RemoteUrl(ContentInterface):
|
|
|
251
251
|
|
|
252
252
|
def get_hash(self):
|
|
253
253
|
"""
|
|
254
|
-
Retrieves the hash of the response.
|
|
254
|
+
Retrieves the binary hash of the response.
|
|
255
255
|
:return: The hash of the response, or None if not available.
|
|
256
256
|
"""
|
|
257
257
|
response = self.get_response()
|
|
@@ -260,7 +260,7 @@ class RemoteUrl(ContentInterface):
|
|
|
260
260
|
|
|
261
261
|
def get_body_hash(self):
|
|
262
262
|
"""
|
|
263
|
-
Retrieves the body hash of the response.
|
|
263
|
+
Retrieves the binary body hash of the response.
|
|
264
264
|
:return: The body hash, or None if not available.
|
|
265
265
|
"""
|
|
266
266
|
response = self.get_response()
|
|
@@ -269,8 +269,7 @@ class RemoteUrl(ContentInterface):
|
|
|
269
269
|
|
|
270
270
|
def get_meta_hash(self):
|
|
271
271
|
"""
|
|
272
|
-
Retrieves the meta hash from the URL's properties.
|
|
273
|
-
:return: The decoded meta hash, or None if not available.
|
|
272
|
+
Retrieves the binary meta hash from the URL's properties.
|
|
274
273
|
"""
|
|
275
274
|
hash_section = RemoteServer.read_properties_section(
|
|
276
275
|
"PropertiesHash", self.all_properties
|
|
@@ -181,3 +181,15 @@ webpage_with_date_published = """
|
|
|
181
181
|
</body>
|
|
182
182
|
</html>
|
|
183
183
|
"""
|
|
184
|
+
|
|
185
|
+
webpage_with_language = """
|
|
186
|
+
<html lang="it">
|
|
187
|
+
<head>
|
|
188
|
+
<link type="application/rss+xml" href="https://www.codeproject.com/WebServices/NewsRSS.aspx" />
|
|
189
|
+
<title>Page with a 'it' language</title>
|
|
190
|
+
</head>
|
|
191
|
+
|
|
192
|
+
<body>
|
|
193
|
+
</body>
|
|
194
|
+
</html>
|
|
195
|
+
"""
|
|
@@ -32,6 +32,7 @@ from webtoolkit.tests.fakeinternetcontents import (
|
|
|
32
32
|
webpage_html_casinos,
|
|
33
33
|
webpage_html_canonical_1,
|
|
34
34
|
webpage_with_date_published,
|
|
35
|
+
webpage_with_language,
|
|
35
36
|
)
|
|
36
37
|
from webtoolkit.tests.fake.geekwirecom import (
|
|
37
38
|
geekwire_feed,
|
|
@@ -400,6 +401,9 @@ class TestResponseObject(PageResponseObject):
|
|
|
400
401
|
elif url == "https://empty-page.com":
|
|
401
402
|
return ""
|
|
402
403
|
|
|
404
|
+
if url == "https://page-with-language.com":
|
|
405
|
+
return webpage_with_language
|
|
406
|
+
|
|
403
407
|
elif url == "https://www.codeproject.com/WebServices/NewsRSS.aspx":
|
|
404
408
|
return webpage_code_project_rss
|
|
405
409
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|