webtoolkit 0.1.51__tar.gz → 0.1.54__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/PKG-INFO +1 -1
  2. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/pyproject.toml +1 -1
  3. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/baseurl.py +4 -6
  4. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/remoteserver.py +7 -0
  5. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/remoteurl.py +3 -4
  6. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fakeinternetcontents.py +12 -0
  7. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fakeresponse.py +4 -0
  8. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/LICENSE +0 -0
  9. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/README.md +0 -0
  10. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/__init__.py +0 -0
  11. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/contentinterface.py +0 -0
  12. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/contentlinkparser.py +0 -0
  13. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/contentmoderation.py +0 -0
  14. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/contenttext.py +0 -0
  15. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/crawlers/__init__.py +0 -0
  16. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/crawlers/crawlerinterface.py +0 -0
  17. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/crawlers/requestscrawler.py +0 -0
  18. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/domaincache.py +0 -0
  19. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/__init__.py +0 -0
  20. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/defaulturlhandler.py +0 -0
  21. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlerchannelodysee.py +0 -0
  22. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlerchannelyoutube.py +0 -0
  23. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlerhttppage.py +0 -0
  24. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlerinterface.py +0 -0
  25. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlers.py +0 -0
  26. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlervideoodysee.py +0 -0
  27. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/handlers/handlervideoyoutube.py +0 -0
  28. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/pages.py +0 -0
  29. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/request.py +0 -0
  30. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/response.py +0 -0
  31. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/statuses.py +0 -0
  32. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/__init__.py +0 -0
  33. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/__init__.py +0 -0
  34. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/codeproject.py +0 -0
  35. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/firebog.py +0 -0
  36. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/geekwirecom.py +0 -0
  37. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/githubcom.py +0 -0
  38. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/hackernews.py +0 -0
  39. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/instance.py +0 -0
  40. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/opmlfile.py +0 -0
  41. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/reddit.py +0 -0
  42. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/returndislike.py +0 -0
  43. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/robotstxtcom.py +0 -0
  44. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/thehill.py +0 -0
  45. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/warhammercommunity.py +0 -0
  46. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fake/youtube.py +0 -0
  47. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/fakeinternet.py +0 -0
  48. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/tests/mocks.py +0 -0
  49. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/urllocation.py +0 -0
  50. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/utils/dateutils.py +0 -0
  51. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/utils/logger.py +0 -0
  52. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/utils/memorychecker.py +0 -0
  53. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/webconfig.py +0 -0
  54. {webtoolkit-0.1.51 → webtoolkit-0.1.54}/webtoolkit/webtools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: webtoolkit
3
- Version: 0.1.51
3
+ Version: 0.1.54
4
4
  Summary: Web tools and interfaces for Internet data processing.
5
5
  License: GPL3
6
6
  Author: Iwan Grozny
@@ -3,7 +3,7 @@
3
3
 
4
4
  [tool.poetry]
5
5
  name = "webtoolkit"
6
- version = "0.1.51"
6
+ version = "0.1.54"
7
7
  description = "Web tools and interfaces for Internet data processing."
8
8
  authors = ["Iwan Grozny <renegat@renegat0x0.ddns.net>"]
9
9
  license = "GPL3"
@@ -487,26 +487,24 @@ class BaseUrl(ContentInterface):
487
487
  return calculate_hash(text)
488
488
 
489
489
  def get_hash(self):
490
- """Returns hash for URL"""
490
+ """ Returns binary hash for URL"""
491
491
  handler = self.get_handler()
492
492
  if handler:
493
493
  return handler.get_hash()
494
494
 
495
495
  def get_body_hash(self):
496
- """Returns body hash for URL"""
496
+ """Returns binary body hash for URL"""
497
497
  handler = self.get_handler()
498
498
  if handler:
499
499
  return handler.get_body_hash()
500
500
 
501
501
  def get_meta_hash(self) -> Optional[str]:
502
502
  """
503
- Calculates and returns a hash of the page's metadata properties.
504
- :return: A base64-encoded hash of the properties.
503
+ Calculates and returns a binary hash of the page's metadata properties.
505
504
  """
506
505
  self.get_response()
507
506
  properties_data = self.get_properties_data()
508
- properties_hash = self.property_encode(calculate_hash(str(properties_data)))
509
- return properties_hash
507
+ return calculate_hash(str(properties_data))
510
508
 
511
509
  def get_properties(self):
512
510
  """
@@ -190,6 +190,13 @@ class RemoteServer(object):
190
190
  return
191
191
  return json_obj
192
192
 
193
+ def is_remote_server_ok(link_call):
194
+ try:
195
+ with requests.get(url=link_call, timeout=timeout_s, verify=False) as result:
196
+ return result.status_code == 200
197
+ except Exception as E:
198
+ return False
199
+
193
200
  def get_properties(self, url=None, request=None):
194
201
  json_obj = self.get_getj(url=url, request=request)
195
202
 
@@ -251,7 +251,7 @@ class RemoteUrl(ContentInterface):
251
251
 
252
252
  def get_hash(self):
253
253
  """
254
- Retrieves the hash of the response.
254
+ Retrieves the binary hash of the response.
255
255
  :return: The hash of the response, or None if not available.
256
256
  """
257
257
  response = self.get_response()
@@ -260,7 +260,7 @@ class RemoteUrl(ContentInterface):
260
260
 
261
261
  def get_body_hash(self):
262
262
  """
263
- Retrieves the body hash of the response.
263
+ Retrieves the binary body hash of the response.
264
264
  :return: The body hash, or None if not available.
265
265
  """
266
266
  response = self.get_response()
@@ -269,8 +269,7 @@ class RemoteUrl(ContentInterface):
269
269
 
270
270
  def get_meta_hash(self):
271
271
  """
272
- Retrieves the meta hash from the URL's properties.
273
- :return: The decoded meta hash, or None if not available.
272
+ Retrieves the binary meta hash from the URL's properties.
274
273
  """
275
274
  hash_section = RemoteServer.read_properties_section(
276
275
  "PropertiesHash", self.all_properties
@@ -181,3 +181,15 @@ webpage_with_date_published = """
181
181
  </body>
182
182
  </html>
183
183
  """
184
+
185
+ webpage_with_language = """
186
+ <html lang="it">
187
+ <head>
188
+ <link type="application/rss+xml" href="https://www.codeproject.com/WebServices/NewsRSS.aspx" />
189
+ <title>Page with a 'it' language</title>
190
+ </head>
191
+
192
+ <body>
193
+ </body>
194
+ </html>
195
+ """
@@ -32,6 +32,7 @@ from webtoolkit.tests.fakeinternetcontents import (
32
32
  webpage_html_casinos,
33
33
  webpage_html_canonical_1,
34
34
  webpage_with_date_published,
35
+ webpage_with_language,
35
36
  )
36
37
  from webtoolkit.tests.fake.geekwirecom import (
37
38
  geekwire_feed,
@@ -400,6 +401,9 @@ class TestResponseObject(PageResponseObject):
400
401
  elif url == "https://empty-page.com":
401
402
  return ""
402
403
 
404
+ if url == "https://page-with-language.com":
405
+ return webpage_with_language
406
+
403
407
  elif url == "https://www.codeproject.com/WebServices/NewsRSS.aspx":
404
408
  return webpage_code_project_rss
405
409
 
File without changes
File without changes