webtoolkit 0.1.8__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/PKG-INFO +1 -1
  2. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/pyproject.toml +1 -1
  3. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/crawlers/crawlerinterface.py +6 -1
  4. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/statuses.py +1 -1
  5. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/urllocation.py +1 -0
  6. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/LICENSE +0 -0
  7. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/README.md +0 -0
  8. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/__init__.py +0 -0
  9. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/baseurl.py +0 -0
  10. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/contentinterface.py +0 -0
  11. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/contentlinkparser.py +0 -0
  12. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/contentmoderation.py +0 -0
  13. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/contenttext.py +0 -0
  14. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/crawlers/__init__.py +0 -0
  15. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/crawlers/requestscrawler.py +0 -0
  16. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/domaincache.py +0 -0
  17. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/handlers/__init__.py +0 -0
  18. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/handlers/defaulturlhandler.py +0 -0
  19. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/handlers/handlerchannelodysee.py +0 -0
  20. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/handlers/handlerchannelyoutube.py +0 -0
  21. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/handlers/handlerhttppage.py +0 -0
  22. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/handlers/handlerinterface.py +0 -0
  23. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/handlers/handlers.py +0 -0
  24. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/handlers/handlervideoodysee.py +0 -0
  25. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/handlers/handlervideoyoutube.py +0 -0
  26. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/pages.py +0 -0
  27. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/remoteserver.py +0 -0
  28. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/remoteurl.py +0 -0
  29. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/request.py +0 -0
  30. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/response.py +0 -0
  31. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/__init__.py +0 -0
  32. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/__init__.py +0 -0
  33. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/codeproject.py +0 -0
  34. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/firebog.py +0 -0
  35. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/geekwirecom.py +0 -0
  36. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/githubcom.py +0 -0
  37. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/hackernews.py +0 -0
  38. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/instance.py +0 -0
  39. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/opmlfile.py +0 -0
  40. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/reddit.py +0 -0
  41. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/returndislike.py +0 -0
  42. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/robotstxtcom.py +0 -0
  43. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/thehill.py +0 -0
  44. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/warhammercommunity.py +0 -0
  45. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fake/youtube.py +0 -0
  46. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fakeinternet.py +0 -0
  47. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fakeinternetcontents.py +0 -0
  48. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/fakeresponse.py +0 -0
  49. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/tests/mocks.py +0 -0
  50. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/utils/dateutils.py +0 -0
  51. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/utils/logger.py +0 -0
  52. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/utils/memorychecker.py +0 -0
  53. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/webconfig.py +0 -0
  54. {webtoolkit-0.1.8 → webtoolkit-0.1.10}/webtoolkit/webtools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: webtoolkit
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Web tools and interfaces for Internet data processing.
5
5
  License: GPL3
6
6
  Author: Iwan Grozny
@@ -3,7 +3,7 @@
3
3
 
4
4
  [tool.poetry]
5
5
  name = "webtoolkit"
6
- version = "0.1.8"
6
+ version = "0.1.10"
7
7
  description = "Web tools and interfaces for Internet data processing."
8
8
  authors = ["Iwan Grozny <renegat@renegat0x0.ddns.net>"]
9
9
  license = "GPL3"
@@ -219,8 +219,13 @@ class CrawlerInterface(object):
219
219
  target=crawl_with_thread_wrapper,
220
220
  args=(request, result),
221
221
  )
222
+
222
223
  thread.start()
223
- thread.join(request.timeout_s)
224
+
225
+ # give additional wait time
226
+ # requests (or other mechanisms) should timeout first
227
+ # give it some 'time space' to timeout gracefully
228
+ thread.join(request.timeout_s + 5)
224
229
 
225
230
  if thread.is_alive():
226
231
  raise WebToolsTimeoutException("Request timed out")
@@ -41,7 +41,7 @@ HTTP_STATUS_CLIENT_CLOSED_REQUEST = 499 # Client closed request before s
41
41
 
42
42
  HTTP_STATUS_CODE_EXCEPTION = 600
43
43
  HTTP_STATUS_CODE_CONNECTION_ERROR = 603
44
- HTTP_STATUS_CODE_TIMEOUT = 604
44
+ HTTP_STATUS_CODE_TIMEOUT = 604 # web page access timeout
45
45
  HTTP_STATUS_CODE_FILE_TOO_BIG = 612
46
46
  HTTP_STATUS_CODE_PAGE_UNSUPPORTED = 613
47
47
  HTTP_STATUS_CODE_SERVER_ERROR = 614
@@ -293,6 +293,7 @@ class UrlLocation(object):
293
293
  def get_protocol_url(self, protocol="https"):
294
294
  """
295
295
  replaces any protocol with input protocol
296
+ TODO return UrlLocation
296
297
  """
297
298
  protocol_pos = self.url.find("://")
298
299
  if protocol_pos >= 0:
File without changes
File without changes