webtoolkit 0.0.193__tar.gz → 0.0.194__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/PKG-INFO +3 -2
  2. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/pyproject.toml +1 -1
  3. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/remoteserver.py +13 -7
  4. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/remoteurl.py +5 -2
  5. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/LICENSE +0 -0
  6. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/README.md +0 -0
  7. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/__init__.py +0 -0
  8. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/baseurl.py +0 -0
  9. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/contentinterface.py +0 -0
  10. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/contentlinkparser.py +0 -0
  11. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/contentmoderation.py +0 -0
  12. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/contenttext.py +0 -0
  13. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/crawlers/__init__.py +0 -0
  14. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/crawlers/crawlerinterface.py +0 -0
  15. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/crawlers/requestscrawler.py +0 -0
  16. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/domaincache.py +0 -0
  17. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/handlers/__init__.py +0 -0
  18. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/handlers/defaulturlhandler.py +0 -0
  19. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/handlers/handlerchannelodysee.py +0 -0
  20. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/handlers/handlerchannelyoutube.py +0 -0
  21. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/handlers/handlerhttppage.py +0 -0
  22. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/handlers/handlerinterface.py +0 -0
  23. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/handlers/handlers.py +0 -0
  24. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/handlers/handlervideoodysee.py +0 -0
  25. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/handlers/handlervideoyoutube.py +0 -0
  26. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/pages.py +0 -0
  27. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/request.py +0 -0
  28. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/response.py +0 -0
  29. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/statuses.py +0 -0
  30. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/__init__.py +0 -0
  31. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/__init__.py +0 -0
  32. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/codeproject.py +0 -0
  33. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/firebog.py +0 -0
  34. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/geekwirecom.py +0 -0
  35. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/githubcom.py +0 -0
  36. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/hackernews.py +0 -0
  37. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/instance.py +0 -0
  38. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/opmlfile.py +0 -0
  39. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/reddit.py +0 -0
  40. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/returndislike.py +0 -0
  41. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/robotstxtcom.py +0 -0
  42. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/thehill.py +0 -0
  43. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/warhammercommunity.py +0 -0
  44. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fake/youtube.py +0 -0
  45. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fakeinternet.py +0 -0
  46. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fakeinternetcontents.py +0 -0
  47. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/fakeresponse.py +0 -0
  48. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/tests/mocks.py +0 -0
  49. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/urllocation.py +0 -0
  50. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/utils/dateutils.py +0 -0
  51. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/utils/logger.py +0 -0
  52. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/webconfig.py +0 -0
  53. {webtoolkit-0.0.193 → webtoolkit-0.0.194}/webtoolkit/webtools.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: webtoolkit
3
- Version: 0.0.193
3
+ Version: 0.0.194
4
4
  Summary: Web tools and interfaces for Internet data processing.
5
5
  License: GPL3
6
6
  Author: Iwan Grozny
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.9
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
15
16
  Requires-Dist: beautifulsoup4 (>=4.13.5,<5.0.0)
16
17
  Requires-Dist: brutefeedparser (>=0.10.5,<0.11.0)
17
18
  Requires-Dist: lxml (>=5.4.0,<6.0.0)
@@ -3,7 +3,7 @@
3
3
 
4
4
  [tool.poetry]
5
5
  name = "webtoolkit"
6
- version = "0.0.193"
6
+ version = "0.0.194"
7
7
  description = "Web tools and interfaces for Internet data processing."
8
8
  authors = ["Iwan Grozny <renegat@renegat0x0.ddns.net>"]
9
9
  license = "GPL3"
@@ -25,15 +25,16 @@ class RemoteServer(object):
25
25
  def __init__(self, remote_server=None, timeout_s=30):
26
26
  self.remote_server = remote_server
27
27
  if not self.remote_server:
28
- CRAWLER_BUDDY_SERVER = os.environ.get("CRAWLER_BUDDY_SERVER")
29
- CRAWLER_BUDDY_PORT = os.environ.get("CRAWLER_BUDDY_PORT")
30
- if CRAWLER_BUDDY_SERVER and CRAWLER_BUDDY_PORT:
31
- self.remote_server = (
32
- f"http://{CRAWLER_BUDDY_SERVER}:{CRAWLER_BUDDY_PORT}"
33
- )
28
+ self.remote_server = RemoteServer.get_remote_server_location()
34
29
 
35
30
  self.timeout_s = timeout_s
36
31
 
32
+ def get_remote_server_location():
33
+ CRAWLER_BUDDY_SERVER = os.environ.get("CRAWLER_BUDDY_SERVER")
34
+ CRAWLER_BUDDY_PORT = os.environ.get("CRAWLER_BUDDY_PORT")
35
+ if CRAWLER_BUDDY_SERVER and CRAWLER_BUDDY_PORT:
36
+ return f"http://{CRAWLER_BUDDY_SERVER}:{CRAWLER_BUDDY_PORT}"
37
+
37
38
  def get_getj(self, request=None, url=None):
38
39
  """
39
40
  @returns None in case of error
@@ -145,6 +146,9 @@ class RemoteServer(object):
145
146
  """
146
147
  @param link_call Remote server endpoint
147
148
  @param url Url for which we call Remote server
149
+
150
+ Note: there should always be a timeout. Server might stop responding,
151
+ it could have hung, etc.
148
152
  """
149
153
  url = request.url
150
154
 
@@ -154,8 +158,10 @@ class RemoteServer(object):
154
158
 
155
159
  text = None
156
160
 
157
- timeout_s = 50
161
+ # it is hard to think of a good default value
162
+ timeout_s = 60
158
163
  if request.timeout_s is not None:
164
+ # remote server will have timeout_s; we add some wiggle room for transmission
159
165
  timeout_s = request.timeout_s
160
166
  timeout_s += 5
161
167
 
@@ -43,8 +43,8 @@ class RemoteUrl(ContentInterface):
43
43
  """
44
44
  super().__init__(url=url, contents=None)
45
45
  self.request = request
46
- self.remote_server_location = remote_server_location
47
- self.server = RemoteServer(remote_server_location)
46
+ self.remote_server_location=remote_server_location
47
+ self.server = RemoteServer(remote_server=self.remote_server_location)
48
48
  self.all_properties = all_properties
49
49
  self.social_properties = social_properties
50
50
 
@@ -52,6 +52,9 @@ class RemoteUrl(ContentInterface):
52
52
  if self.all_properties:
53
53
  self.get_responses()
54
54
 
55
+ def get_remote_server_location():
56
+ return RemoteServer.get_remote_server_location()
57
+
55
58
  def get_responses(self):
56
59
  """Provides URL responses"""
57
60
  if self.all_properties is None:
File without changes
File without changes