zenx 0.6.3__tar.gz → 0.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {zenx-0.6.3 → zenx-0.6.5}/PKG-INFO +1 -1
  2. {zenx-0.6.3 → zenx-0.6.5}/pyproject.toml +1 -1
  3. {zenx-0.6.3 → zenx-0.6.5}/zenx/clients/http.py +21 -2
  4. {zenx-0.6.3 → zenx-0.6.5}/zenx/spiders/base.py +1 -19
  5. {zenx-0.6.3 → zenx-0.6.5}/zenx/utils.py +12 -0
  6. {zenx-0.6.3 → zenx-0.6.5}/zenx.egg-info/PKG-INFO +1 -1
  7. {zenx-0.6.3 → zenx-0.6.5}/setup.cfg +0 -0
  8. {zenx-0.6.3 → zenx-0.6.5}/zenx/cli.py +0 -0
  9. {zenx-0.6.3 → zenx-0.6.5}/zenx/clients/__init__.py +0 -0
  10. {zenx-0.6.3 → zenx-0.6.5}/zenx/clients/database.py +0 -0
  11. {zenx-0.6.3 → zenx-0.6.5}/zenx/debug_runner.py +0 -0
  12. {zenx-0.6.3 → zenx-0.6.5}/zenx/discovery.py +0 -0
  13. {zenx-0.6.3 → zenx-0.6.5}/zenx/engine.py +0 -0
  14. {zenx-0.6.3 → zenx-0.6.5}/zenx/exceptions.py +0 -0
  15. {zenx-0.6.3 → zenx-0.6.5}/zenx/logger.py +0 -0
  16. {zenx-0.6.3 → zenx-0.6.5}/zenx/pipelines/__init__.py +0 -0
  17. {zenx-0.6.3 → zenx-0.6.5}/zenx/pipelines/base.py +0 -0
  18. {zenx-0.6.3 → zenx-0.6.5}/zenx/pipelines/google_rpc.py +0 -0
  19. {zenx-0.6.3 → zenx-0.6.5}/zenx/pipelines/manager.py +0 -0
  20. {zenx-0.6.3 → zenx-0.6.5}/zenx/pipelines/preprocess.py +0 -0
  21. {zenx-0.6.3 → zenx-0.6.5}/zenx/pipelines/websocket.py +0 -0
  22. {zenx-0.6.3 → zenx-0.6.5}/zenx/resources/proto/__init__.py +0 -0
  23. {zenx-0.6.3 → zenx-0.6.5}/zenx/resources/proto/feed_pb2.py +0 -0
  24. {zenx-0.6.3 → zenx-0.6.5}/zenx/resources/proto/feed_pb2_grpc.py +0 -0
  25. {zenx-0.6.3 → zenx-0.6.5}/zenx/settings.py +0 -0
  26. {zenx-0.6.3 → zenx-0.6.5}/zenx/spiders/__init__.py +0 -0
  27. {zenx-0.6.3 → zenx-0.6.5}/zenx.egg-info/SOURCES.txt +0 -0
  28. {zenx-0.6.3 → zenx-0.6.5}/zenx.egg-info/dependency_links.txt +0 -0
  29. {zenx-0.6.3 → zenx-0.6.5}/zenx.egg-info/entry_points.txt +0 -0
  30. {zenx-0.6.3 → zenx-0.6.5}/zenx.egg-info/requires.txt +0 -0
  31. {zenx-0.6.3 → zenx-0.6.5}/zenx.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zenx
3
- Version: 0.6.3
3
+ Version: 0.6.5
4
4
  Summary: mini-framework
5
5
  Requires-Python: >=3.12
6
6
  Requires-Dist: curl-cffi>=0.12.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "zenx"
3
- version = "0.6.3"
3
+ version = "0.6.5"
4
4
  description = "mini-framework"
5
5
  requires-python = ">=3.12"
6
6
  dependencies = [
@@ -1,4 +1,6 @@
1
1
  from __future__ import annotations
2
+ import time
3
+ from collections import deque
2
4
  import parsel
3
5
  import random
4
6
  from curl_cffi.requests.impersonate import BrowserTypeLiteral
@@ -11,7 +13,7 @@ import json
11
13
  from structlog import BoundLogger
12
14
 
13
15
  from zenx.settings import Settings
14
- from zenx.utils import get_time
16
+ from zenx.utils import get_time, record_request
15
17
 
16
18
 
17
19
  @dataclass
@@ -56,8 +58,24 @@ class HttpClient(ABC):
56
58
  self.logger = logger
57
59
  self.settings = settings
58
60
  self._session_pool: asyncio.Queue
61
+ # stats
62
+ self._requests_timestamps = deque()
63
+ self._total_requests = 0
64
+
59
65
 
66
+ def get_stats(self) -> Dict:
67
+ # calculate RPM based on rolling window of 1 min
68
+ now = time.time()
69
+ # remove requests older than 60 sec
70
+ while self._requests_timestamps and self._requests_timestamps[0] < (now - 60):
71
+ self._requests_timestamps.popleft()
72
+ rpm = len(self._requests_timestamps)
73
+ return {
74
+ "rpm": rpm,
75
+ "total_requests": self._total_requests,
76
+ }
60
77
 
78
+
61
79
  @abstractmethod
62
80
  async def request(
63
81
  self,
@@ -96,6 +114,7 @@ class CurlCffi(HttpClient):
96
114
  return chosen_fingerprint
97
115
 
98
116
 
117
+ @record_request
99
118
  async def request(
100
119
  self,
101
120
  url: str,
@@ -124,7 +143,7 @@ class CurlCffi(HttpClient):
124
143
  )
125
144
  recv_at = get_time()
126
145
  latency = recv_at - req_at
127
- self.logger.debug("response", status=response.status_code, url=url, impersonate=impersonate, client=self.name, requested_at=req_at, responded_at=recv_at, latency_ms=latency)
146
+ self.logger.debug("response", status=response.status_code, url=url, impersonate=session.impersonate, client=self.name, requested_at=req_at, responded_at=recv_at, latency_ms=latency)
128
147
  except Exception:
129
148
  self.logger.exception("request", url=url, client=self.name)
130
149
  raise
@@ -1,7 +1,5 @@
1
1
  from __future__ import annotations
2
2
  from abc import ABC, abstractmethod
3
- from collections import deque
4
- import time
5
3
  from typing import ClassVar, Dict, List, Literal, Type
6
4
  from structlog import BoundLogger
7
5
 
@@ -15,7 +13,7 @@ class Spider(ABC):
15
13
  # central registry
16
14
  name: ClassVar[str]
17
15
  _registry: ClassVar[Dict[str, Type[Spider]]] = {}
18
- pipelines: ClassVar[List[Literal["synoptic_websocket","synoptic_grpc"]]]
16
+ pipelines: ClassVar[List[Literal["preprocess","synoptic_websocket","synoptic_grpc"]]]
19
17
  client_name: ClassVar[Literal["curl_cffi"]] = "curl_cffi"
20
18
 
21
19
 
@@ -46,22 +44,6 @@ class Spider(ABC):
46
44
  self.pm = pm
47
45
  self.logger = logger
48
46
  self.settings = settings
49
- # stats
50
- self.requests_timestamps = deque()
51
- self.total_requests = 0
52
-
53
-
54
- def get_stats(self) -> Dict:
55
- # calculate RPM based on rolling window of 1 min
56
- now = time.time()
57
- # remove requests older than 60 sec
58
- while self.requests_timestamps and self.requests_timestamps[0] < (now - 60):
59
- self.requests_timestamps.popleft()
60
- rpm = len(self.requests_timestamps)
61
- return {
62
- "rpm": rpm,
63
- "total_requests": self.total_requests,
64
- }
65
47
 
66
48
 
67
49
  @abstractmethod
@@ -2,6 +2,8 @@ import time
2
2
  from typing import Dict
3
3
  import functools
4
4
 
5
+ from zenx.clients.http import HttpClient, Response
6
+
5
7
 
6
8
  def get_time() -> int:
7
9
  """ current unix time in milliseconds """
@@ -17,3 +19,13 @@ def log_processing_time(func):
17
19
  self.logger.info("processed", id=item['_id'], time_ms=processed_time, pipeline=self.name)
18
20
  return result
19
21
  return wrapper
22
+
23
+
24
+ def record_request(func):
25
+ @functools.wraps(func)
26
+ async def wrapper(self: HttpClient, *args, **kwargs) -> Response:
27
+ result = await func(self, *args, **kwargs)
28
+ self._requests_timestamps.append(time.time())
29
+ self._total_requests +=1
30
+ return result
31
+ return wrapper
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zenx
3
- Version: 0.6.3
3
+ Version: 0.6.5
4
4
  Summary: mini-framework
5
5
  Requires-Python: >=3.12
6
6
  Requires-Dist: curl-cffi>=0.12.0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes