zenx 0.9.7__tar.gz → 0.9.9__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- {zenx-0.9.7 → zenx-0.9.9}/PKG-INFO +1 -1
- {zenx-0.9.7 → zenx-0.9.9}/README.md +1 -1
- {zenx-0.9.7 → zenx-0.9.9}/pyproject.toml +1 -1
- {zenx-0.9.7 → zenx-0.9.9}/zenx/clients/http.py +32 -11
- {zenx-0.9.7 → zenx-0.9.9}/zenx/settings.py +1 -1
- {zenx-0.9.7 → zenx-0.9.9}/zenx/utils.py +4 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx.egg-info/PKG-INFO +1 -1
- {zenx-0.9.7 → zenx-0.9.9}/setup.cfg +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/cli.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/clients/__init__.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/clients/database.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/debug_runner.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/discovery.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/engine.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/exceptions.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/logger.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/pipelines/__init__.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/pipelines/base.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/pipelines/discord.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/pipelines/google_rpc.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/pipelines/manager.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/pipelines/preprocess.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/pipelines/websocket.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/resources/proto/__init__.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/resources/proto/feed_pb2.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/resources/proto/feed_pb2_grpc.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/spiders/__init__.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx/spiders/base.py +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx.egg-info/SOURCES.txt +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx.egg-info/dependency_links.txt +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx.egg-info/entry_points.txt +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx.egg-info/requires.txt +0 -0
- {zenx-0.9.7 → zenx-0.9.9}/zenx.egg-info/top_level.txt +0 -0
@@ -154,7 +154,7 @@ ZenX allows for flexible configuration through environment variables or a `.env`
|
|
154
154
|
- `DB_PASS`: The password for database authentication (if applicable).
|
155
155
|
- `DB_HOST`: The hostname or IP address of the database server.
|
156
156
|
- `DB_PORT`: The port number for the database server.
|
157
|
-
- `
|
157
|
+
- `PROXY_V4`: Specifies an IPv4 proxy to be used for outgoing requests.
|
158
158
|
- `PROXY_V6`: Specifies an IPv6 proxy to be used for outgoing requests.
|
159
159
|
- `SYNOPTIC_GRPC_SERVER_URI`: The URI for the gRPC server endpoint.
|
160
160
|
- `SYNOPTIC_GRPC_TOKEN`: The authentication token for gRPC communication.
|
@@ -12,7 +12,13 @@ import orjson
|
|
12
12
|
from structlog import BoundLogger
|
13
13
|
|
14
14
|
from zenx.settings import Settings
|
15
|
-
from zenx.utils import get_time
|
15
|
+
from zenx.utils import get_time, get_uuid
|
16
|
+
|
17
|
+
@dataclass
|
18
|
+
class SessionWrapper:
|
19
|
+
id: str
|
20
|
+
requests: int
|
21
|
+
_self: AsyncSession
|
16
22
|
|
17
23
|
|
18
24
|
@dataclass
|
@@ -91,15 +97,24 @@ class CurlCffi(HttpClient):
|
|
91
97
|
self._session_pool = asyncio.Queue(maxsize=settings.SESSION_POOL_SIZE)
|
92
98
|
for _ in range(settings.SESSION_POOL_SIZE):
|
93
99
|
impersonate = self._get_random_fingerprint()
|
94
|
-
|
100
|
+
obj = SessionWrapper(id=get_uuid(), requests=0, _self=AsyncSession(max_clients=1, impersonate=impersonate))
|
101
|
+
self._session_pool.put_nowait(obj)
|
95
102
|
self.logger.debug("created", sessions=self._session_pool.qsize(), client=self.name)
|
96
103
|
|
97
104
|
|
98
105
|
def _get_random_fingerprint(self) -> str:
|
99
106
|
chosen_fingerprint = random.choice(self._fingerprints)
|
100
107
|
return chosen_fingerprint
|
101
|
-
|
102
108
|
|
109
|
+
|
110
|
+
async def _replace_session(self, existing_session: SessionWrapper) -> None:
|
111
|
+
self.logger.debug("discarded", session_id=existing_session.id, requests=existing_session.requests, client=self.name)
|
112
|
+
await existing_session._self.close()
|
113
|
+
obj = SessionWrapper(id=get_uuid(), requests=0, _self=AsyncSession(max_clients=1, impersonate=self._get_random_fingerprint()))
|
114
|
+
self._session_pool.put_nowait(obj)
|
115
|
+
self.logger.debug("replaced", old_session=existing_session.id, new_session=obj.id, client=self.name)
|
116
|
+
|
117
|
+
|
103
118
|
async def request(
|
104
119
|
self,
|
105
120
|
url: str,
|
@@ -139,11 +154,11 @@ class CurlCffi(HttpClient):
|
|
139
154
|
# each session has its own fingerprint set
|
140
155
|
kwargs.pop("impersonate", None)
|
141
156
|
self.logger.debug("acquire_session", client=self.name)
|
142
|
-
|
143
|
-
self.logger.debug("acquired_session",
|
157
|
+
session_wrapper: SessionWrapper = await self._session_pool.get()
|
158
|
+
self.logger.debug("acquired_session", session_id=session_wrapper.id, client=self.name)
|
144
159
|
try:
|
145
160
|
req_at = get_time()
|
146
|
-
response: CurlResponse = await asyncio.wait_for(
|
161
|
+
response: CurlResponse = await asyncio.wait_for(session_wrapper._self.request(
|
147
162
|
url=url,
|
148
163
|
method=method,
|
149
164
|
headers=headers,
|
@@ -153,15 +168,21 @@ class CurlCffi(HttpClient):
|
|
153
168
|
), timeout=10)
|
154
169
|
recv_at = get_time()
|
155
170
|
latency = recv_at - req_at
|
156
|
-
|
171
|
+
session_wrapper.requests +=1
|
172
|
+
self.logger.debug("response", status=response.status_code, url=url, session_id=session_wrapper.id, requests=session_wrapper.requests, client=self.name, requested_at=req_at, responded_at=recv_at, latency_ms=latency)
|
157
173
|
except TimeoutError:
|
158
|
-
self.logger.error("timeout", url=url,
|
174
|
+
self.logger.error("timeout", url=url, client=self.name)
|
175
|
+
await self._replace_session(session_wrapper)
|
159
176
|
raise
|
160
177
|
except Exception:
|
161
|
-
self.logger.
|
178
|
+
self.logger.error("request", url=url, client=self.name)
|
179
|
+
await self._replace_session(session_wrapper)
|
162
180
|
raise
|
163
|
-
|
164
|
-
|
181
|
+
else:
|
182
|
+
if response.status_code != 200:
|
183
|
+
await self._replace_session(session_wrapper)
|
184
|
+
else:
|
185
|
+
self._session_pool.put_nowait(session_wrapper)
|
165
186
|
|
166
187
|
return Response(
|
167
188
|
url=response.url,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|