zenx 0.5.1__tar.gz → 0.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {zenx-0.5.1 → zenx-0.5.3}/PKG-INFO +1 -1
- {zenx-0.5.1 → zenx-0.5.3}/pyproject.toml +1 -1
- {zenx-0.5.1 → zenx-0.5.3}/zenx/clients/http.py +4 -4
- {zenx-0.5.1 → zenx-0.5.3}/zenx/pipelines/google_rpc.py +3 -1
- {zenx-0.5.1 → zenx-0.5.3}/zenx/pipelines/preprocess.py +3 -2
- {zenx-0.5.1 → zenx-0.5.3}/zenx/pipelines/websocket.py +3 -3
- zenx-0.5.3/zenx/utils.py +19 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx.egg-info/PKG-INFO +1 -1
- zenx-0.5.1/zenx/utils.py +0 -6
- {zenx-0.5.1 → zenx-0.5.3}/setup.cfg +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/cli.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/clients/__init__.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/clients/database.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/debug_runner.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/discovery.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/engine.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/exceptions.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/logger.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/pipelines/__init__.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/pipelines/base.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/pipelines/manager.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/resources/proto/__init__.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/resources/proto/feed_pb2.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/resources/proto/feed_pb2_grpc.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/settings.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/spiders/__init__.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx/spiders/base.py +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx.egg-info/SOURCES.txt +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx.egg-info/dependency_links.txt +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx.egg-info/entry_points.txt +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx.egg-info/requires.txt +0 -0
- {zenx-0.5.1 → zenx-0.5.3}/zenx.egg-info/top_level.txt +0 -0
@@ -20,7 +20,7 @@ class Response:
|
|
20
20
|
status: int
|
21
21
|
text: str
|
22
22
|
headers: Dict
|
23
|
-
|
23
|
+
responded_at: int
|
24
24
|
requested_at: int
|
25
25
|
latency_ms: int
|
26
26
|
|
@@ -124,7 +124,7 @@ class CurlCffi(HttpClient):
|
|
124
124
|
)
|
125
125
|
recv_at = get_time()
|
126
126
|
latency = recv_at - req_at
|
127
|
-
self.logger.debug("response", status=response.status_code, url=url, impersonate=impersonate, client=self.name, requested_at=req_at,
|
127
|
+
self.logger.debug("response", status=response.status_code, url=url, impersonate=impersonate, client=self.name, requested_at=req_at, responded_at=recv_at, latency_ms=latency)
|
128
128
|
except Exception:
|
129
129
|
self.logger.exception("request", url=url, client=self.name)
|
130
130
|
raise
|
@@ -144,7 +144,7 @@ class CurlCffi(HttpClient):
|
|
144
144
|
)
|
145
145
|
recv_at = get_time()
|
146
146
|
latency = recv_at - req_at
|
147
|
-
self.logger.debug("response", status=response.status_code, url=url, impersonate=impersonate, client=self.name, requested_at=req_at,
|
147
|
+
self.logger.debug("response", status=response.status_code, url=url, impersonate=impersonate, client=self.name, requested_at=req_at, responded_at=recv_at, latency_ms=latency)
|
148
148
|
except Exception:
|
149
149
|
self.logger.exception("request", url=url, client=self.name)
|
150
150
|
raise
|
@@ -157,7 +157,7 @@ class CurlCffi(HttpClient):
|
|
157
157
|
text=response.text,
|
158
158
|
headers=dict(response.headers),
|
159
159
|
requested_at=req_at,
|
160
|
-
|
160
|
+
responded_at=recv_at,
|
161
161
|
latency_ms=latency,
|
162
162
|
)
|
163
163
|
|
@@ -7,6 +7,7 @@ import structlog
|
|
7
7
|
from zenx.pipelines.base import Pipeline
|
8
8
|
from zenx.clients.database import DBClient
|
9
9
|
from zenx.settings import Settings
|
10
|
+
from zenx.utils import log_processing_time
|
10
11
|
|
11
12
|
|
12
13
|
try:
|
@@ -63,7 +64,8 @@ try:
|
|
63
64
|
self.logger.debug("connected", pipeline=self.name)
|
64
65
|
self._connected.set()
|
65
66
|
|
66
|
-
|
67
|
+
|
68
|
+
@log_processing_time
|
67
69
|
async def process_item(self, item: Dict, spider: str) -> Dict:
|
68
70
|
await self._process(item)
|
69
71
|
return item
|
@@ -21,8 +21,9 @@ class PreprocessPipeline(Pipeline):
|
|
21
21
|
if await self.db.exists(_id, spider):
|
22
22
|
raise DropItem
|
23
23
|
await self.db.insert(_id, spider)
|
24
|
-
|
25
|
-
|
24
|
+
|
25
|
+
scraped_time = item['scraped_at'] - item['responded_at']
|
26
|
+
self.logger.info("scraped", id=item.get("_id"), time_ms=scraped_time, pipeline=self.name)
|
26
27
|
return item
|
27
28
|
|
28
29
|
|
@@ -7,6 +7,7 @@ from websockets import ConnectionClosed
|
|
7
7
|
from zenx.clients.database import DBClient
|
8
8
|
from zenx.settings import Settings
|
9
9
|
from zenx.pipelines.base import Pipeline
|
10
|
+
from zenx.utils import get_time, log_processing_time
|
10
11
|
|
11
12
|
|
12
13
|
try:
|
@@ -58,7 +59,8 @@ try:
|
|
58
59
|
self.logger.debug("connected", pipeline=self.name, msg=msg)
|
59
60
|
self._connected.set()
|
60
61
|
|
61
|
-
|
62
|
+
|
63
|
+
@log_processing_time
|
62
64
|
async def process_item(self, item: Dict, spider: str) -> Dict:
|
63
65
|
await self._process(item, spider)
|
64
66
|
return item
|
@@ -80,8 +82,6 @@ try:
|
|
80
82
|
await self._ws_client.send(json.dumps(_item))
|
81
83
|
except ConnectionClosed as e:
|
82
84
|
self.logger.error("processing", exception=str(e), id=item.get("_id"), pipeline=self.name)
|
83
|
-
else:
|
84
|
-
self.logger.debug("processed", id=item['_id'], pipeline=self.name)
|
85
85
|
|
86
86
|
|
87
87
|
async def close(self) -> None:
|
zenx-0.5.3/zenx/utils.py
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
import time
|
2
|
+
from typing import Dict
|
3
|
+
import functools
|
4
|
+
|
5
|
+
|
6
|
+
def get_time() -> int:
    """Return the current Unix time as a whole number of milliseconds."""
    # time.time() reports seconds as a float; scale to ms and truncate.
    seconds = time.time()
    return int(seconds * 1000)
|
9
|
+
|
10
|
+
|
11
|
+
def log_processing_time(func):
    """Decorate an async pipeline method so its duration is debug-logged.

    The decorated callable must be a coroutine method taking
    ``(self, item, *args, **kwargs)``. The owning object must expose
    ``self.logger`` (with a ``debug(event, **fields)`` method) and
    ``self.name``.

    After ``func`` returns, one ``"processed"`` debug record is emitted
    with the item's ``_id`` (``None`` when absent), the elapsed time in
    whole milliseconds, and the pipeline name. Nothing is logged when
    ``func`` raises; the exception propagates unchanged.

    Returns:
        The wrapping coroutine function, carrying ``func``'s metadata.
    """
    @functools.wraps(func)
    async def wrapper(self, item: Dict, *args, **kwargs) -> Dict:
        # Use a monotonic clock: wall-clock time can jump when the system
        # clock is adjusted, which would corrupt the measured duration.
        started = time.perf_counter()
        # `self` must be forwarded explicitly: `func` is the plain function
        # taken from the class body, not a bound method.
        result = await func(self, item, *args, **kwargs)
        elapsed_ms = int((time.perf_counter() - started) * 1000)
        # .get() so a missing "_id" cannot turn a successful call into a
        # KeyError raised from the logging line.
        self.logger.debug("processed", id=item.get("_id"), time_ms=elapsed_ms, pipeline=self.name)
        return result
    return wrapper
|
zenx-0.5.1/zenx/utils.py
DELETED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|