zenx 0.10.2__py3-none-any.whl → 0.10.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenx/engine.py +9 -17
- zenx/settings.py +1 -0
- {zenx-0.10.2.dist-info → zenx-0.10.4.dist-info}/METADATA +1 -1
- {zenx-0.10.2.dist-info → zenx-0.10.4.dist-info}/RECORD +7 -7
- {zenx-0.10.2.dist-info → zenx-0.10.4.dist-info}/WHEEL +0 -0
- {zenx-0.10.2.dist-info → zenx-0.10.4.dist-info}/entry_points.txt +0 -0
- {zenx-0.10.2.dist-info → zenx-0.10.4.dist-info}/top_level.txt +0 -0
zenx/engine.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
import asyncio
|
2
2
|
import signal
|
3
|
-
from typing import List, Optional
|
3
|
+
from typing import List
|
4
4
|
from dotenv import load_dotenv
|
5
5
|
import pebble
|
6
6
|
import uvloop
|
@@ -26,7 +26,7 @@ class Engine:
|
|
26
26
|
self.shutdown_event.set()
|
27
27
|
|
28
28
|
|
29
|
-
async def
|
29
|
+
async def _execute(self, spider_name: str) -> None:
|
30
30
|
loop = asyncio.get_running_loop()
|
31
31
|
loop.add_signal_handler(signal.SIGINT, self._shutdown_handler)
|
32
32
|
loop.add_signal_handler(signal.SIGTERM, self._shutdown_handler)
|
@@ -47,41 +47,33 @@ class Engine:
|
|
47
47
|
await pm.start_pipelines()
|
48
48
|
|
49
49
|
spider = spider_cls(client=client, pm=pm, logger=logger, settings=settings)
|
50
|
-
crawl_task: Optional[asyncio.Task] = None
|
51
50
|
try:
|
52
51
|
if self.forever:
|
53
52
|
while not self.shutdown_event.is_set():
|
54
|
-
|
53
|
+
tasks = [asyncio.create_task(spider.crawl()) for _ in range(settings.CONCURRENCY)]
|
55
54
|
try:
|
56
|
-
await
|
55
|
+
results = await asyncio.gather(*tasks, return_exceptions=True)
|
56
|
+
for result in results:
|
57
|
+
if isinstance(result, Exception):
|
58
|
+
raise result
|
57
59
|
except Exception:
|
58
60
|
logger.exception("crawl")
|
59
61
|
await asyncio.sleep(0.01)
|
60
62
|
else:
|
61
|
-
|
62
|
-
await crawl_task
|
63
|
+
await spider.crawl()
|
63
64
|
finally:
|
64
65
|
if self.shutdown_event.is_set():
|
65
66
|
logger.info("shutdown", spider=spider_name)
|
66
|
-
|
67
|
-
if crawl_task and not crawl_task.done():
|
68
|
-
crawl_task.cancel()
|
69
|
-
logger.debug("cancelled", task="crawl")
|
70
|
-
try:
|
71
|
-
await crawl_task
|
72
|
-
except asyncio.CancelledError:
|
73
|
-
pass
|
74
67
|
if spider.background_tasks:
|
75
68
|
logger.debug("waiting", background_tasks=len(spider.background_tasks), belong_to="spider")
|
76
69
|
await asyncio.gather(*spider.background_tasks)
|
77
|
-
|
78
70
|
await client.close()
|
79
71
|
await db.close()
|
80
72
|
await pm.close_pipelines()
|
81
73
|
|
82
74
|
|
83
75
|
def run_spider(self, spider: str) -> None:
|
84
|
-
uvloop.run(self.
|
76
|
+
uvloop.run(self._execute(spider))
|
85
77
|
|
86
78
|
|
87
79
|
def run_spiders(self, spiders: List[str]) -> None:
|
zenx/settings.py
CHANGED
@@ -10,6 +10,7 @@ class Settings(BaseSettings):
|
|
10
10
|
MAX_SCRAPE_DELAY: int = 10 # 10 seconds
|
11
11
|
DQ_MAX_SIZE: int = 100 # max size of the deque for memory database
|
12
12
|
REDIS_RECORD_EXPIRY_SECONDS: int = 604800 # 7 days (7*24*60*60)
|
13
|
+
CONCURRENCY: int = 1
|
13
14
|
|
14
15
|
DB_TYPE: Literal["memory", "redis"] = "memory"
|
15
16
|
DB_NAME: str | None = None
|
@@ -1,10 +1,10 @@
|
|
1
1
|
zenx/cli.py,sha256=pHKhOTdqI6NQQoYK91waRIMpxCXLYtXEryzVaTbmvqc,2810
|
2
2
|
zenx/debug_runner.py,sha256=B2Jd9A4_EHDa-ohLcwnFIxOV73FagTWXX2fl3qgwlpY,809
|
3
3
|
zenx/discovery.py,sha256=YANVGzy2IG1fYruUud-11Y-ynyO6iEp3EjlHnhIQJQI,1014
|
4
|
-
zenx/engine.py,sha256=
|
4
|
+
zenx/engine.py,sha256=G3ryzQ9HjI8D47e4-tlTNMwtFYbhluEMe4jrU_-FhUQ,2850
|
5
5
|
zenx/exceptions.py,sha256=BJXxzwwX2CU6inhppfblx8c8Z6Mhvsk7MAhQ1LAnhBg,37
|
6
6
|
zenx/logger.py,sha256=lr45XGbV769NQcwn8-lAcPfFbR4yBN8LNh7o-i4Aa9M,1652
|
7
|
-
zenx/settings.py,sha256=
|
7
|
+
zenx/settings.py,sha256=Ob4gvov-WVzA4GgpqmIgNcKaX7jWCAyYgpM9DXbbeHU,1069
|
8
8
|
zenx/utils.py,sha256=ouvXUd3-HaxLS3dDKbshEf-CXRf44OcovziEVIRsWNs,725
|
9
9
|
zenx/clients/__init__.py,sha256=CaAAuNa8DPyMdejR0KNSDDg_UzC3WxaTol5_QvwwwG8,132
|
10
10
|
zenx/clients/database.py,sha256=AF-L7iYrWRNzUZKn7taveiihpu--mXXC6eWOrMNlqzQ,4806
|
@@ -21,8 +21,8 @@ zenx/resources/proto/feed_pb2.py,sha256=ZyICOLnyuXekkvV4bAHZ1nE1-wwzcYYRRrmRJCMr
|
|
21
21
|
zenx/resources/proto/feed_pb2_grpc.py,sha256=Mim6FfBgIMj0PmTqHk036nVUMJH3A6I3ts6r1j3bQF8,7441
|
22
22
|
zenx/spiders/__init__.py,sha256=rs5LuqdM2MQlUYiTGJrzkYhzN8_SSLTrR7wGjSRrrSo,25
|
23
23
|
zenx/spiders/base.py,sha256=MeZ3wZOPOyOX4V2ufFXtYGCDtXHZO_mNfnXdKMkisuQ,1951
|
24
|
-
zenx-0.10.
|
25
|
-
zenx-0.10.
|
26
|
-
zenx-0.10.
|
27
|
-
zenx-0.10.
|
28
|
-
zenx-0.10.
|
24
|
+
zenx-0.10.4.dist-info/METADATA,sha256=vcpC3WR_UsidsEfDdmDw0Qvlc7Sh7LYySn5wbxJKIUI,1422
|
25
|
+
zenx-0.10.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
26
|
+
zenx-0.10.4.dist-info/entry_points.txt,sha256=8JXob2f1VtvzGFris-e9Usqywg7oca-cChDlH9moOZU,38
|
27
|
+
zenx-0.10.4.dist-info/top_level.txt,sha256=JeXwvK86d7sB-2x-avugFnZIZa33zaHWKI8RHWJR6KY,5
|
28
|
+
zenx-0.10.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|